d.sample() returns a tensor, so we stack the samples to avoid losing the device
This commit is contained in:
parent 714443192d
commit 9740c40527
1 changed file with 1 addition and 1 deletion
@@ -23,7 +23,7 @@ class QEPAgent:
     def fitness(self, policy_net, value_net, state_batch):
         action_probabilities = policy_net(state_batch)
         distributions = list(map(Categorical, action_probabilities))
-        actions = torch.tensor([d.sample() for d in distributions])
+        actions = torch.stack([d.sample() for d in distributions])
 
         with torch.no_grad():
             state_values = value_net(state_batch)
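
A minimal standalone sketch of why the change matters, using a hypothetical probability batch not taken from the repository: Categorical.sample() returns a tensor on the same device as its input probabilities, and torch.stack keeps the samples on that device, whereas torch.tensor copies the values into a fresh tensor that defaults to the CPU. On a CPU-only machine the two results coincide; moving probs to "cuda" makes the difference observable.

import torch
from torch.distributions import Categorical

# Hypothetical batch of action probabilities (3 states, 4 actions).
# probs = probs.to("cuda") would expose the device difference on a GPU box.
probs = torch.softmax(torch.randn(3, 4), dim=-1)

distributions = [Categorical(p) for p in probs]

# torch.tensor copies the sampled values into a brand-new tensor,
# which lands on the default (CPU) device.
actions_copied = torch.tensor([d.sample() for d in distributions])

# torch.stack joins the sample tensors along a new dimension and
# keeps them on whatever device the probabilities live on.
actions_stacked = torch.stack([d.sample() for d in distributions])

print(actions_copied.device, actions_stacked.device)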