Fixed the parallel implementation of experience collection by using a queue

Brandon Rozek 2019-02-13 00:36:23 -05:00
parent 5094ed53af
commit 115543d201
4 changed files with 33 additions and 22 deletions
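
The change matters because these classes subclass mp.Process: when the child process appends to a memory object stored on self, it mutates its own copy of that object, and the collected experiences never reach the parent. A torch.multiprocessing.Queue crosses the process boundary, so the worker can put transitions and the parent can drain them. A minimal sketch of the pattern, assuming a hypothetical stand-in worker (not code from this commit):

import torch.multiprocessing as mp

def worker(queue, n = 5):
    # Hypothetical stand-in for an environment loop: generate
    # (state, action, reward, next_state, done) tuples in the
    # child process and push them through the shared queue.
    for i in range(n):
        queue.put((i, 0, 1.0, i + 1, i == n - 1))

if __name__ == "__main__":
    experience_queue = mp.Queue()
    p = mp.Process(target = worker, args = (experience_queue,))
    p.start()
    # Drain before join: get() blocks until the child has put each item,
    # and the transitions arrive in the parent despite separate address spaces.
    for _ in range(5):
        print(experience_queue.get())
    p.join()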

@@ -2,17 +2,16 @@ from copy import deepcopy
 import torch.multiprocessing as mp
 
 class EnvironmentEpisode(mp.Process):
-    def __init__(self, env, actor, config, memory = None, logger = None, name = ""):
+    def __init__(self, env, actor, config, logger = None, name = ""):
         super(EnvironmentEpisode, self).__init__()
         self.env = env
         self.actor = actor
-        self.memory = memory
         self.config = deepcopy(config)
         self.logger = logger
         self.name = name
         self.episode_num = 1
 
-    def run(self, printstat = False):
+    def run(self, printstat = False, memory = None):
         state = self.env.reset()
         done = False
         episode_reward = 0
@@ -21,8 +20,8 @@ class EnvironmentEpisode(mp.Process):
             next_state, reward, done, _ = self.env.step(action)
             episode_reward = episode_reward + reward
-            if self.memory is not None:
-                self.memory.append(state, action, reward, next_state, done)
+            if memory is not None:
+                memory.put((state, action, reward, next_state, done))
             state = next_state
         if printstat:

@@ -2,11 +2,10 @@ from copy import deepcopy
 import torch.multiprocessing as mp
 
 class EnvironmentRun(mp.Process):
-    def __init__(self, env, actor, config, memory = None, logger = None, name = ""):
+    def __init__(self, env, actor, config, logger = None, name = ""):
         super(EnvironmentRun, self).__init__()
         self.env = env
         self.actor = actor
-        self.memory = memory
         self.config = deepcopy(config)
         self.logger = logger
         self.name = name
@@ -14,15 +13,15 @@ class EnvironmentRun(mp.Process):
         self.episode_reward = 0
         self.last_state = env.reset()
 
-    def run(self, iterations = 1, printstat = False):
+    def run(self, iterations = 1, printstat = False, memory = None):
         state = self.last_state
         for _ in range(iterations):
            action = self.actor.act(state)
            next_state, reward, done, _ = self.env.step(action)
            self.episode_reward = self.episode_reward + reward
-           if self.memory is not None:
-               self.memory.append(state, action, reward, next_state, done)
+           if memory is not None:
+               memory.put((state, action, reward, next_state, done))
            state = next_state
            if done:
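
Note the call-site change this implies: the old self.memory.append(state, action, reward, next_state, done) took the transition as positional arguments, while the queue receives a single tuple, so whoever drains it must unpack. A sketch of how the new run(..., memory = queue) signature might be driven; ToyRun and the drain loop are hypothetical, since the commit does not show the call site:

import torch.multiprocessing as mp

class ToyRun(mp.Process):
    # Hypothetical stand-in mirroring EnvironmentRun's new shape:
    # the queue now arrives through run() instead of __init__.
    def run(self, iterations = 3, memory = None):
        state = 0
        for _ in range(iterations):
            next_state, reward, done = state + 1, 1.0, False
            if memory is not None:
                memory.put((state, 0, reward, next_state, done))
            state = next_state

if __name__ == "__main__":
    queue = mp.Queue()
    # Called directly here for illustration; start() would invoke run()
    # with no arguments, so a real driver must pass the queue some other way.
    ToyRun().run(iterations = 3, memory = queue)
    for _ in range(3):
        state, action, reward, next_state, done = queue.get()  # unpack the tuple
        print(state, action, reward, next_state, done)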