Fixed the parallel implementation of experience collection by using a queue
parent 5094ed53af
commit 115543d201
4 changed files with 33 additions and 22 deletions
@@ -2,17 +2,16 @@ from copy import deepcopy
 import torch.multiprocessing as mp
 
 class EnvironmentEpisode(mp.Process):
-    def __init__(self, env, actor, config, memory = None, logger = None, name = ""):
+    def __init__(self, env, actor, config, logger = None, name = ""):
         super(EnvironmentEpisode, self).__init__()
         self.env = env
         self.actor = actor
-        self.memory = memory
         self.config = deepcopy(config)
         self.logger = logger
         self.name = name
         self.episode_num = 1
 
-    def run(self, printstat = False):
+    def run(self, printstat = False, memory = None):
         state = self.env.reset()
         done = False
         episode_reward = 0
@@ -21,8 +20,8 @@ class EnvironmentEpisode(mp.Process):
             next_state, reward, done, _ = self.env.step(action)
 
             episode_reward = episode_reward + reward
-            if self.memory is not None:
-                self.memory.append(state, action, reward, next_state, done)
+            if memory is not None:
+                memory.put((state, action, reward, next_state, done))
             state = next_state
 
         if printstat:
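With this change, EnvironmentEpisode.run no longer writes into a memory object captured at construction time; the caller hands in a torch.multiprocessing queue, and each transition is published with put. A minimal sketch of one way to drive a worker and drain the queue into replay storage follows; only the queue wiring and the tuple layout come from the diff, while replay_memory and the surrounding driver code are assumptions for illustration:

    import torch.multiprocessing as mp

    # Hypothetical driver; env, actor, config, replay_memory come from elsewhere.
    queue = mp.Queue()
    worker = EnvironmentEpisode(env, actor, config, name = "worker-0")
    worker.run(printstat = True, memory = queue)  # runs one episode, filling the queue

    # Polling empty() is safe here because run() was invoked synchronously;
    # across live processes empty() is only a hint.
    while not queue.empty():
        state, action, reward, next_state, done = queue.get()
        replay_memory.append(state, action, reward, next_state, done)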
@@ -2,11 +2,10 @@ from copy import deepcopy
 import torch.multiprocessing as mp
 
 class EnvironmentRun(mp.Process):
-    def __init__(self, env, actor, config, memory = None, logger = None, name = ""):
+    def __init__(self, env, actor, config, logger = None, name = ""):
         super(EnvironmentRun, self).__init__()
         self.env = env
         self.actor = actor
-        self.memory = memory
         self.config = deepcopy(config)
         self.logger = logger
         self.name = name
@@ -14,15 +13,15 @@ class EnvironmentRun(mp.Process):
         self.episode_reward = 0
         self.last_state = env.reset()
 
-    def run(self, iterations = 1, printstat = False):
+    def run(self, iterations = 1, printstat = False, memory = None):
         state = self.last_state
         for _ in range(iterations):
             action = self.actor.act(state)
             next_state, reward, done, _ = self.env.step(action)
 
             self.episode_reward = self.episode_reward + reward
-            if self.memory is not None:
-                self.memory.append(state, action, reward, next_state, done)
+            if memory is not None:
+                memory.put((state, action, reward, next_state, done))
             state = next_state
 
             if done:
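EnvironmentRun gets the same treatment, which lets several workers share a single queue. When the workers actually run in parallel (started via start() rather than a direct run() call), the consumer should not rely on empty(); a timeout-based drain is the usual pattern. A sketch under that assumption, where drain and replay_memory are hypothetical names and the transition tuple layout is the one the diff puts on the queue:

    from queue import Empty

    import torch.multiprocessing as mp

    def drain(queue, replay_memory, timeout = 0.1):
        # Pull whatever transitions the workers have produced so far;
        # stop once the queue stays empty for `timeout` seconds.
        while True:
            try:
                state, action, reward, next_state, done = queue.get(timeout = timeout)
            except Empty:
                break
            replay_memory.append(state, action, reward, next_state, done)

Each worker would receive the same queue through its run(..., memory = queue) call, so transitions from all environments interleave in arrival order.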