Updated configs and fixed threading issues
This commit is contained in:
parent
32862e4d79
commit
744656aaa9
4 changed files with 58 additions and 31 deletions
18
config.py
18
config.py
|
@ -2,20 +2,20 @@ import rltorch
|
|||
|
||||
config = {}
|
||||
config['seed'] = 901
|
||||
config['seconds_play_per_state'] = 120
|
||||
config['zoom'] = 4
|
||||
config['environment_name'] = 'PongNoFrameskip-v4'
|
||||
config['learning_rate'] = 1e-4
|
||||
config['learning_rate'] = 1e-5
|
||||
config['target_sync_tau'] = 1e-3
|
||||
config['discount_rate'] = 0.99
|
||||
config['exploration_rate'] = rltorch.scheduler.ExponentialScheduler(initial_value = 1, end_value = 0.1, iterations = 10**5)
|
||||
# Number of episodes for the computer to train the agent without the human seeing
|
||||
config['num_sneaky_episodes'] = 10
|
||||
config['num_train_per_demo'] = 50 # 100 total since you have two demo training per cycle
|
||||
config['replay_skip'] = 14
|
||||
config['batch_size'] = 32 * (config['replay_skip'] + 1)
|
||||
config['disable_cuda'] = False
|
||||
config['memory_size'] = 10**4
|
||||
|
||||
config['seconds_play_per_state'] = 120
|
||||
# 30 transitions per second for 120 seconds = 3600 transitions per turn
|
||||
config['memory_size'] = 21600 # To hold 6 demonstrations
|
||||
config['batch_size'] = 64
|
||||
config['num_train_per_demo'] = 115 # 4 looks * transitions per turn / (2 * batch_size)
|
||||
|
||||
|
||||
# Prioritized vs Random Sampling
|
||||
# 0 - Random sampling
|
||||
# 1 - Only the highest priorities
|
||||
|
|
42
play.py
42
play.py
|
@ -4,10 +4,11 @@ from pygame.locals import VIDEORESIZE
|
|||
from rltorch.memory import ReplayMemory
|
||||
|
||||
class Play:
|
||||
def __init__(self, env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config):
|
||||
def __init__(self, env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config, sneaky_config):
|
||||
self.env = env
|
||||
self.action_selector = action_selector
|
||||
self.record_lock = record_lock
|
||||
self.record_locked = False
|
||||
self.sneaky_agent = sneaky_agent
|
||||
self.agent = agent
|
||||
self.sneaky_env = sneaky_env
|
||||
|
@ -18,9 +19,8 @@ class Play:
|
|||
self.zoom = config['zoom'] if 'zoom' in config else 1
|
||||
self.keys_to_action = config['keys_to_action'] if 'keys_to_action' in config else None
|
||||
self.seconds_play_per_state = config['seconds_play_per_state'] if 'seconds_play_per_state' in config else 30
|
||||
self.num_sneaky_episodes = config['num_sneaky_episodes'] if 'num_sneaky_episodes' in config else 10
|
||||
self.memory_size = config['memory_size'] if 'memory_size' in config else 10**4
|
||||
self.replay_skip = config['replay_skip'] if 'replay_skip' in config else 0
|
||||
self.num_sneaky_episodes = sneaky_config['num_sneaky_episodes'] if 'num_sneaky_episodes' in sneaky_config else 10
|
||||
self.replay_skip = sneaky_config['replay_skip'] if 'replay_skip' in sneaky_config else 0
|
||||
self.num_train_per_demo = config['num_train_per_demo'] if 'num_train_per_demo' in config else 1
|
||||
# Initial values...
|
||||
self.video_size = (0, 0)
|
||||
|
@ -78,6 +78,7 @@ class Play:
|
|||
self.pressed_keys.append(event.key)
|
||||
elif event.type == pygame.KEYUP:
|
||||
if event.key in self.relevant_keys:
|
||||
if event.key in self.pressed_keys: # To make sure that program doesn't crash
|
||||
self.pressed_keys.remove(event.key)
|
||||
|
||||
pygame.display.flip()
|
||||
|
@ -145,7 +146,7 @@ class Play:
|
|||
self.clock.tick(self.fps)
|
||||
|
||||
def sneaky_train(self):
|
||||
self.record_lock.acquire()
|
||||
# self.record_lock.acquire()
|
||||
# Do a standard RL algorithm process for a certain number of episodes
|
||||
for i in range(self.num_sneaky_episodes):
|
||||
print("Episode: %d / %d, Reward: " % ((self.num_sneaky_episodes * self.sneaky_iteration) + i + 1, (self.sneaky_iteration + 1) * self.num_sneaky_episodes), end = "")
|
||||
|
@ -167,9 +168,9 @@ class Play:
|
|||
self.sneaky_agent.learn()
|
||||
|
||||
# Finish the previous print with the total reward obtained during the episode
|
||||
print(total_reward)
|
||||
print(total_reward, flush = True)
|
||||
self.sneaky_iteration += 1
|
||||
self.record_lock.release()
|
||||
# self.record_lock.release()
|
||||
|
||||
def display_text(self, text):
|
||||
myfont = pygame.font.SysFont('Comic Sans MS', 50)
|
||||
|
@ -188,6 +189,9 @@ class Play:
|
|||
if event.type == pygame.KEYUP and event.key == pygame.K_F1:
|
||||
self.paused = False
|
||||
self.clear_text(obs)
|
||||
if self.record_locked:
|
||||
self.record_lock.release()
|
||||
self.record_locked = False
|
||||
else:
|
||||
self._process_common_pygame_events(event)
|
||||
|
||||
|
@ -224,21 +228,30 @@ class Play:
|
|||
# If the environment is done after a turn, reset it so we can keep going
|
||||
if env_done:
|
||||
episode_num += 1
|
||||
print("Human/Computer Episode: ", episode_num)
|
||||
print("Human/Computer Episode:", episode_num, flush = True)
|
||||
obs = self.env.reset()
|
||||
env_done = False
|
||||
|
||||
if self.paused:
|
||||
if not self.record_locked:
|
||||
self.record_lock.acquire()
|
||||
self.record_locked = True
|
||||
self.process_pause_state(obs)
|
||||
continue
|
||||
|
||||
if self.state is HUMAN_PLAY:
|
||||
if self.record_locked:
|
||||
self.record_lock.release()
|
||||
self.record_locked = False
|
||||
prev_obs, action, reward, obs, env_done = self._human_play(obs)
|
||||
|
||||
# The computer will train for a few episodes without showing to the user.
|
||||
# Mainly to speed up the learning process a bit
|
||||
elif self.state is SNEAKY_COMPUTER_PLAY:
|
||||
print("Sneaky Computer Time")
|
||||
if not self.record_locked:
|
||||
self.record_lock.acquire()
|
||||
self.record_locked = True
|
||||
print("Sneaky Computer Time", flush = True)
|
||||
self.display_text("Training...")
|
||||
|
||||
# Have the agent play a few rounds without showing to the user
|
||||
|
@ -248,12 +261,21 @@ class Play:
|
|||
self._increment_state()
|
||||
|
||||
elif self.state is TRANSITION:
|
||||
if not self.record_locked:
|
||||
self.record_lock.acquire()
|
||||
self.record_locked = True
|
||||
self.transition("Computers Turn! Press <Space> to Start")
|
||||
|
||||
elif self.state is COMPUTER_PLAY:
|
||||
if self.record_locked:
|
||||
self.record_lock.release()
|
||||
self.record_locked = False
|
||||
prev_obs, action, reward, obs, env_done = self._computer_play(obs)
|
||||
|
||||
elif self.state is TRANSITION2:
|
||||
if not self.record_locked:
|
||||
self.record_lock.acquire()
|
||||
self.record_locked = True
|
||||
self.transition("Your Turn! Press <Space> to Start")
|
||||
|
||||
# Increment the timer if it's the human or shown computer's turn
|
||||
|
@ -265,7 +287,7 @@ class Play:
|
|||
self.record_lock.acquire()
|
||||
self.display_text("Demo Training...")
|
||||
print("Begin Demonstration Training")
|
||||
print("Number of transitions in buffer: ", len(self.agent.memory))
|
||||
print("Number of transitions in buffer: ", len(self.agent.memory), flush = True)
|
||||
for j in range(self.num_train_per_demo):
|
||||
print("Iteration %d / %d" % (j + 1, self.num_train_per_demo))
|
||||
self.agent.learn()
|
||||
|
|
15
play_env.py
15
play_env.py
|
@ -37,17 +37,18 @@ from networks import Value
|
|||
## Play Related Classes
|
||||
#
|
||||
class PlayClass(Thread):
|
||||
def __init__(self, env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config):
|
||||
def __init__(self, env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config, sneaky_config):
|
||||
super(PlayClass, self).__init__()
|
||||
self.play = play.Play(env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config)
|
||||
self.play = play.Play(env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config, sneaky_config)
|
||||
|
||||
def run(self):
|
||||
self.play.start()
|
||||
|
||||
class Record(GymWrapper):
|
||||
def __init__(self, env, memory, args):
|
||||
def __init__(self, env, memory, lock, args):
|
||||
GymWrapper.__init__(self, env)
|
||||
self.memory = memory
|
||||
self.lock = lock # Lock for memory access
|
||||
self.skipframes = args['skip']
|
||||
self.environment_name = args['environment_name']
|
||||
self.logdir = args['logdir']
|
||||
|
@ -62,14 +63,16 @@ class Record(GymWrapper):
|
|||
self.current_i += 1
|
||||
# Don't add to memory until a certain number of frames is reached
|
||||
if self.current_i % self.skipframes == 0:
|
||||
self.lock.acquire()
|
||||
self.memory.append((state, action, reward, next_state, done))
|
||||
self.lock.release()
|
||||
self.current_i = 0
|
||||
return next_state, reward, done, info
|
||||
|
||||
def log_transitions(self):
|
||||
if len(self.memory) > 0:
|
||||
basename = self.logdir + "/{}.{}".format(self.environment_name, datetime.now().strftime("%Y-%m-%d-%H-%M-%s"))
|
||||
print("Base Filename: ", basename)
|
||||
print("Base Filename: ", basename, flush = True)
|
||||
state, action, reward, next_state, done = zip(*self.memory)
|
||||
np_save(basename + "-state.npy", np_array(state), allow_pickle = False)
|
||||
np_save(basename + "-action.npy", np_array(action), allow_pickle = False)
|
||||
|
@ -124,7 +127,7 @@ def wrap_preprocessing(env, MaxAndSkipEnv = False):
|
|||
## Set up environment to be recorded and preprocessed
|
||||
record_memory = []
|
||||
record_lock = Lock()
|
||||
env = Record(makeEnv(args['environment_name']), record_memory, args)
|
||||
env = Record(makeEnv(args['environment_name']), record_memory, record_lock, args)
|
||||
|
||||
# Bind record_env to current env so that we can reference log_transitions easier later
|
||||
record_env = env
|
||||
|
@ -162,7 +165,7 @@ sneaky_actor = EpsilonGreedySelector(net, action_size, device = device, epsilon
|
|||
sneaky_agent = rltorch.agents.DQNAgent(net, sneaky_memory, sneaky_config, target_net = target_net)
|
||||
|
||||
# Pass all this information into the thread that will handle the game play and start
|
||||
playThread = PlayClass(env, actor, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config)
|
||||
playThread = PlayClass(env, actor, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config, sneaky_config)
|
||||
playThread.start()
|
||||
|
||||
# While the play thread is running, we'll periodically log transitions we've encountered
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
import rltorch
|
||||
|
||||
sneaky_config = {}
|
||||
sneaky_config['learning_rate'] = 1e-4
|
||||
sneaky_config['learning_rate'] = 1e-5
|
||||
sneaky_config['target_sync_tau'] = 1e-3
|
||||
sneaky_config['discount_rate'] = 0.99
|
||||
sneaky_config['exploration_rate'] = rltorch.scheduler.ExponentialScheduler(initial_value = 1, end_value = 0.1, iterations = 10**5)
|
||||
sneaky_config['exploration_rate'] = rltorch.scheduler.ExponentialScheduler(initial_value = 1, end_value = 0.02, iterations = 10**5)
|
||||
# Number of episodes for the computer to train the agent without the human seeing
|
||||
sneaky_config['replay_skip'] = 14
|
||||
sneaky_config['batch_size'] = 32 * (sneaky_config['replay_skip'] + 1)
|
||||
sneaky_config['memory_size'] = 10**4
|
||||
sneaky_config['replay_skip'] = 29 # Gradient descent every second
|
||||
sneaky_config['batch_size'] = 16 * (sneaky_config['replay_skip'] + 1) # Calculated based on memory constraints
|
||||
sneaky_config['memory_size'] = 2000 # batch_size * 2 looks = 66 seconds of gameplay
|
||||
# Number of episodes for the computer to train the agent without the human seeing
|
||||
sneaky_config['num_sneaky_episodes'] = 10
|
Loading…
Reference in a new issue