diff --git a/config.py b/config.py
index 6a1b865..30970ff 100644
--- a/config.py
+++ b/config.py
@@ -2,20 +2,20 @@ import rltorch
 
 config = {}
 config['seed'] = 901
-config['seconds_play_per_state'] = 120
 config['zoom'] = 4
 config['environment_name'] = 'PongNoFrameskip-v4'
-config['learning_rate'] = 1e-4
+config['learning_rate'] = 1e-5
 config['target_sync_tau'] = 1e-3
 config['discount_rate'] = 0.99
-config['exploration_rate'] = rltorch.scheduler.ExponentialScheduler(initial_value = 1, end_value = 0.1, iterations = 10**5)
-# Number of episodes for the computer to train the agent without the human seeing
-config['num_sneaky_episodes'] = 10
-config['num_train_per_demo'] = 50 # 100 total since you have two demo training per cycle
-config['replay_skip'] = 14
-config['batch_size'] = 32 * (config['replay_skip'] + 1)
 config['disable_cuda'] = False
-config['memory_size'] = 10**4
+
+config['seconds_play_per_state'] = 120
+# 30 transitions per second for 120 seconds = 3600 transitions per turn
+config['memory_size'] = 21600 # To hold 6 demonstrations
+config['batch_size'] = 64
+config['num_train_per_demo'] = 115 # 4 looks * transitions per turn / (2 * batch_size)
+
+
 # Prioritized vs Random Sampling
 # 0 - Random sampling
 # 1 - Only the highest prioirities
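As a sanity check on the buffer sizing above, the arithmetic from the hunk's own comments works out as follows. This is a minimal sketch; the 30 transitions per second rate and the 6-demonstration target come from the comments in the patch, not from measured data.

```python
# Sketch: reproduce the buffer-sizing arithmetic from the comments in config.py.
transitions_per_second = 30          # stated in the comment, not measured here
seconds_play_per_state = 120

transitions_per_turn = transitions_per_second * seconds_play_per_state  # 3600
memory_size = 6 * transitions_per_turn                                  # 21600, holds 6 demonstrations

# The num_train_per_demo comment ("4 looks * transitions per turn / (2 * batch_size)")
# evaluates to 4 * 3600 / (2 * 64) = 112.5; the config uses 115.
num_train_per_demo = 4 * transitions_per_turn / (2 * 64)

print(transitions_per_turn, memory_size, num_train_per_demo)  # 3600 21600 112.5
```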
diff --git a/play.py b/play.py
index daea72e..c530d9e 100644
--- a/play.py
+++ b/play.py
@@ -4,10 +4,11 @@ from pygame.locals import VIDEORESIZE
 from rltorch.memory import ReplayMemory
 
 class Play:
-    def __init__(self, env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config):
+    def __init__(self, env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config, sneaky_config):
         self.env = env
         self.action_selector = action_selector
         self.record_lock = record_lock
+        self.record_locked = False
         self.sneaky_agent = sneaky_agent
         self.agent = agent
         self.sneaky_env = sneaky_env
@@ -18,9 +19,8 @@ class Play:
         self.zoom = config['zoom'] if 'zoom' in config else 1
         self.keys_to_action = config['keys_to_action'] if 'keys_to_action' in config else None
         self.seconds_play_per_state = config['seconds_play_per_state'] if 'seconds_play_per_state' in config else 30
-        self.num_sneaky_episodes = config['num_sneaky_episodes'] if 'num_sneaky_episodes' in config else 10
-        self.memory_size = config['memory_size'] if 'memory_size' in config else 10**4
-        self.replay_skip = config['replay_skip'] if 'replay_skip' in config else 0
+        self.num_sneaky_episodes = sneaky_config['num_sneaky_episodes'] if 'num_sneaky_episodes' in sneaky_config else 10
+        self.replay_skip = sneaky_config['replay_skip'] if 'replay_skip' in sneaky_config else 0
         self.num_train_per_demo = config['num_train_per_demo'] if 'num_train_per_demo' in config else 1
         # Initial values...
         self.video_size = (0, 0)
@@ -78,7 +78,8 @@ class Play:
                 self.pressed_keys.append(event.key)
             elif event.type == pygame.KEYUP:
                 if event.key in self.relevant_keys:
-                    self.pressed_keys.remove(event.key)
+                    if event.key in self.pressed_keys: # To make sure that program doesn't crash
+                        self.pressed_keys.remove(event.key)
         pygame.display.flip()
         self.clock.tick(self.fps)
 
@@ -145,7 +146,7 @@ class Play:
         self.clock.tick(self.fps)
 
     def sneaky_train(self):
-        self.record_lock.acquire()
+        # self.record_lock.acquire()
         # Do a standard RL algorithm process for a certain number of episodes
         for i in range(self.num_sneaky_episodes):
            print("Episode: %d / %d, Reward: " % ((self.num_sneaky_episodes * self.sneaky_iteration) + i + 1, (self.sneaky_iteration + 1) * self.num_sneaky_episodes), end = "")
@@ -167,9 +168,9 @@ class Play:
                 self.sneaky_agent.learn()
 
             # Finish the previous print with the total reward obtained during the episode
-            print(total_reward)
+            print(total_reward, flush = True)
         self.sneaky_iteration += 1
-        self.record_lock.release()
+        # self.record_lock.release()
 
     def display_text(self, text):
         myfont = pygame.font.SysFont('Comic Sans MS', 50)
@@ -188,6 +189,9 @@ class Play:
            if event.type == pygame.KEYUP and event.key == pygame.K_F1:
                 self.paused = False
                 self.clear_text(obs)
+                if self.record_locked:
+                    self.record_lock.release()
+                    self.record_locked = False
            else:
                 self._process_common_pygame_events(event)
 
@@ -224,21 +228,30 @@ class Play:
             # If the environment is done after a turn, reset it so we can keep going
             if env_done:
                 episode_num += 1
-                print("Human/Computer Episode: ", episode_num)
+                print("Human/Computer Episode:", episode_num, flush = True)
                 obs = self.env.reset()
                 env_done = False
 
             if self.paused:
+                if not self.record_locked:
+                    self.record_lock.acquire()
+                    self.record_locked = True
                 self.process_pause_state(obs)
                 continue
 
             if self.state is HUMAN_PLAY:
+                if self.record_locked:
+                    self.record_lock.release()
+                    self.record_locked = False
                 prev_obs, action, reward, obs, env_done = self._human_play(obs)
 
             # The computer will train for a few episodes without showing to the user.
             # Mainly to speed up the learning process a bit
             elif self.state is SNEAKY_COMPUTER_PLAY:
-                print("Sneaky Computer Time")
+                if not self.record_locked:
+                    self.record_lock.acquire()
+                    self.record_locked = True
+                print("Sneaky Computer Time", flush = True)
                 self.display_text("Training...")
 
                 # Have the agent play a few rounds without showing to the user
@@ -248,12 +261,21 @@ class Play:
                 self._increment_state()
 
             elif self.state is TRANSITION:
+                if not self.record_locked:
+                    self.record_lock.acquire()
+                    self.record_locked = True
                 self.transition("Computers Turn! Press to Start")
 
             elif self.state is COMPUTER_PLAY:
+                if self.record_locked:
+                    self.record_lock.release()
+                    self.record_locked = False
                 prev_obs, action, reward, obs, env_done = self._computer_play(obs)
 
             elif self.state is TRANSITION2:
+                if not self.record_locked:
+                    self.record_lock.acquire()
+                    self.record_locked = True
                 self.transition("Your Turn! Press to Start")
 
             # Increment the timer if it's the human or shown computer's turn
@@ -265,7 +287,7 @@ class Play:
             self.record_lock.acquire()
             self.display_text("Demo Training...")
             print("Begin Demonstration Training")
-            print("Number of transitions in buffer: ", len(self.agent.memory))
+            print("Number of transitions in buffer: ", len(self.agent.memory), flush = True)
             for j in range(self.num_train_per_demo):
                 print("Iteration %d / %d" % (j + 1, self.num_train_per_demo))
                 self.agent.learn()
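The `record_locked` flag introduced above exists because `threading.Lock` is not reentrant: acquiring it a second time from the play thread would deadlock, and releasing it when it is not held raises `RuntimeError`. A minimal sketch of the same acquire-once/release-once idiom, using a hypothetical `GuardedLock` helper rather than the project's `Play` class:

```python
# Sketch of the acquire-once / release-once idiom used in play.py.
# GuardedLock is a hypothetical helper, not part of the project; it mirrors
# how Play pairs record_lock with the record_locked boolean.
from threading import Lock

class GuardedLock:
    def __init__(self):
        self.lock = Lock()
        self.held = False  # plays the role of self.record_locked

    def acquire_once(self):
        # Only acquire if we do not already hold the lock; a second acquire()
        # on a non-reentrant Lock from the same thread would block forever.
        if not self.held:
            self.lock.acquire()
            self.held = True

    def release_once(self):
        # Only release if we actually hold the lock; releasing an unheld
        # Lock raises RuntimeError.
        if self.held:
            self.lock.release()
            self.held = False
```

A `threading.RLock` would also tolerate repeated acquires, but it requires one release per acquire; the flag means a single release at the HUMAN_PLAY or COMPUTER_PLAY hand-off is always enough, no matter how many pause/transition states were passed through while holding the lock.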
diff --git a/play_env.py b/play_env.py
index f06f8ee..fe1a3ce 100644
--- a/play_env.py
+++ b/play_env.py
@@ -37,17 +37,18 @@ from networks import Value
 ## Play Related Classes
 #
 class PlayClass(Thread):
-    def __init__(self, env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config):
+    def __init__(self, env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config, sneaky_config):
         super(PlayClass, self).__init__()
-        self.play = play.Play(env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config)
+        self.play = play.Play(env, action_selector, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config, sneaky_config)
 
     def run(self):
         self.play.start()
 
 class Record(GymWrapper):
-    def __init__(self, env, memory, args):
+    def __init__(self, env, memory, lock, args):
         GymWrapper.__init__(self, env)
         self.memory = memory
+        self.lock = lock # Lock for memory access
         self.skipframes = args['skip']
         self.environment_name = args['environment_name']
         self.logdir = args['logdir']
@@ -62,14 +63,16 @@ class Record(GymWrapper):
         self.current_i += 1
         # Don't add to memory until a certain number of frames is reached
         if self.current_i % self.skipframes == 0:
+            self.lock.acquire()
             self.memory.append((state, action, reward, next_state, done))
+            self.lock.release()
             self.current_i = 0
         return next_state, reward, done, info
 
     def log_transitions(self):
         if len(self.memory) > 0:
             basename = self.logdir + "/{}.{}".format(self.environment_name, datetime.now().strftime("%Y-%m-%d-%H-%M-%s"))
-            print("Base Filename: ", basename)
+            print("Base Filename: ", basename, flush = True)
             state, action, reward, next_state, done = zip(*self.memory)
             np_save(basename + "-state.npy", np_array(state), allow_pickle = False)
             np_save(basename + "-action.npy", np_array(action), allow_pickle = False)
@@ -124,7 +127,7 @@ def wrap_preprocessing(env, MaxAndSkipEnv = False):
 ## Set up environment to be recorded and preprocessed
 record_memory = []
 record_lock = Lock()
-env = Record(makeEnv(args['environment_name']), record_memory, args)
+env = Record(makeEnv(args['environment_name']), record_memory, record_lock, args)
 
 # Bind record_env to current env so that we can reference log_transitions easier later
 record_env = env
@@ -162,7 +165,7 @@ sneaky_actor = EpsilonGreedySelector(net, action_size, device = device, epsilon
 sneaky_agent = rltorch.agents.DQNAgent(net, sneaky_memory, sneaky_config, target_net = target_net)
 
 # Pass all this information into the thread that will handle the game play and start
-playThread = PlayClass(env, actor, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config)
+playThread = PlayClass(env, actor, agent, sneaky_env, sneaky_actor, sneaky_agent, record_lock, config, sneaky_config)
 playThread.start()
 
 # While the play thread is running, we'll periodically log transitions we've encountered
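`Record.step()` now appends to the shared `record_memory` list only while holding the same `record_lock` the play thread uses, so appends cannot interleave with the periodic transition logging. A minimal sketch of that guarded-append pattern (standalone names, not the project's module layout), written with a `with` block instead of explicit `acquire()`/`release()`:

```python
# Sketch of the guarded append in Record.step(), using the lock as a context manager.
from threading import Lock

record_lock = Lock()
record_memory = []

def record_transition(state, action, reward, next_state, done):
    # Appending under the lock keeps another thread from reading or logging
    # the buffer while it is being modified.
    with record_lock:
        record_memory.append((state, action, reward, next_state, done))
```

Using a context manager also guarantees the lock is released if `append()` raises, which the bare `acquire()`/`release()` pair in the hunk does not.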
diff --git a/sneaky_config.py b/sneaky_config.py
index f72b2af..a83a3f0 100644
--- a/sneaky_config.py
+++ b/sneaky_config.py
@@ -1,11 +1,13 @@
 import rltorch
 
 sneaky_config = {}
-sneaky_config['learning_rate'] = 1e-4
+sneaky_config['learning_rate'] = 1e-5
 sneaky_config['target_sync_tau'] = 1e-3
 sneaky_config['discount_rate'] = 0.99
-sneaky_config['exploration_rate'] = rltorch.scheduler.ExponentialScheduler(initial_value = 1, end_value = 0.1, iterations = 10**5)
+sneaky_config['exploration_rate'] = rltorch.scheduler.ExponentialScheduler(initial_value = 1, end_value = 0.02, iterations = 10**5)
 # Number of episodes for the computer to train the agent without the human seeing
-sneaky_config['replay_skip'] = 14
-sneaky_config['batch_size'] = 32 * (sneaky_config['replay_skip'] + 1)
-sneaky_config['memory_size'] = 10**4
+sneaky_config['replay_skip'] = 29 # Gradient descent every second
+sneaky_config['batch_size'] = 16 * (sneaky_config['replay_skip'] + 1) # Calculated based on memory constraints
+sneaky_config['memory_size'] = 2000 # batch_size * 2 looks = 66 seconds of gameplay
+# Number of episodes for the computer to train the agent without the human seeing
+sneaky_config['num_sneaky_episodes'] = 10
\ No newline at end of file
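The sneaky-training hyperparameters encode their own arithmetic in the comments; a quick check of it below (assuming the same ~30 transitions per second rate referenced in config.py). The exploration floor also drops from 0.1 to 0.02, so the hidden-training agent acts almost greedily by the end of the schedule.

```python
# Sketch: the sneaky-training arithmetic implied by the comments above.
transitions_per_second = 30           # from the config.py comment, not measured here
replay_skip = 29                      # learn() every 30th transition, i.e. roughly once per second
batch_size = 16 * (replay_skip + 1)   # 480 transitions sampled per learn() call
memory_size = 2000                    # about 2000 / 30 ≈ 66 seconds of gameplay retained

print(batch_size, memory_size / transitions_per_second)  # 480 66.66...
```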