diff --git a/play.py b/play.py
index 6441f93..a26e254 100644
--- a/play.py
+++ b/play.py
@@ -3,6 +3,7 @@ import pygame
 import sys
 import time
 import matplotlib
+import rltorch.memory as M
 try:
     matplotlib.use('GTK3Agg')
     import matplotlib.pyplot as plt
@@ -17,7 +18,7 @@ from pygame.locals import HWSURFACE, DOUBLEBUF, RESIZABLE, VIDEORESIZE
 from threading import Thread, Event, Timer
 
 class Play:
-    def __init__(self, env, action_selector, memory, agent, transpose = True, fps = 30, zoom = None, keys_to_action = None):
+    def __init__(self, env, action_selector, memory, agent, sneaky_env, transpose = True, fps = 30, zoom = None, keys_to_action = None):
         self.env = env
         self.action_selector = action_selector
         self.transpose = transpose
@@ -34,7 +35,7 @@ class Play:
         self.paused = False
         self.memory = memory
         self.agent = agent
-        print("FPS ", 30)
+        self.sneaky_env = sneaky_env
 
     def _display_arr(self, obs, screen, arr, video_size):
         if obs is not None:
@@ -120,7 +121,7 @@ class Play:
             self.relevant_keys = set(sum(map(list, self.keys_to_action.keys()),[]))
 
     def _increment_state(self):
-        self.state = (self.state + 1) % 4
+        self.state = (self.state + 1) % 5
 
     def pause(self, text = ""):
         self.paused = True
@@ -145,6 +146,31 @@ class Play:
             pygame.display.flip()
             self.clock.tick(self.fps)
 
+
+    def sneaky_train(self):
+        # Back up the current memory and train on a temporary buffer
+        backup_memory = self.memory
+        self.memory = M.ReplayMemory(capacity = 2000) # Another configurable parameter
+        EPISODES = 30 # Make this configurable
+        replay_skip = 4 # Make this configurable
+        for _ in range(EPISODES):
+            prev_obs = self.sneaky_env.reset()
+            done = False
+            step = 0
+            while not done:
+                action = self.action_selector.act(prev_obs)
+                obs, reward, done, _ = self.sneaky_env.step(action)
+                self.memory.append(prev_obs, action, reward, obs, done)
+                prev_obs = obs
+                step += 1
+                if step % replay_skip == 0:
+                    self.agent.learn()
+        self.memory = backup_memory
+        # It would be cool if, instead of throwing away all this new data, we kept just a sample of it
+        # Not sure if I want all of it, because then it'll drown out the expert demonstration data
+
+
+
     def start(self):
         """Allows one to play the game using keyboard.
         To simply play the game use:
@@ -202,8 +228,12 @@ class Play:
         self.clock = pygame.time.Clock()
 
         # States
-        COMPUTER_PLAY = 0
-        HUMAN_PLAY = 2
+        HUMAN_PLAY = 0
+        SNEAKY_COMPUTER_PLAY = 1
+        TRANSITION = 2
+        COMPUTER_PLAY = 3
+        TRANSITION2 = 4
+
         env_done = True
         prev_obs = None
 
@@ -214,28 +244,31 @@
             if env_done:
                 obs = self.env.reset()
                 env_done = False
-
-            if self.state == 0:
-                prev_obs, action, reward, obs, env_done = self._computer_play(obs)
-            elif self.state == 1:
-                self.pause("Your Turn! Press to Start")
-            elif self.state == 2:
+            if self.state is HUMAN_PLAY:
                 prev_obs, action, reward, obs, env_done = self._human_play(obs)
-            elif self.state == 3:
+            elif self.state is SNEAKY_COMPUTER_PLAY:
+                myfont = pygame.font.SysFont('Comic Sans MS', 50)
+                textsurface = myfont.render("Training....", False, (0, 0, 0))
+                self.screen.blit(textsurface,(0,0))
+                self.sneaky_train()
+                self._increment_state()
+            elif self.state is TRANSITION:
                 self.pause("Computers Turn! Press to Start")
+            elif self.state is COMPUTER_PLAY:
+                prev_obs, action, reward, obs, env_done = self._computer_play(obs)
+            elif self.state is TRANSITION2:
+                self.pause("Your Turn! Press to Start")
 
             if self.state is COMPUTER_PLAY or self.state is HUMAN_PLAY:
                 self.memory.append(prev_obs, action, reward, obs, env_done)
-
-
             if not self.paused:
                 i += 1
-                if i % (self.fps * 30) == 0: # Every 30 seconds...
-                    print("TRAINING...")
+                # Every 30 seconds...
+                if i % (self.fps * 30) == 0:
+                    print("Training...")
                     self.agent.learn()
                     print("PAUSING...")
                     self._increment_state()
                     i = 0
 
-
         pygame.quit()
diff --git a/play_env.py b/play_env.py
index da6391f..7e53911 100644
--- a/play_env.py
+++ b/play_env.py
@@ -17,11 +17,9 @@ import argparse
 import sys
 import numpy as np
 
-
-## CURRRENT ISSUE: MaxSkipEnv applies to the human player as well, which makes for an awkward gaming experience
-# What are your thoughts? Training is different if expert isn't forced with the same constraint
-# At some point I need to introduce learning
-
+#
+## Networks
+#
 class Value(nn.Module):
     def __init__(self, state_size, action_size):
         super(Value, self).__init__()
@@ -69,16 +67,18 @@ class Value(nn.Module):
 
         return x
 
-
+#
+## Play Related Classes
+#
 Transition = namedtuple('Transition',
     ('state', 'action', 'reward', 'next_state', 'done'))
 
 class PlayClass(threading.Thread):
-    def __init__(self, env, action_selector, memory, agent, fps = 60):
+    def __init__(self, env, action_selector, memory, agent, sneaky_env, fps = 60):
         super(PlayClass, self).__init__()
         self.env = env
         self.fps = fps
-        self.play = play.Play(self.env, action_selector, memory, agent, fps = fps, zoom = 4)
+        self.play = play.Play(self.env, action_selector, memory, agent, sneaky_env, fps = fps, zoom = 4)
 
     def run(self):
         self.play.start()
@@ -162,19 +162,15 @@ if args['skip'] is None:
 if args['fps'] is None:
     args['fps'] = 30
 
-## Starting the game
-memory = []
-env = Record(gym.make(args['environment_name']), memory, args, skipframes = args['skip'])
-record_env = env
-env = gym.wrappers.Monitor(env, args['logdir'], force=True)
-env = E.ClippedRewardsWrapper(
+def wrap_preprocessing(env):
+    return E.ClippedRewardsWrapper(
   E.FrameStack(
     E.TorchWrap(
       E.ProcessFrame84(
         E.FireResetEnv(
 #         E.MaxAndSkipEnv(
           E.NoopResetEnv(
-            E.EpisodicLifeEnv(gym.make(config['environment_name']))
+            E.EpisodicLifeEnv(env)
           , noop_max = 30)
 #         , skip=4)
         )
@@ -183,6 +179,15 @@ env = E.ClippedRewardsWrapper(
   4)
 )
 
+## Starting the game
+memory = []
+env = Record(gym.make(args['environment_name']), memory, args, skipframes = args['skip'])
+record_env = env
+env = gym.wrappers.Monitor(env, args['logdir'], force=True)
+env = wrap_preprocessing(env)
+
+sneaky_env = wrap_preprocessing(gym.make(args['environment_name']))
+
 rltorch.set_seed(config['seed'])
 
 device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
@@ -199,7 +204,7 @@ agent = rltorch.agents.DQNAgent(net, memory, config, target_net = target_net)
 
 env.seed(config['seed'])
 
-playThread = PlayClass(env, actor, memory, agent, args['fps'])
+playThread = PlayClass(env, actor, memory, agent, sneaky_env, fps = args['fps'])
 playThread.start()
 
 ## Logging portion