Made Logger global

2020-04-14 15:24:48 -04:00 · 2020-04-14 15:24:48 -04:00 · c6172f309d
commit c6172f309d
parent 1f7c6f10ab
21 changed files with 513 additions and 527 deletions
--- a/examples/acrobot_a2c.py
+++ b/examples/acrobot_a2c.py
@ -8,6 +8,7 @@ import rltorch.memory as M
 import rltorch.env as E
 from rltorch.action_selector import StochasticSelector
 from tensorboardX import SummaryWriter
+from rltorch.log import Logger

 #
 ## Networks
@ -68,65 +69,55 @@ config['disable_cuda'] = False
 #
 ## Training Loop
 #
-def train(runner, agent, config, logger = None, logwriter = None):
+def train(runner, agent, config, logwriter=None):
    finished = False
    while not finished:
        runner.run()
        agent.learn()
        if logwriter is not None:
-          agent.value_net.log_named_parameters()
-          agent.policy_net.log_named_parameters()
-          logwriter.write(logger)
+            agent.value_net.log_named_parameters()
+            agent.policy_net.log_named_parameters()
+            logwriter.write(Logger)
        finished = runner.episode_num > config['total_training_episodes']


 if __name__ == "__main__":
-  # Setting up the environment
-  rltorch.set_seed(config['seed'])
-  print("Setting up environment...", end = " ")
-  env = E.TorchWrap(gym.make(config['environment_name']))
-  env.seed(config['seed'])
-  print("Done.")
-      
-  state_size = env.observation_space.shape[0]
-  action_size = env.action_space.n
+    # Setting up the environment
+    rltorch.set_seed(config['seed'])
+    print("Setting up environment...", end=" ")
+    env = E.TorchWrap(gym.make(config['environment_name']))
+    env.seed(config['seed'])
+    print("Done.")

-  # Logging
-  logger = rltorch.log.Logger()
-  logwriter = rltorch.log.LogWriter(SummaryWriter())
-
-  # Setting up the networks
-  device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
-  policy_net = rn.Network(Policy(state_size, action_size), 
-                      torch.optim.Adam, config, device = device, name = "Policy")
-  value_net = rn.Network(Value(state_size), 
-                      torch.optim.Adam, config, device = device, name = "DQN")
-
-
-  # Memory stores experiences for later training
-  memory = M.EpisodeMemory()
-
-  # Actor takes a net and uses it to produce actions from given states
-  actor = StochasticSelector(policy_net, action_size, memory, device = device)
-
-  # Agent is what performs the training
-  agent = rltorch.agents.A2CSingleAgent(policy_net, value_net, memory, config, logger = logger)
-
-  # Runner performs one episode in the environment
-  runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name = "Training", memory = memory, logwriter = logwriter)
+    state_size = env.observation_space.shape[0]
+    action_size = env.action_space.n
+    # Logging
+    logwriter = rltorch.log.LogWriter(SummaryWriter())
+    # Setting up the networks
+    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
+    policy_net = rn.Network(Policy(state_size, action_size),
+                        torch.optim.Adam, config, device=device, name="Policy")
+    value_net = rn.Network(Value(state_size), 
+                        torch.optim.Adam, config, device=device, name="DQN")
+    # Memory stores experiences for later training
+    memory = M.EpisodeMemory()
+    # Actor takes a net and uses it to produce actions from given states
+    actor = StochasticSelector(policy_net, action_size, memory, device = device)
+    # Agent is what performs the training
+    agent = rltorch.agents.A2CSingleAgent(policy_net, value_net, memory, config)
+    # Runner performs one episode in the environment
+    runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name="Training", memory=memory, logwriter=logwriter)
    
-  print("Training...")
-  train(runner, agent, config, logger = logger, logwriter = logwriter) 
+    print("Training...")
+    train(runner, agent, config, logwriter=logwriter)

-  # For profiling...
-  # import cProfile
-  # cProfile.run('train(runner, agent, config, logger = logger, logwriter = logwriter )')
-  # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...
+    # For profiling...
+    # import cProfile
+    # cProfile.run('train(runner, agent, config, logwriter = logwriter )')
+    # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...

-  print("Training Finished.")
-
-  print("Evaluating...")
-  rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], logger = logger, name = "Evaluation")
-  print("Evaulations Done.")
-
-  logwriter.close() # We don't need to write anything out to disk anymore
+    print("Training Finished.") 
+    print("Evaluating...")
+    rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], name="Evaluation")
+    print("Evaulations Done.")
+    logwriter.close() # We don't need to write anything out to disk anymore
--- a/examples/acrobot_es.py
+++ b/examples/acrobot_es.py
@ -9,29 +9,28 @@ import rltorch.memory as M
 import rltorch.env as E
 from rltorch.action_selector import StochasticSelector
 from tensorboardX import SummaryWriter
+from rltorch.log import Logger

 #
 ## Networks
 #
 class Policy(nn.Module):
-  def __init__(self, state_size, action_size):
-    super(Policy, self).__init__()
-    self.state_size = state_size
-    self.action_size = action_size
+    def __init__(self, state_size, action_size):
+        super(Policy, self).__init__()
+        self.state_size = state_size
+        self.action_size = action_size
+        self.fc1 = nn.Linear(state_size, 125)
+        self.fc_norm = nn.LayerNorm(125)

-    self.fc1 = nn.Linear(state_size, 125)
-    self.fc_norm = nn.LayerNorm(125)
-    
-    self.fc2 = nn.Linear(125, 125)
-    self.fc2_norm = nn.LayerNorm(125)
+        self.fc2 = nn.Linear(125, 125)
+        self.fc2_norm = nn.LayerNorm(125)
+        self.action_prob = nn.Linear(125, action_size)

-    self.action_prob = nn.Linear(125, action_size)
-
-  def forward(self, x):
-    x = F.relu(self.fc_norm(self.fc1(x)))
-    x = F.relu(self.fc2_norm(self.fc2(x)))
-    x = F.softmax(self.action_prob(x), dim = 1)
-    return x
+    def forward(self, x):
+        x = F.relu(self.fc_norm(self.fc1(x)))
+        x = F.relu(self.fc2_norm(self.fc2(x)))
+        x = F.softmax(self.action_prob(x), dim = 1)
+        return x

 #
 ## Configuration
@ -50,75 +49,67 @@ config['disable_cuda'] = False
 #
 ## Training Loop
 #
-def train(runner, net, config, logger = None, logwriter = None):
-  finished = False
-  while not finished:
-    runner.run()
-    net.calc_gradients()
-    net.step()
-    if logwriter is not None:
-      net.log_named_parameters()
-      logwriter.write(logger)
-    finished = runner.episode_num > config['total_training_episodes']
+def train(runner, net, config, logwriter=None):
+    finished = False
+    while not finished:
+        runner.run()
+        net.calc_gradients()
+        net.step()
+        if logwriter is not None:
+            net.log_named_parameters()
+            logwriter.write(Logger)
+        finished = runner.episode_num > config['total_training_episodes']
 
 #
 ## Loss function
 #
 def fitness(model):
-  env = gym.make("Acrobot-v1")
-  state = torch.from_numpy(env.reset()).float().unsqueeze(0)
-  total_reward = 0
-  done = False
-  while not done:
-    action_probabilities = model(state)
-    distribution = Categorical(action_probabilities)
-    action = distribution.sample().item()
-    next_state, reward, done, _ = env.step(action)
-    total_reward += reward
-    state = torch.from_numpy(next_state).float().unsqueeze(0)
-  return -total_reward
+    env = gym.make("Acrobot-v1")
+    state = torch.from_numpy(env.reset()).float().unsqueeze(0)
+    total_reward = 0
+    done = False
+    while not done:
+        action_probabilities = model(state)
+        distribution = Categorical(action_probabilities)
+        action = distribution.sample().item()
+        next_state, reward, done, _ = env.step(action)
+        total_reward += reward
+        state = torch.from_numpy(next_state).float().unsqueeze(0)
+    return -total_reward

 if __name__ == "__main__":
-  # Hide internal gym warnings
-  gym.logger.set_level(40)
+    # Hide internal gym warnings
+    gym.logger.set_level(40)

-  # Setting up the environment
-  rltorch.set_seed(config['seed'])
-  print("Setting up environment...", end = " ")
-  env = E.TorchWrap(gym.make(config['environment_name']))
-  env.seed(config['seed'])
-  print("Done.")
-      
-  state_size = env.observation_space.shape[0]
-  action_size = env.action_space.n
+    # Setting up the environment
+    rltorch.set_seed(config['seed'])
+    print("Setting up environment...", end=" ")
+    env = E.TorchWrap(gym.make(config['environment_name']))
+    env.seed(config['seed'])
+    print("Done.")

-  # Logging
-  logger = rltorch.log.Logger()
-  logwriter = rltorch.log.LogWriter(SummaryWriter())
+    state_size = env.observation_space.shape[0]
+    action_size = env.action_space.n

-  # Setting up the networks
-  device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
-  net = rn.ESNetwork(Policy(state_size, action_size), 
-                      torch.optim.Adam, 100, fitness, config, device = device, name = "ES", logger = logger)
+    # Logging
+    logwriter = rltorch.log.LogWriter(SummaryWriter())  
+    # Setting up the networks
+    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
+    net = rn.ESNetwork(Policy(state_size, action_size),
+                        torch.optim.Adam, 100, fitness, config, device=device, name="ES")  
+    # Actor takes a net and uses it to produce actions from given states
+    actor = StochasticSelector(net, action_size, device=device)   
+    # Runner performs an episode of the environment
+    runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name="Training", logwriter=logwriter)   
+    print("Training...")
+    train(runner, net, config, logwriter=logwriter)  
+    # For profiling...
+    # import cProfile
+    # cProfile.run('train(runner, agent, config, logwriter = logwriter )')
+    # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...
+    print("Training Finished.") 
+    print("Evaluating...")
+    rltorch.env.simulateEnvEps(env, actor, config, total_episodes=config['total_evaluation_episodes'], name="Evaluation")
+    print("Evaulations Done.")

-  # Actor takes a net and uses it to produce actions from given states
-  actor = StochasticSelector(net, action_size, device = device)
-
-  # Runner performs an episode of the environment
-  runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name = "Training", logwriter = logwriter)
-
-  print("Training...")
-  train(runner, net, config, logger = logger, logwriter = logwriter) 
-
-  # For profiling...
-  # import cProfile
-  # cProfile.run('train(runner, agent, config, logger = logger, logwriter = logwriter )')
-  # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...
-
-  print("Training Finished.")
-
-  print("Evaluating...")
-  rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], logger = logger, name = "Evaluation")
-  print("Evaulations Done.")
-
-  logwriter.close() # We don't need to write anything out to disk anymore
+    logwriter.close() # We don't need to write anything out to disk anymore
--- a/examples/acrobot_ppo.py
+++ b/examples/acrobot_ppo.py
@ -8,48 +8,49 @@ import rltorch.memory as M
 import rltorch.env as E
 from rltorch.action_selector import StochasticSelector
 from tensorboardX import SummaryWriter
+from rltorch.log import Logger

 #
 ## Networks
 #
 class Value(nn.Module):
-  def __init__(self, state_size):
-    super(Value, self).__init__()
-    self.state_size = state_size
+    def __init__(self, state_size):
+        super(Value, self).__init__()
+        self.state_size = state_size

-    self.fc1 = rn.NoisyLinear(state_size, 64)
-    self.fc_norm = nn.LayerNorm(64)
+        self.fc1 = rn.NoisyLinear(state_size, 64)
+        self.fc_norm = nn.LayerNorm(64)

-    self.fc2 = rn.NoisyLinear(64, 64)
-    self.fc2_norm = nn.LayerNorm(64)
+        self.fc2 = rn.NoisyLinear(64, 64)
+        self.fc2_norm = nn.LayerNorm(64)

-    self.fc3 = rn.NoisyLinear(64, 1)
+        self.fc3 = rn.NoisyLinear(64, 1)

-  def forward(self, x):
-    x = F.relu(self.fc_norm(self.fc1(x)))
-    x = F.relu(self.fc2_norm(self.fc2(x)))
-    x = self.fc3(x)
-    return x
+    def forward(self, x):
+        x = F.relu(self.fc_norm(self.fc1(x)))
+        x = F.relu(self.fc2_norm(self.fc2(x)))
+        x = self.fc3(x)
+        return x

 class Policy(nn.Module):
-  def __init__(self, state_size, action_size):
-    super(Policy, self).__init__()
-    self.state_size = state_size
-    self.action_size = action_size
+    def __init__(self, state_size, action_size):
+        super(Policy, self).__init__()
+        self.state_size = state_size
+        self.action_size = action_size

-    self.fc1 = rn.NoisyLinear(state_size, 64)
-    self.fc_norm = nn.LayerNorm(64)
+        self.fc1 = rn.NoisyLinear(state_size, 64)
+        self.fc_norm = nn.LayerNorm(64)

-    self.fc2 = rn.NoisyLinear(64, 64)
-    self.fc2_norm = nn.LayerNorm(64)
+        self.fc2 = rn.NoisyLinear(64, 64)
+        self.fc2_norm = nn.LayerNorm(64)

-    self.fc3 = rn.NoisyLinear(64, action_size)
+        self.fc3 = rn.NoisyLinear(64, action_size)

-  def forward(self, x):
-    x = F.relu(self.fc_norm(self.fc1(x)))
-    x = F.relu(self.fc2_norm(self.fc2(x)))
-    x = F.softmax(self.fc3(x), dim = 1)
-    return x
+    def forward(self, x):
+        x = F.relu(self.fc_norm(self.fc1(x)))
+        x = F.relu(self.fc2_norm(self.fc2(x)))
+        x = F.softmax(self.fc3(x), dim = 1)
+        return x

 #
 ## Configuration
@ -68,64 +69,63 @@ config['disable_cuda'] = False
 #
 ## Training Loop
 #
-def train(runner, agent, config, logger = None, logwriter = None):
+def train(runner, agent, config, logwriter = None):
    finished = False
    while not finished:
        runner.run()
        agent.learn()
        if logwriter is not None:
-          agent.value_net.log_named_parameters()
-          agent.policy_net.log_named_parameters()
-          logwriter.write(logger)
+            agent.value_net.log_named_parameters()
+            agent.policy_net.log_named_parameters()
+            logwriter.write(Logger)
        finished = runner.episode_num > config['total_training_episodes']

 if __name__ == "__main__":
-  # Setting up the environment
-  rltorch.set_seed(config['seed'])
-  print("Setting up environment...", end = " ")
-  env = E.TorchWrap(gym.make(config['environment_name']))
-  env.seed(config['seed'])
-  print("Done.")
+    # Setting up the environment
+    rltorch.set_seed(config['seed'])
+    print("Setting up environment...", end=" ")
+    env = E.TorchWrap(gym.make(config['environment_name']))
+    env.seed(config['seed'])
+    print("Done.")
      
-  state_size = env.observation_space.shape[0]
-  action_size = env.action_space.n
+    state_size = env.observation_space.shape[0]
+    action_size = env.action_space.n

-  # Logging
-  logger = rltorch.log.Logger()
-  logwriter = rltorch.log.LogWriter(SummaryWriter())
+    # Logging
+    logwriter = rltorch.log.LogWriter(SummaryWriter())

-  # Setting up the networks
-  device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
-  policy_net = rn.Network(Policy(state_size, action_size), 
-                      torch.optim.Adam, config, device = device, name = "Policy")
-  value_net = rn.Network(Value(state_size), 
-                      torch.optim.Adam, config, device = device, name = "DQN")
+    # Setting up the networks
+    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
+    policy_net = rn.Network(Policy(state_size, action_size), 
+                      torch.optim.Adam, config, device=device, name="Policy")
+    value_net = rn.Network(Value(state_size), 
+                      torch.optim.Adam, config, device=device, name="DQN")


-  # Memory stores experiences for later training
-  memory = M.EpisodeMemory()
+    # Memory stores experiences for later training
+    memory = M.EpisodeMemory()

-  # Actor takes a net and uses it to produce actions from given states
-  actor = StochasticSelector(policy_net, action_size, memory, device = device)
+    # Actor takes a net and uses it to produce actions from given states
+    actor = StochasticSelector(policy_net, action_size, memory, device=device)

-  # Agent is what performs the training
-  agent = rltorch.agents.PPOAgent(policy_net, value_net, memory, config, logger = logger)
+    # Agent is what performs the training
+    agent = rltorch.agents.PPOAgent(policy_net, value_net, memory, config)

-  # Runner performs a certain number of steps in the environment
-  runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name = "Training", memory = memory, logwriter = logwriter)
+    # Runner performs a certain number of steps in the environment
+    runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name="Training", memory=memory, logwriter=logwriter)
    
-  print("Training...")
-  train(runner, agent, config, logger = logger, logwriter = logwriter) 
+    print("Training...")
+    train(runner, agent, config, logwriter=logwriter) 

  # For profiling...
  # import cProfile
-  # cProfile.run('train(runner, agent, config, logger = logger, logwriter = logwriter )')
+  # cProfile.run('train(runner, agent, config, logwriter = logwriter )')
  # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...

-  print("Training Finished.")
+    print("Training Finished.")

-  print("Evaluating...")
-  rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], logger = logger, name = "Evaluation")
-  print("Evaulations Done.")
+    print("Evaluating...")
+    rltorch.env.simulateEnvEps(env, actor, config, total_episodes=config['total_evaluation_episodes'], name="Evaluation")
+    print("Evaulations Done.")

-  logwriter.close() # We don't need to write anything out to disk anymore
+    logwriter.close() # We don't need to write anything out to disk anymore
--- a/examples/acrobot_qep.py
+++ b/examples/acrobot_qep.py
@ -7,61 +7,62 @@ import rltorch.network as rn
 import rltorch.memory as M
 import rltorch.env as E
 from rltorch.action_selector import StochasticSelector
-from tensorboardX import SummaryWriter
+# from tensorboardX import SummaryWriter
 from copy import deepcopy
+from rltorch.log import Logger

 #
 ## Networks
 #
 class Value(nn.Module):
-  def __init__(self, state_size, action_size):
-    super(Value, self).__init__()
-    self.state_size = state_size
-    self.action_size = action_size
+    def __init__(self, state_size, action_size):
+        super(Value, self).__init__()
+        self.state_size = state_size
+        self.action_size = action_size

-    self.fc1 = rn.NoisyLinear(state_size, 255)
-    self.fc_norm = nn.LayerNorm(255)
+        self.fc1 = rn.NoisyLinear(state_size, 255)
+        self.fc_norm = nn.LayerNorm(255)
    
-    self.value_fc = rn.NoisyLinear(255, 255)
-    self.value_fc_norm = nn.LayerNorm(255)
-    self.value = rn.NoisyLinear(255, 1)
-    
-    self.advantage_fc = rn.NoisyLinear(255, 255)
-    self.advantage_fc_norm = nn.LayerNorm(255)
-    self.advantage = rn.NoisyLinear(255, action_size)
+        self.value_fc = rn.NoisyLinear(255, 255)
+        self.value_fc_norm = nn.LayerNorm(255)
+        self.value = rn.NoisyLinear(255, 1)

-  def forward(self, x):
-    x = F.relu(self.fc_norm(self.fc1(x)))
+        self.advantage_fc = rn.NoisyLinear(255, 255)
+        self.advantage_fc_norm = nn.LayerNorm(255)
+        self.advantage = rn.NoisyLinear(255, action_size)
+
+    def forward(self, x):
+        x = F.relu(self.fc_norm(self.fc1(x)))
    
-    state_value = F.relu(self.value_fc_norm(self.value_fc(x)))
-    state_value = self.value(state_value)
+        state_value = F.relu(self.value_fc_norm(self.value_fc(x)))
+        state_value = self.value(state_value)
    
-    advantage = F.relu(self.advantage_fc_norm(self.advantage_fc(x)))
-    advantage = self.advantage(advantage)
+        advantage = F.relu(self.advantage_fc_norm(self.advantage_fc(x)))
+        advantage = self.advantage(advantage)
    
-    x = state_value + advantage - advantage.mean()
-    return x
+        x = state_value + advantage - advantage.mean()
+        return x


 class Policy(nn.Module):
-  def __init__(self, state_size, action_size):
-    super(Policy, self).__init__()
-    self.state_size = state_size
-    self.action_size = action_size
+    def __init__(self, state_size, action_size):
+        super(Policy, self).__init__()
+        self.state_size = state_size
+        self.action_size = action_size

-    self.fc1 = nn.Linear(state_size, 125)
-    self.fc_norm = nn.LayerNorm(125)
+        self.fc1 = nn.Linear(state_size, 125)
+        self.fc_norm = nn.LayerNorm(125)
    
-    self.fc2 = nn.Linear(125, 125)
-    self.fc2_norm = nn.LayerNorm(125)
+        self.fc2 = nn.Linear(125, 125)
+        self.fc2_norm = nn.LayerNorm(125)

-    self.action_prob = nn.Linear(125, action_size)
+        self.action_prob = nn.Linear(125, action_size)

-  def forward(self, x):
-    x = F.relu(self.fc_norm(self.fc1(x)))
-    x = F.relu(self.fc2_norm(self.fc2(x)))
-    x = F.softmax(self.action_prob(x), dim = 1)
-    return x
+    def forward(self, x):
+        x = F.relu(self.fc_norm(self.fc1(x)))
+        x = F.relu(self.fc2_norm(self.fc2(x)))
+        x = F.softmax(self.action_prob(x), dim = 1)
+        return x

 #
 ## Configuration
@ -94,70 +95,70 @@ config['prioritized_replay_weight_importance'] = rltorch.scheduler.ExponentialSc
 #
 ## Training Loop
 #
-def train(runner, agent, config, logger = None, logwriter = None):
+def train(runner, agent, config, logwriter=None):
    finished = False
    last_episode_num = 1
    while not finished:
        runner.run(config['replay_skip'] + 1)
        agent.learn()
        if logwriter is not None:
-          if last_episode_num < runner.episode_num:
-            last_episode_num = runner.episode_num
-            agent.value_net.log_named_parameters()
-            agent.policy_net.log_named_parameters()
-          logwriter.write(logger)
+            if last_episode_num < runner.episode_num:
+                last_episode_num = runner.episode_num
+                agent.value_net.log_named_parameters()
+                agent.policy_net.log_named_parameters()
+            logwriter.write(Logger)
        finished = runner.episode_num > config['total_training_episodes']


 if __name__ == "__main__":
-  # Setting up the environment
-  rltorch.set_seed(config['seed'])
-  print("Setting up environment...", end = " ")
-  env = E.TorchWrap(gym.make(config['environment_name']))
-  env.seed(config['seed'])
-  print("Done.")
+    # Setting up the environment
+    rltorch.set_seed(config['seed'])
+    print("Setting up environment...", end = " ")
+    env = E.TorchWrap(gym.make(config['environment_name']))
+    env.seed(config['seed'])
+    print("Done.")
      
-  state_size = env.observation_space.shape[0]
-  action_size = env.action_space.n
+    state_size = env.observation_space.shape[0]
+    action_size = env.action_space.n

-  # Logging
-  logger = rltorch.log.Logger()
-  logwriter = rltorch.log.LogWriter(SummaryWriter())
+    # Logging
+    logwriter = None
+    # logwriter = rltorch.log.LogWriter(SummaryWriter())

-  # Setting up the networks
-  device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
-  config2 = deepcopy(config)
-  config2['learning_rate'] = 0.01
-  policy_net = rn.ESNetwork(Policy(state_size, action_size), 
-                      torch.optim.Adam, 500, None, config2, sigma = 0.1, device = device, name = "ES", logger = logger)
-  value_net = rn.Network(Value(state_size, action_size), 
-                      torch.optim.Adam, config, device = device, name = "DQN", logger = logger)
-  target_net = rn.TargetNetwork(value_net, device = device)
+    # Setting up the networks
+    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
+    config2 = deepcopy(config)
+    config2['learning_rate'] = 0.01
+    policy_net = rn.ESNetwork(Policy(state_size, action_size),
+                        torch.optim.Adam, 500, None, config2, sigma=0.1, device=device, name="ES")
+    value_net = rn.Network(Value(state_size, action_size),
+                        torch.optim.Adam, config, device=device, name="DQN")
+    target_net = rn.TargetNetwork(value_net, device=device)

-  # Actor takes a net and uses it to produce actions from given states
-  actor = StochasticSelector(policy_net, action_size, device = device)
+    # Actor takes a net and uses it to produce actions from given states
+    actor = StochasticSelector(policy_net, action_size, device=device)

-  # Memory stores experiences for later training
-  memory = M.PrioritizedReplayMemory(capacity = config['memory_size'], alpha = config['prioritized_replay_sampling_priority'])
+    # Memory stores experiences for later training
+    memory = M.PrioritizedReplayMemory(capacity=config['memory_size'], alpha=config['prioritized_replay_sampling_priority'])

-  # Runner performs a certain number of steps in the environment
-  runner = rltorch.env.EnvironmentRunSync(env, actor, config, name = "Training", memory = memory, logwriter = logwriter)
+    # Runner performs a certain number of steps in the environment
+    runner = rltorch.env.EnvironmentRunSync(env, actor, config, name="Training", memory=memory, logwriter=logwriter)

-  # Agent is what performs the training
-  agent = rltorch.agents.QEPAgent(policy_net, value_net, memory, config, target_value_net = target_net, logger = logger)
+    # Agent is what performs the training
+    agent = rltorch.agents.QEPAgent(policy_net, value_net, memory, config, target_value_net=target_net)

-  print("Training...")
-  train(runner, agent, config, logger = logger, logwriter = logwriter) 
+    print("Training...")
+    train(runner, agent, config, logwriter=logwriter)

  # For profiling...
  # import cProfile
-  # cProfile.run('train(runner, agent, config, logger = logger, logwriter = logwriter )')
+  # cProfile.run('train(runner, agent, config, logwriter = logwriter )')
  # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...

-  print("Training Finished.")
+    print("Training Finished.")

-  print("Evaluating...")
-  rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], logger = logger, name = "Evaluation")
-  print("Evaulations Done.")
+    print("Evaluating...")
+    rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], name="Evaluation")
+    print("Evaulations Done.")

-  logwriter.close() # We don't need to write anything out to disk anymore
+    # logwriter.close() # We don't need to write anything out to disk anymore
--- a/examples/acrobot_reinforce.py
+++ b/examples/acrobot_reinforce.py
@ -7,30 +7,30 @@ import rltorch.network as rn
 import rltorch.memory as M
 import rltorch.env as E
 from rltorch.action_selector import StochasticSelector
-from tensorboardX import SummaryWriter
+from rltorch.log import Logger

 #
 ## Networks
 #
 class Policy(nn.Module):
-  def __init__(self, state_size, action_size):
-    super(Policy, self).__init__()
-    self.state_size = state_size
-    self.action_size = action_size
+    def __init__(self, state_size, action_size):
+        super(Policy, self).__init__()
+        self.state_size = state_size
+        self.action_size = action_size

-    self.fc1 = rn.NoisyLinear(state_size, 64)
-    self.fc_norm = nn.LayerNorm(64)
+        self.fc1 = rn.NoisyLinear(state_size, 64)
+        self.fc_norm = nn.LayerNorm(64)

-    self.fc2 = rn.NoisyLinear(64, 64)
-    self.fc2_norm = nn.LayerNorm(64)
+        self.fc2 = rn.NoisyLinear(64, 64)
+        self.fc2_norm = nn.LayerNorm(64)

-    self.fc3 = rn.NoisyLinear(64, action_size)
+        self.fc3 = rn.NoisyLinear(64, action_size)

-  def forward(self, x):
-    x = F.relu(self.fc_norm(self.fc1(x)))
-    x = F.relu(self.fc2_norm(self.fc2(x)))
-    x = F.softmax(self.fc3(x), dim = 1)
-    return x
+    def forward(self, x):
+        x = F.relu(self.fc_norm(self.fc1(x)))
+        x = F.relu(self.fc2_norm(self.fc2(x)))
+        x = F.softmax(self.fc3(x), dim=1)
+        return x

 #
 ## Configuration
@ -49,65 +49,65 @@ config['disable_cuda'] = False
 #
 ## Training Loop
 #
-def train(runner, agent, config, logger = None, logwriter = None):
-  finished = False
-  while not finished:
-    runner.run()
-    agent.learn()
-    # When the episode number changes, log network paramters
-    if logwriter is not None:
-      agent.net.log_named_parameters()
-      logwriter.write(logger)
-    finished = runner.episode_num > config['total_training_episodes']
+def train(runner, agent, config, logwriter=None):
+    finished = False
+    while not finished:
+        runner.run()
+        agent.learn()
+        # When the episode number changes, log network paramters
+        if logwriter is not None:
+            agent.net.log_named_parameters()
+            logwriter.write(Logger)
+        finished = runner.episode_num > config['total_training_episodes']


 if __name__ == "__main__":
-  torch.multiprocessing.set_sharing_strategy('file_system') # To not hit file descriptor memory limit
+    torch.multiprocessing.set_sharing_strategy('file_system') # To not hit file descriptor memory limit

-  # Setting up the environment
-  rltorch.set_seed(config['seed'])
-  print("Setting up environment...", end = " ")
-  env = E.TorchWrap(gym.make(config['environment_name']))
-  env.seed(config['seed'])
-  print("Done.")
+    # Setting up the environment
+    rltorch.set_seed(config['seed'])
+    print("Setting up environment...", end=" ")
+    env = E.TorchWrap(gym.make(config['environment_name']))
+    env.seed(config['seed'])
+    print("Done.")
      
-  state_size = env.observation_space.shape[0]
-  action_size = env.action_space.n
+    state_size = env.observation_space.shape[0]
+    action_size = env.action_space.n

-  # Logging
-  logger = rltorch.log.Logger()
-  logwriter = rltorch.log.LogWriter(SummaryWriter())
+    # Logging
+    logwriter = None
+    # logwriter = rltorch.log.LogWriter(SummaryWriter())

-  # Setting up the networks
-  device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
-  net = rn.Network(Policy(state_size, action_size), 
-                      torch.optim.Adam, config, device = device, name = "DQN")
-  target_net = rn.TargetNetwork(net, device = device)
+    # Setting up the networks
+    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
+    net = rn.Network(Policy(state_size, action_size),
+                      torch.optim.Adam, config, device=device, name="DQN")
+    target_net = rn.TargetNetwork(net, device=device)

-  # Memory stores experiences for later training
-  memory = M.EpisodeMemory()
+    # Memory stores experiences for later training
+    memory = M.EpisodeMemory()

-  # Actor takes a net and uses it to produce actions from given states
-  actor = StochasticSelector(net, action_size, memory, device = device)
+    # Actor takes a net and uses it to produce actions from given states
+    actor = StochasticSelector(net, action_size, memory, device=device)

-  # Agent is what performs the training
-  agent = rltorch.agents.REINFORCEAgent(net, memory, config, target_net = target_net, logger = logger)
+    # Agent is what performs the training
+    agent = rltorch.agents.REINFORCEAgent(net, memory, config, target_net=target_net)
    
-  # Runner performs one episode in the environment
-  runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name = "Training", memory = memory, logwriter = logwriter)
+    # Runner performs one episode in the environment
+    runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name="Training", memory=memory, logwriter=logwriter)

-  print("Training...")
-  train(runner, agent, config, logger = logger, logwriter = logwriter) 
+    print("Training...")
+    train(runner, agent, config, logwriter=logwriter)

  # For profiling...
  # import cProfile
-  # cProfile.run('train(runner, agent, config, logger = logger, logwriter = logwriter )')
+  # cProfile.run('train(runner, agent, config,  logwriter = logwriter )')
  # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...

-  print("Training Finished.")
+    print("Training Finished.")

-  print("Evaluating...")
-  rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], logger = logger, name = "Evaluation")
-  print("Evaulations Done.")
+    print("Evaluating...")
+    rltorch.env.simulateEnvEps(env, actor, config, total_episodes=config['total_evaluation_episodes'], name="Evaluation")
+    print("Evaulations Done.")

-  logwriter.close() # We don't need to write anything out to disk anymore
+    # logwriter.close() # We don't need to write anything out to disk anymore
--- a/examples/acrobot_single_process_dqn.py
+++ b/examples/acrobot_single_process_dqn.py
@ -7,39 +7,39 @@ import rltorch.network as rn
 import rltorch.memory as M
 import rltorch.env as E
 from rltorch.action_selector import ArgMaxSelector
-from tensorboardX import SummaryWriter
+from rltorch.log import Logger

 #
 ## Networks
 #
 class Value(nn.Module):
-  def __init__(self, state_size, action_size):
-    super(Value, self).__init__()
-    self.state_size = state_size
-    self.action_size = action_size
+    """Dueling Q-network: a shared layer feeding separate state-value and advantage heads."""
+
+    def __init__(self, state_size, action_size):
+        """Build the shared layer and the two heads.
+
+        state_size is the input width of the first layer; action_size is the
+        output width of the advantage head.
+        """
+        super(Value, self).__init__()
+        self.state_size = state_size
+        self.action_size = action_size

-    self.fc1 = rn.NoisyLinear(state_size, 255)
-    self.fc_norm = nn.LayerNorm(255)
+        # Shared layer used by both heads
+        self.fc1 = rn.NoisyLinear(state_size, 255)
+        self.fc_norm = nn.LayerNorm(255)
    
-    self.value_fc = rn.NoisyLinear(255, 255)
-    self.value_fc_norm = nn.LayerNorm(255)
-    self.value = rn.NoisyLinear(255, 1)
+        # State-value head (one output)
+        self.value_fc = rn.NoisyLinear(255, 255)
+        self.value_fc_norm = nn.LayerNorm(255)
+        self.value = rn.NoisyLinear(255, 1)
    
-    self.advantage_fc = rn.NoisyLinear(255, 255)
-    self.advantage_fc_norm = nn.LayerNorm(255)
-    self.advantage = rn.NoisyLinear(255, action_size)
+        # Advantage head (one output per action)
+        self.advantage_fc = rn.NoisyLinear(255, 255)
+        self.advantage_fc_norm = nn.LayerNorm(255)
+        self.advantage = rn.NoisyLinear(255, action_size)

-  def forward(self, x):
-    x = F.relu(self.fc_norm(self.fc1(x)))
+    def forward(self, x):
+        """Return state value plus advantages centred over the action axis.
+
+        Assumes the last dimension of the advantage head's output indexes
+        actions (it is built with action_size outputs) -- TODO confirm for
+        rn.NoisyLinear.
+        """
+        x = F.relu(self.fc_norm(self.fc1(x)))
    
-    state_value = F.relu(self.value_fc_norm(self.value_fc(x)))
-    state_value = self.value(state_value)
+        state_value = F.relu(self.value_fc_norm(self.value_fc(x)))
+        state_value = self.value(state_value)
    
-    advantage = F.relu(self.advantage_fc_norm(self.advantage_fc(x)))
-    advantage = self.advantage(advantage)
+        advantage = F.relu(self.advantage_fc_norm(self.advantage_fc(x)))
+        advantage = self.advantage(advantage)
    
-    x = state_value + advantage - advantage.mean()
-    return x
+        # Centre each sample's advantages over its own actions. A bare
+        # .mean() would average over the whole batch and couple the
+        # Q-values of unrelated samples.
+        x = state_value + advantage - advantage.mean(dim=-1, keepdim=True)
+        return x

 #
 ## Configuration
@ -71,7 +71,7 @@ config['prioritized_replay_weight_importance'] = rltorch.scheduler.ExponentialSc
 #
 ## Training Loop
 #
-def train(runner, agent, config, logger = None, logwriter = None):
+def train(runner, agent, config, logwriter=None):
    finished = False
    last_episode_num = 1
    while not finished:
@ -79,56 +79,56 @@ def train(runner, agent, config, logger = None, logwriter = None):
        agent.learn()
        if logwriter is not None:
          if last_episode_num < runner.episode_num:
-            last_episode_num = runner.episode_num
-            agent.net.log_named_parameters()
-          logwriter.write(logger)
+                last_episode_num = runner.episode_num
+                agent.net.log_named_parameters()
+          logwriter.write(Logger)
        finished = runner.episode_num > config['total_training_episodes']


 if __name__ == "__main__":
-  # Setting up the environment
-  rltorch.set_seed(config['seed'])
-  print("Setting up environment...", end = " ")
-  env = E.TorchWrap(gym.make(config['environment_name']))
-  env.seed(config['seed'])
-  print("Done.")
+    # Script entry point: build the environment, network, runner and agent,
+    # train, then run evaluation episodes.
+    # Setting up the environment
+    rltorch.set_seed(config['seed'])
+    print("Setting up environment...", end=" ")
+    env = E.TorchWrap(gym.make(config['environment_name']))
+    env.seed(config['seed'])
+    print("Done.")
      
-  state_size = env.observation_space.shape[0]
-  action_size = env.action_space.n
+    state_size = env.observation_space.shape[0]
+    action_size = env.action_space.n

-  # Logging
-  logger = rltorch.log.Logger()
-  logwriter = rltorch.log.LogWriter(SummaryWriter())
+    # Logging
+    # Disabled by default, so every later use of logwriter must tolerate None.
+    # Re-enabling the line below also requires importing SummaryWriter from
+    # tensorboardX again.
+    logwriter = None
+    # logwriter = rltorch.log.LogWriter(SummaryWriter())

-  # Setting up the networks
-  device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
-  net = rn.Network(Value(state_size, action_size), 
-                      torch.optim.Adam, config, device = device, name = "DQN", logger = logger)
-  target_net = rn.TargetNetwork(net, device = device)
+    # Setting up the networks
+    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
+    net = rn.Network(Value(state_size, action_size), 
+                        torch.optim.Adam, config, device=device, name="DQN")
+    target_net = rn.TargetNetwork(net, device=device)

-  # Actor takes a net and uses it to produce actions from given states
-  actor = ArgMaxSelector(net, action_size, device = device)
-  # Memory stores experiences for later training
-  memory = M.PrioritizedReplayMemory(capacity = config['memory_size'], alpha = config['prioritized_replay_sampling_priority'])
+    # Actor takes a net and uses it to produce actions from given states
+    actor = ArgMaxSelector(net, action_size, device=device)
+    # Memory stores experiences for later training
+    memory = M.PrioritizedReplayMemory(capacity=config['memory_size'], alpha=config['prioritized_replay_sampling_priority'])

-  # Runner performs a certain number of steps in the environment
-  runner = rltorch.env.EnvironmentRunSync(env, actor, config, name = "Training", memory = memory, logwriter = logwriter)
+    # Runner performs a certain number of steps in the environment
+    runner = rltorch.env.EnvironmentRunSync(env, actor, config, name="Training", memory=memory, logwriter=logwriter)

-  # Agent is what performs the training
-  agent = rltorch.agents.DQNAgent(net, memory, config, target_net = target_net, logger = logger)
+    # Agent is what performs the training
+    agent = rltorch.agents.DQNAgent(net, memory, config, target_net=target_net)
    
-  print("Training...")
-  train(runner, agent, config, logger = logger, logwriter = logwriter) 
+    print("Training...")
+    train(runner, agent, config, logwriter=logwriter) 

   # For profiling...
   # import cProfile
-  # cProfile.run('train(runner, agent, config, logger = logger, logwriter = logwriter )')
+  # cProfile.run('train(runner, agent, config, logwriter = logwriter )')
   # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...

-  print("Training Finished.")
+    print("Training Finished.")

-  print("Evaluating...")
-  rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], logger = logger, name = "Evaluation")
-  print("Evaulations Done.")
+    print("Evaluating...")
+    rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], name = "Evaluation")
+    print("Evaulations Done.")

-  logwriter.close() # We don't need to write anything out to disk anymore
+    # logwriter is None while logging is disabled above; closing it
+    # unconditionally would raise AttributeError at the end of the run.
+    if logwriter is not None:
+        logwriter.close() # We don't need to write anything out to disk anymore
--- a/examples/pong_mp_dqn.py
+++ b/examples/pong_mp_dqn.py
@ -9,58 +9,59 @@ import rltorch.env as E
 from rltorch.action_selector import ArgMaxSelector
 from tensorboardX import SummaryWriter
 import torch.multiprocessing as mp
+from rltorch.log import Logger

 #
 ## Networks
 #
 class Value(nn.Module):
-  def __init__(self, state_size, action_size):
-    super(Value, self).__init__()
-    self.state_size = state_size
-    self.action_size = action_size
+    """Convolutional dueling Q-network with separate state-value and advantage heads."""
+
+    def __init__(self, state_size, action_size):
+        """Build the convolutional layers, the shared layer and the two heads.
+
+        state_size is only stored; the first convolution is fixed at 4 input
+        channels. action_size is the output width of the advantage head.
+        """
+        super(Value, self).__init__()
+        self.state_size = state_size
+        self.action_size = action_size
    
-    self.conv1 = nn.Conv2d(4, 32, kernel_size = (8, 8), stride = (4, 4))
-    self.conv_norm1 = nn.LayerNorm([32, 19, 19])
-    self.conv2 = nn.Conv2d(32, 64, kernel_size = (4, 4), stride = (2, 2))    
-    self.conv_norm2 = nn.LayerNorm([64, 8, 8])
-    self.conv3 = nn.Conv2d(64, 64, kernel_size = (3, 3), stride = (1, 1))
-    self.conv_norm3 = nn.LayerNorm([64, 6, 6])
+        # The LayerNorm shapes hard-code the feature-map sizes after each convolution
+        self.conv1 = nn.Conv2d(4, 32, kernel_size=(8, 8), stride=(4, 4))
+        self.conv_norm1 = nn.LayerNorm([32, 19, 19])
+        self.conv2 = nn.Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2))    
+        self.conv_norm2 = nn.LayerNorm([64, 8, 8])
+        self.conv3 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
+        self.conv_norm3 = nn.LayerNorm([64, 6, 6])
    
-    self.fc1 = rn.NoisyLinear(64 * 6 * 6, 384)
-    self.fc_norm = nn.LayerNorm(384)
+        # Shared layer used by both heads
+        self.fc1 = rn.NoisyLinear(64 * 6 * 6, 384)
+        self.fc_norm = nn.LayerNorm(384)
    
-    self.value_fc = rn.NoisyLinear(384, 384)
-    self.value_fc_norm = nn.LayerNorm(384)
-    self.value = rn.NoisyLinear(384, 1)
+        # State-value head (one output)
+        self.value_fc = rn.NoisyLinear(384, 384)
+        self.value_fc_norm = nn.LayerNorm(384)
+        self.value = rn.NoisyLinear(384, 1)
    
-    self.advantage_fc = rn.NoisyLinear(384, 384)
-    self.advantage_fc_norm = nn.LayerNorm(384)
-    self.advantage = rn.NoisyLinear(384, action_size)
+        # Advantage head (one output per action)
+        self.advantage_fc = rn.NoisyLinear(384, 384)
+        self.advantage_fc_norm = nn.LayerNorm(384)
+        self.advantage = rn.NoisyLinear(384, action_size)

  
-  def forward(self, x):
-    x = F.relu(self.conv_norm1(self.conv1(x)))
-    x = F.relu(self.conv_norm2(self.conv2(x)))
-    x = F.relu(self.conv_norm3(self.conv3(x)))
+    def forward(self, x):
+        """Return state value plus advantages centred over the action axis.
+
+        Also prints a warning if the result contains NaN.
+        """
+        x = F.relu(self.conv_norm1(self.conv1(x)))
+        x = F.relu(self.conv_norm2(self.conv2(x)))
+        x = F.relu(self.conv_norm3(self.conv3(x)))
    
-    # Makes batch_size dimension again
-    x = x.view(-1, 64 * 6 * 6)
-    x = F.relu(self.fc_norm(self.fc1(x)))
+        # Makes batch_size dimension again
+        x = x.view(-1, 64 * 6 * 6)
+        x = F.relu(self.fc_norm(self.fc1(x)))
    
-    state_value = F.relu(self.value_fc_norm(self.value_fc(x)))
-    state_value = self.value(state_value)
+        state_value = F.relu(self.value_fc_norm(self.value_fc(x)))
+        state_value = self.value(state_value)
    
-    advantage = F.relu(self.advantage_fc_norm(self.advantage_fc(x)))
-    advantage = self.advantage(advantage)
+        advantage = F.relu(self.advantage_fc_norm(self.advantage_fc(x)))
+        advantage = self.advantage(advantage)
    
-    x = state_value + advantage - advantage.mean()
+        # Centre each sample's advantages over its own actions. A bare
+        # .mean() would average over the whole batch and couple the
+        # Q-values of unrelated samples.
+        x = state_value + advantage - advantage.mean(dim=-1, keepdim=True)
    
-    # For debugging purposes...
-    if torch.isnan(x).any().item():
-      print("WARNING NAN IN MODEL DETECTED")
-    
-    return x
+        # For debugging purposes...
+        if torch.isnan(x).any().item():
+            print("WARNING NAN IN MODEL DETECTED")
    
+        return x
+
 #
 ## Configuration
 #
@ -89,59 +90,73 @@ config['prioritized_replay_sampling_priority'] = 0.6
 # Should ideally start from 0 and move your way to 1 to prevent overfitting
 config['prioritized_replay_weight_importance'] = rltorch.scheduler.ExponentialScheduler(initial_value = 0.4, end_value = 1, iterations = 5000)

+#
+## Training Loop
+#
+def train(runner, agent, config, logwriter=None):
+    """Alternate environment runs and learning steps until training is done.
+
+    Each iteration calls runner.run() and then agent.learn(). The loop stops
+    once runner.episode_num exceeds config['total_training_episodes']. When
+    logwriter is given, the agent's network parameters are logged and the
+    global Logger is written out every iteration.
+    """
+    finished = False
+    while not finished:
+        runner.run()
+        agent.learn()
+        if logwriter is not None:
+            # This script builds a DQNAgent from a single network, so log
+            # agent.net as the single-process DQN example does. The
+            # value_net / policy_net pair belongs to the A2C example.
+            agent.net.log_named_parameters()
+            logwriter.write(Logger)
+        finished = runner.episode_num > config['total_training_episodes']
+
+
 if __name__ == "__main__":
-  # To not hit file descriptor memory limit
-  torch.multiprocessing.set_sharing_strategy('file_system') 
+    # Script entry point: build the environment, networks, runner and agent,
+    # train, then run evaluation episodes.
+    # To not hit file descriptor memory limit
+    torch.multiprocessing.set_sharing_strategy('file_system') 

-  # Setting up the environment
-  rltorch.set_seed(config['seed'])
-  print("Setting up environment...", end = " ")
-  env = E.FrameStack(E.TorchWrap(
-    E.ProcessFrame(E.FireResetEnv(gym.make(config['environment_name'])), 
-      resize_shape = (80, 80), crop_bounds = [34, 194, 15, 145], grayscale = True))
-  , 4)
-  env.seed(config['seed'])
-  print("Done.")
+    # Setting up the environment
+    rltorch.set_seed(config['seed'])
+    print("Setting up environment...", end = " ")
+    env = E.FrameStack(E.TorchWrap(
+        E.ProcessFrame(E.FireResetEnv(gym.make(config['environment_name'])), 
+        resize_shape=(80, 80), crop_bounds=[34, 194, 15, 145], grayscale=True))
+    , 4)
+    env.seed(config['seed'])
+    print("Done.")
      
-  state_size = env.observation_space.shape[0]
-  action_size = env.action_space.n
+    state_size = env.observation_space.shape[0]
+    action_size = env.action_space.n

-  # Logging
-  logger = rltorch.log.Logger()
-  logwriter = rltorch.log.LogWriter(SummaryWriter())
+    # Logging
+    logwriter = rltorch.log.LogWriter(SummaryWriter())

-  # Setting up the networks
-  device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
-  net = rn.Network(Value(state_size, action_size), 
-                      torch.optim.Adam, config, device = device, name = "DQN")
-  target_net = rn.TargetNetwork(net, device = device)
-  net.model.share_memory()
-  target_net.model.share_memory()
+    # Setting up the networks
+    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
+    net = rn.Network(Value(state_size, action_size), 
+        torch.optim.Adam, config, device=device, name="DQN")
+    target_net = rn.TargetNetwork(net, device=device)
+    # NOTE(review): both models are put in shared memory, presumably so the
+    # runner's extra process uses the same weights -- confirm against rltorch.mp.
+    net.model.share_memory()
+    target_net.model.share_memory()

-  # Actor takes a net and uses it to produce actions from given states
-  actor = ArgMaxSelector(net, action_size, device = device)
-  # Memory stores experiences for later training
-  memory = M.PrioritizedReplayMemory(capacity = config['memory_size'], alpha = config['prioritized_replay_sampling_priority'])
+    # Actor takes a net and uses it to produce actions from given states
+    actor = ArgMaxSelector(net, action_size, device=device)
+    # Memory stores experiences for later training
+    memory = M.PrioritizedReplayMemory(capacity=config['memory_size'], alpha=config['prioritized_replay_sampling_priority'])

-  # Runner performs a certain number of steps in the environment
-  runner = rltorch.mp.EnvironmentRun(env, actor, config, name = "Training", memory = memory, logwriter = logwriter)
+    # Runner performs a certain number of steps in the environment
+    runner = rltorch.mp.EnvironmentRun(env, actor, config, name="Training", memory=memory, logwriter=logwriter)

-  # Agent is what performs the training
-  agent = rltorch.agents.DQNAgent(net, memory, config, target_net = target_net, logger = logger)
+    # Agent is what performs the training
+    agent = rltorch.agents.DQNAgent(net, memory, config, target_net=target_net)
    
-  print("Training...")
-  train(runner, agent, config, logger = logger, logwriter = logwriter) 
+    print("Training...")
+    train(runner, agent, config, logwriter=logwriter)

   # For profiling...
   # import cProfile
-  # cProfile.run('train(runner, agent, config, logger = logger, logwriter = logwriter )')
+  # cProfile.run('train(runner, agent, config, logwriter = logwriter )')
   # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...

-  print("Training Finished.")
-  runner.terminate() # We don't need the extra process anymore
+    print("Training Finished.")
+    runner.terminate() # We don't need the extra process anymore

-  print("Evaluating...")
-  rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], logger = logger, name = "Evaluation")
-  print("Evaulations Done.")
+    print("Evaluating...")
+    # Evaluation reuses the same env and actor that were used for training
+    rltorch.env.simulateEnvEps(env, actor, config, total_episodes=config['total_evaluation_episodes'], name="Evaluation")
+    print("Evaulations Done.")

-  logwriter.close() # We don't need to write anything out to disk anymore
+    logwriter.close() # We don't need to write anything out to disk anymore