From 4c6dc0a2ea090da99e733ed7f000f04016e31651 Mon Sep 17 00:00:00 2001
From: Brandon Rozek
Date: Fri, 20 Mar 2020 20:16:29 -0400
Subject: [PATCH] Added network documentation

---
 docs/source/network.rst          |  8 ++++-
 rltorch/network/ESNetwork.py     | 51 ++++++++++++++++++++++++++++----
 rltorch/network/Network.py       | 32 +++++++++++++++++++-
 rltorch/network/NoisyLinear.py   | 27 +++++++++++++++++
 rltorch/network/TargetNetwork.py | 28 ++++++++++++++----
 5 files changed, 134 insertions(+), 12 deletions(-)

diff --git a/docs/source/network.rst b/docs/source/network.rst
index 1e3d69b..d6e4ef1 100644
--- a/docs/source/network.rst
+++ b/docs/source/network.rst
@@ -1,4 +1,10 @@
 Neural Networks
 ===============
-.. automodule:: rltorch.network
+.. autoclass:: rltorch.network.Network
+   :members:
+.. autoclass:: rltorch.network.TargetNetwork
+   :members:
+.. autoclass:: rltorch.network.ESNetwork
+   :members:
+.. autoclass:: rltorch.network.NoisyLinear
    :members:
diff --git a/rltorch/network/ESNetwork.py b/rltorch/network/ESNetwork.py
index 7d6d8d0..6c83def 100644
--- a/rltorch/network/ESNetwork.py
+++ b/rltorch/network/ESNetwork.py
@@ -7,9 +7,36 @@ from copy import deepcopy
 # What if we want to sometimes do gradient descent as well?
 class ESNetwork(Network):
     """
-    Network that functions from the paper Evolutionary Strategies (https://arxiv.org/abs/1703.03864)
-    fitness_fun := model, *args -> fitness_value (float)
-    We wish to find a model that maximizes the fitness function
+    Uses evolutionary techniques to optimize a neural network.
+
+    Notes
+    -----
+    Derived from the paper
+    Evolutionary Strategies
+    (https://arxiv.org/abs/1703.03864)
+
+    Parameters
+    ----------
+    model : nn.Module
+        A PyTorch nn.Module.
+    optimizer
+        A PyTorch optimizer from torch.optim.
+    population_size : int
+        The number of networks to evaluate each iteration.
+    fitness_fn : function
+        Function that evaluates a network and returns a higher
+        number for better-performing networks.
+    sigma : number
+        The standard deviation of the Gaussian noise added to
+        the parameters when creating the population.
+    config : dict
+        A dictionary of configuration items.
+    device
+        A device to send the weights to.
+    logger
+        Keeps track of historical weights.
+    name
+        For use in the logger to differentiate in analysis.
     """
     def __init__(self, model, optimizer, population_size, fitness_fn, config, sigma = 0.05, device = None, logger = None, name = ""):
         super(ESNetwork, self).__init__(model, optimizer, config, device, logger, name)
@@ -18,9 +45,15 @@ class ESNetwork(Network):
         self.sigma = sigma
         assert self.sigma > 0
 
-    # We're not going to be calculating gradients in the traditional way
-    # So there's no need to waste computation time keeping track
     def __call__(self, *args):
+        """
+        Notes
+        -----
+        Since gradients aren't going to be computed in the
+        traditional fashion, there is no need to keep
+        track of the computations performed on the
+        tensors.
+        """
         with torch.no_grad():
             result = self.model(*args)
         return result
@@ -48,6 +81,14 @@ class ESNetwork(Network):
         return candidate_solutions
 
     def calc_gradients(self, *args):
+        """
+        Calculates gradients by shifting parameters
+        towards the networks with the highest fitness value.
+
+        This is done by evaluating the fitness of multiple
+        networks according to the fitness function specified in
+        the class.
+ """ ## Generate Noise white_noise_dict, noise_dict = self._generate_noise_dicts() diff --git a/rltorch/network/Network.py b/rltorch/network/Network.py index eeafae9..d16d436 100644 --- a/rltorch/network/Network.py +++ b/rltorch/network/Network.py @@ -1,6 +1,21 @@ class Network: """ - Wrapper around model which provides copy of it instead of trained weights + Wrapper around model and optimizer in PyTorch to abstract away common use cases. + + Parameters + ---------- + model : nn.Module + A PyTorch nn.Module. + optimizer + A PyTorch opimtizer from torch.optim. + config : dict + A dictionary of configuration items. + device + A device to send the weights to. + logger + Keeps track of historical weights + name + For use in logger to differentiate in analysis. """ def __init__(self, model, optimizer, config, device = None, logger = None, name = ""): self.model = model @@ -18,14 +33,29 @@ class Network: return self.model(*args) def clamp_gradients(self, x = 1): + """ + Forcing gradients to stay within a certain interval + by setting it to the bound if it goes over it. + + Parameters + ---------- + x : number > 0 + Sets the interval to be [-x, x] + """ assert x > 0 for param in self.model.parameters(): param.grad.data.clamp_(-x, x) def zero_grad(self): + """ + Clears out gradients held in the model. + """ self.model.zero_grad() def step(self): + """ + Run a step of the optimizer on `model`. + """ self.optimizer.step() def log_named_parameters(self): diff --git a/rltorch/network/NoisyLinear.py b/rltorch/network/NoisyLinear.py index 578457b..cd8b905 100644 --- a/rltorch/network/NoisyLinear.py +++ b/rltorch/network/NoisyLinear.py @@ -6,6 +6,24 @@ import math # This class utilizes this property of the normal distribution # N(mu, sigma) = mu + sigma * N(0, 1) class NoisyLinear(nn.Linear): + """ + Draws the parameters of nn.Linear from a normal distribution. + The parameters of the normal distribution are registered as + learnable parameters in the neural network. + + Parameters + ---------- + in_features + Size of each input sample. + out_features + Size of each output sample. + sigma_init + The starting standard deviation of guassian noise. + bias + If set to False, the layer will not + learn an additive bias. + Default: True + """ def __init__(self, in_features, out_features, sigma_init = 0.017, bias = True): super(NoisyLinear, self).__init__(in_features, out_features, bias = bias) # One of the parameters the network is going to tune is the @@ -27,6 +45,15 @@ class NoisyLinear(nn.Linear): nn.init.uniform_(self.bias, -std, std) def forward(self, x): + r""" + Calculates the output :math:`y` through the following: + + :math:`sigma \sim N(mu_1, std_1)` + + :math:`bias \sim N(mu_2, std_2)` + + :math:`y = sigma \cdot x + bias` + """ # Fill s_normal_weight with values from the standard normal distribution self.s_normal_weight.normal_() weight_noise = self.sigma_weight * self.s_normal_weight.clone().requires_grad_() diff --git a/rltorch/network/TargetNetwork.py b/rltorch/network/TargetNetwork.py index dd80365..3339bdd 100644 --- a/rltorch/network/TargetNetwork.py +++ b/rltorch/network/TargetNetwork.py @@ -1,25 +1,43 @@ from copy import deepcopy -# Derived from ptan library + class TargetNetwork: """ - Wrapper around model which provides copy of it instead of trained weights + Creates a clone of a network with syncing capabilities. + + Parameters + ---------- + network + The network to clone. + device + The device to put the cloned parameters in. 
""" def __init__(self, network, device = None): self.model = network.model self.target_model = deepcopy(network.model) - if network.device is not None: + if device is not None: + self.target_model = self.target_model.to(device) + elif network.device is not None: self.target_model = self.target_model.to(network.device) def __call__(self, *args): return self.model(*args) def sync(self): + """ + Perform a full state sync with the originating model. + """ self.target_model.load_state_dict(self.model.state_dict()) def partial_sync(self, tau): """ - Blend params of target net with params from the model - :param tau: + Partially move closer to the parameters of the originating + model by updating parameters to be a mix of the + originating and the clone models. + + Parameters + ---------- + tau : number + A number between 0-1 which indicates the proportion of the originator and clone in the new clone. """ assert isinstance(tau, float) assert 0.0 < tau <= 1.0