From 4c6dc0a2ea090da99e733ed7f000f04016e31651 Mon Sep 17 00:00:00 2001
From: Brandon Rozek
Date: Fri, 20 Mar 2020 20:16:29 -0400
Subject: [PATCH] Added network documentation

---
 docs/source/network.rst          |  8 ++++-
 rltorch/network/ESNetwork.py     | 51 ++++++++++++++++++++++++++++----
 rltorch/network/Network.py       | 32 +++++++++++++++++++-
 rltorch/network/NoisyLinear.py   | 27 +++++++++++++++++
 rltorch/network/TargetNetwork.py | 28 ++++++++++++++----
 5 files changed, 134 insertions(+), 12 deletions(-)

diff --git a/docs/source/network.rst b/docs/source/network.rst
index 1e3d69b..d6e4ef1 100644
--- a/docs/source/network.rst
+++ b/docs/source/network.rst
@@ -1,4 +1,10 @@
 Neural Networks
 ===============
-.. automodule:: rltorch.network
+.. autoclass:: rltorch.network.Network
+   :members:
+.. autoclass:: rltorch.network.TargetNetwork
+   :members:
+.. autoclass:: rltorch.network.ESNetwork
+   :members:
+.. autoclass:: rltorch.network.NoisyLinear
    :members:
diff --git a/rltorch/network/ESNetwork.py b/rltorch/network/ESNetwork.py
index 7d6d8d0..6c83def 100644
--- a/rltorch/network/ESNetwork.py
+++ b/rltorch/network/ESNetwork.py
@@ -7,9 +7,36 @@ from copy import deepcopy
 # What if we want to sometimes do gradient descent as well?
 class ESNetwork(Network):
     """
-    Network that functions from the paper Evolutionary Strategies (https://arxiv.org/abs/1703.03864)
-    fitness_fun := model, *args -> fitness_value (float)
-    We wish to find a model that maximizes the fitness function
+    Uses evolutionary techniques to optimize a neural network.
+
+    Notes
+    -----
+    Derived from the paper
+    Evolutionary Strategies
+    (https://arxiv.org/abs/1703.03864)
+
+    Parameters
+    ----------
+    model : nn.Module
+        A PyTorch nn.Module.
+    optimizer
+        A PyTorch optimizer from torch.optim.
+    population_size : int
+        The number of networks to evaluate each iteration.
+    fitness_fn : function
+        Function that evaluates a network and returns a higher
+        number for better-performing networks.
+    sigma : number
+        The standard deviation of the Gaussian noise added to
+        the parameters when creating the population.
+    config : dict
+        A dictionary of configuration items.
+    device
+        A device to send the weights to.
+    logger
+        Keeps track of historical weights.
+    name
+        For use in the logger to differentiate in analysis.
     """
     def __init__(self, model, optimizer, population_size, fitness_fn, config, sigma = 0.05, device = None, logger = None, name = ""):
         super(ESNetwork, self).__init__(model, optimizer, config, device, logger, name)
@@ -18,9 +45,15 @@ class ESNetwork(Network):
         self.sigma = sigma
         assert self.sigma > 0
 
-    # We're not going to be calculating gradients in the traditional way
-    # So there's no need to waste computation time keeping track
     def __call__(self, *args):
+        """
+        Notes
+        -----
+        Since gradients aren't going to be computed in the
+        traditional fashion, there is no need to keep
+        track of the computations performed on the
+        tensors.
+        """
         with torch.no_grad():
             result = self.model(*args)
         return result
@@ -48,6 +81,14 @@ class ESNetwork(Network):
         return candidate_solutions
 
     def calc_gradients(self, *args):
+        """
+        Calculates gradients by shifting parameters
+        towards the networks with the highest fitness value.
+
+        This is done by evaluating the fitness of multiple
+        networks according to the fitness function specified in
+        the class.
+ """ ## Generate Noise white_noise_dict, noise_dict = self._generate_noise_dicts() diff --git a/rltorch/network/Network.py b/rltorch/network/Network.py index eeafae9..d16d436 100644 --- a/rltorch/network/Network.py +++ b/rltorch/network/Network.py @@ -1,6 +1,21 @@ class Network: """ - Wrapper around model which provides copy of it instead of trained weights + Wrapper around model and optimizer in PyTorch to abstract away common use cases. + + Parameters + ---------- + model : nn.Module + A PyTorch nn.Module. + optimizer + A PyTorch opimtizer from torch.optim. + config : dict + A dictionary of configuration items. + device + A device to send the weights to. + logger + Keeps track of historical weights + name + For use in logger to differentiate in analysis. """ def __init__(self, model, optimizer, config, device = None, logger = None, name = ""): self.model = model @@ -18,14 +33,29 @@ class Network: return self.model(*args) def clamp_gradients(self, x = 1): + """ + Forcing gradients to stay within a certain interval + by setting it to the bound if it goes over it. + + Parameters + ---------- + x : number > 0 + Sets the interval to be [-x, x] + """ assert x > 0 for param in self.model.parameters(): param.grad.data.clamp_(-x, x) def zero_grad(self): + """ + Clears out gradients held in the model. + """ self.model.zero_grad() def step(self): + """ + Run a step of the optimizer on `model`. + """ self.optimizer.step() def log_named_parameters(self): diff --git a/rltorch/network/NoisyLinear.py b/rltorch/network/NoisyLinear.py index 578457b..cd8b905 100644 --- a/rltorch/network/NoisyLinear.py +++ b/rltorch/network/NoisyLinear.py @@ -6,6 +6,24 @@ import math # This class utilizes this property of the normal distribution # N(mu, sigma) = mu + sigma * N(0, 1) class NoisyLinear(nn.Linear): + """ + Draws the parameters of nn.Linear from a normal distribution. + The parameters of the normal distribution are registered as + learnable parameters in the neural network. + + Parameters + ---------- + in_features + Size of each input sample. + out_features + Size of each output sample. + sigma_init + The starting standard deviation of guassian noise. + bias + If set to False, the layer will not + learn an additive bias. + Default: True + """ def __init__(self, in_features, out_features, sigma_init = 0.017, bias = True): super(NoisyLinear, self).__init__(in_features, out_features, bias = bias) # One of the parameters the network is going to tune is the @@ -27,6 +45,15 @@ class NoisyLinear(nn.Linear): nn.init.uniform_(self.bias, -std, std) def forward(self, x): + r""" + Calculates the output :math:`y` through the following: + + :math:`sigma \sim N(mu_1, std_1)` + + :math:`bias \sim N(mu_2, std_2)` + + :math:`y = sigma \cdot x + bias` + """ # Fill s_normal_weight with values from the standard normal distribution self.s_normal_weight.normal_() weight_noise = self.sigma_weight * self.s_normal_weight.clone().requires_grad_() diff --git a/rltorch/network/TargetNetwork.py b/rltorch/network/TargetNetwork.py index dd80365..3339bdd 100644 --- a/rltorch/network/TargetNetwork.py +++ b/rltorch/network/TargetNetwork.py @@ -1,25 +1,43 @@ from copy import deepcopy -# Derived from ptan library + class TargetNetwork: """ - Wrapper around model which provides copy of it instead of trained weights + Creates a clone of a network with syncing capabilities. + + Parameters + ---------- + network + The network to clone. + device + The device to put the cloned parameters in. 
""" def __init__(self, network, device = None): self.model = network.model self.target_model = deepcopy(network.model) - if network.device is not None: + if device is not None: + self.target_model = self.target_model.to(device) + elif network.device is not None: self.target_model = self.target_model.to(network.device) def __call__(self, *args): return self.model(*args) def sync(self): + """ + Perform a full state sync with the originating model. + """ self.target_model.load_state_dict(self.model.state_dict()) def partial_sync(self, tau): """ - Blend params of target net with params from the model - :param tau: + Partially move closer to the parameters of the originating + model by updating parameters to be a mix of the + originating and the clone models. + + Parameters + ---------- + tau : number + A number between 0-1 which indicates the proportion of the originator and clone in the new clone. """ assert isinstance(tau, float) assert 0.0 < tau <= 1.0