Added network documentation

parent 5e7de5bed7
commit 4c6dc0a2ea

5 changed files with 134 additions and 12 deletions

@@ -1,4 +1,10 @@
Neural Networks
===============

.. automodule:: rltorch.network

.. autoclass:: rltorch.network.Network
   :members:

.. autoclass:: rltorch.network.TargetNetwork
   :members:

.. autoclass:: rltorch.network.ESNetwork
   :members:

.. autoclass:: rltorch.network.NoisyLinear
   :members:
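These directives come from Sphinx's autodoc extension, which imports the module and pulls in the docstrings added in the hunks below. A minimal conf.py sketch, assuming a standard Sphinx setup for this project (the path and everything beyond the one extension are hypothetical):

# docs/conf.py (hypothetical location)
# autodoc must be enabled for automodule/autoclass to resolve
extensions = ["sphinx.ext.autodoc"]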

@@ -7,9 +7,36 @@ from copy import deepcopy
# What if we want to sometimes do gradient descent as well?
class ESNetwork(Network):
    """
-   Network that functions from the paper Evolutionary Strategies (https://arxiv.org/abs/1703.03864)
-   fitness_fun := model, *args -> fitness_value (float)
-   We wish to find a model that maximizes the fitness function
+   Uses evolutionary techniques to optimize a neural network.

    Notes
    -----
    Derived from the paper Evolutionary Strategies
    (https://arxiv.org/abs/1703.03864)

    Parameters
    ----------
    model : nn.Module
        A PyTorch nn.Module.
    optimizer
        A PyTorch optimizer from torch.optim.
    population_size : int
        The number of networks to evaluate each iteration.
    fitness_fn : function
        Function that evaluates a network and returns a higher
        number for better performing networks.
    sigma : number
        The standard deviation of the gaussian noise added to
        the parameters when creating the population.
    config : dict
        A dictionary of configuration items.
    device
        A device to send the weights to.
    logger
        Keeps track of historical weights.
    name
        For use in logger to differentiate in analysis.
    """
    def __init__(self, model, optimizer, population_size, fitness_fn, config, sigma = 0.05, device = None, logger = None, name = ""):
        super(ESNetwork, self).__init__(model, optimizer, config, device, logger, name)
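For orientation, a construction sketch for the class documented above. The model, the fitness function, and the assumption that `optimizer` is an already-built torch.optim instance are all illustrative, not taken from the library:

import torch.nn as nn
from torch.optim import Adam

policy = nn.Sequential(nn.Linear(4, 32), nn.ReLU(), nn.Linear(32, 2))
opt = Adam(policy.parameters(), lr=1e-2)  # assumed to be an optimizer instance

def fitness(model):
    # Placeholder: evaluate `model` (e.g. run a few episodes) and return a
    # float, higher for better-performing networks.
    return 0.0

net = ESNetwork(policy, opt, population_size=20, fitness_fn=fitness, config={}, sigma=0.05)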

@@ -18,9 +45,15 @@ class ESNetwork(Network):
        self.sigma = sigma
        assert self.sigma > 0

    # We're not going to be calculating gradients in the traditional way
    # So there's no need to waste computation time keeping track
    def __call__(self, *args):
        """
        Notes
        -----
        Since gradients aren't going to be computed in the
        traditional fashion, there is no need to keep
        track of the computations performed on the
        tensors.
        """
        with torch.no_grad():
            result = self.model(*args)
        return result
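A standalone illustration of what the no_grad wrapper buys (plain PyTorch, not rltorch code):

import torch

m = torch.nn.Linear(3, 1)
x = torch.randn(1, 3)
with torch.no_grad():
    y = m(x)  # forward pass only; no computation graph is recorded
assert not y.requires_grad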

@@ -48,6 +81,14 @@ class ESNetwork(Network):
        return candidate_solutions

    def calc_gradients(self, *args):
        """
        Calculate gradients by shifting parameters
        towards the networks with the highest fitness value.

        This is calculated by evaluating the fitness of multiple
        networks according to the fitness function specified in
        the class.
        """
        ## Generate Noise
        white_noise_dict, noise_dict = self._generate_noise_dicts()
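The hunk elides the rest of the method, but the update it describes is the standard ES gradient estimate, g ≈ 1/(n * sigma) * sum_i F_i * eps_i. A minimal sketch in plain PyTorch; `es_gradient_step`, its defaults, and the fitness normalization are illustrative assumptions, not the library's code:

import torch

def es_gradient_step(model, fitness_fn, population_size=50, sigma=0.05, lr=0.01):
    # One evolution-strategies update: perturb parameters with gaussian noise,
    # measure fitness, and move toward the better-performing perturbations.
    params = list(model.parameters())
    noise = [[torch.randn_like(p) for p in params] for _ in range(population_size)]
    with torch.no_grad():
        fitnesses = []
        for eps in noise:
            for p, e in zip(params, eps):
                p.add_(sigma * e)  # evaluate at theta + sigma * eps
            fitnesses.append(float(fitness_fn(model)))
            for p, e in zip(params, eps):
                p.sub_(sigma * e)  # restore theta
        f = torch.tensor(fitnesses)
        f = (f - f.mean()) / (f.std() + 1e-8)  # normalize fitness scores
        for j, p in enumerate(params):
            # g ~= 1/(n * sigma) * sum_i f_i * eps_i, then ascend the fitness
            grad = sum(f[i] * noise[i][j] for i in range(population_size))
            p.add_(lr * grad / (population_size * sigma))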

@@ -1,6 +1,21 @@
class Network:
    """
-   Wrapper around model which provides copy of it instead of trained weights
+   Wrapper around model and optimizer in PyTorch to abstract away common use cases.

    Parameters
    ----------
    model : nn.Module
        A PyTorch nn.Module.
    optimizer
        A PyTorch optimizer from torch.optim.
    config : dict
        A dictionary of configuration items.
    device
        A device to send the weights to.
    logger
        Keeps track of historical weights.
    name
        For use in logger to differentiate in analysis.
    """
    def __init__(self, model, optimizer, config, device = None, logger = None, name = ""):
        self.model = model

@@ -18,14 +33,29 @@ class Network:
        return self.model(*args)

    def clamp_gradients(self, x = 1):
        """
        Forces gradients to stay within a certain interval
        by setting them to the bound if they go over it.

        Parameters
        ----------
        x : number > 0
            Sets the interval to be [-x, x]
        """
        assert x > 0
        for param in self.model.parameters():
            param.grad.data.clamp_(-x, x)

    def zero_grad(self):
        """
        Clears out gradients held in the model.
        """
        self.model.zero_grad()

    def step(self):
        """
        Run a step of the optimizer on `model`.
        """
        self.optimizer.step()

    def log_named_parameters(self):
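Taken together, these methods support the usual training-loop shape. A hedged usage sketch (`train_step`, the loss, and the clamp bound are placeholders, not library code):

def train_step(net, loss):
    # `net` is a Network instance, `loss` any scalar tensor from its model
    net.zero_grad()         # clear stale gradients
    loss.backward()         # populate param.grad on the wrapped model
    net.clamp_gradients(1)  # keep each gradient inside [-1, 1]
    net.step()              # apply the optimizer update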

@@ -6,6 +6,24 @@ import math
# This class utilizes this property of the normal distribution:
# N(mu, sigma) = mu + sigma * N(0, 1)
class NoisyLinear(nn.Linear):
    """
    Draws the parameters of nn.Linear from a normal distribution.
    The parameters of the normal distribution are registered as
    learnable parameters in the neural network.

    Parameters
    ----------
    in_features
        Size of each input sample.
    out_features
        Size of each output sample.
    sigma_init
        The starting standard deviation of gaussian noise.
    bias
        If set to False, the layer will not
        learn an additive bias.
        Default: True
    """
    def __init__(self, in_features, out_features, sigma_init = 0.017, bias = True):
        super(NoisyLinear, self).__init__(in_features, out_features, bias = bias)
        # One of the parameters the network is going to tune is the
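The identity in the comment above can be sanity-checked numerically (standalone snippet):

import torch

mu, sigma = 2.0, 0.5
samples = mu + sigma * torch.randn(100000)  # N(mu, sigma) = mu + sigma * N(0, 1)
print(samples.mean().item(), samples.std().item())  # ~2.0, ~0.5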

@@ -27,6 +45,15 @@ class NoisyLinear(nn.Linear):
        nn.init.uniform_(self.bias, -std, std)

    def forward(self, x):
        r"""
        Calculates the output :math:`y` through the following:

        :math:`weight \sim N(mu_1, std_1)`

        :math:`bias \sim N(mu_2, std_2)`

        :math:`y = weight \cdot x + bias`
        """
        # Fill s_normal_weight with values from the standard normal distribution
        self.s_normal_weight.normal_()
        weight_noise = self.sigma_weight * self.s_normal_weight.clone().requires_grad_()
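Putting the pieces together, here is a condensed, self-contained sketch of a noisy linear layer built on that reparameterization; the names and structure are simplified relative to rltorch's NoisyLinear:

import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyNoisyLinear(nn.Linear):
    def __init__(self, in_features, out_features, sigma_init=0.017):
        super().__init__(in_features, out_features)
        # Learnable per-weight standard deviations
        self.sigma_weight = nn.Parameter(torch.full((out_features, in_features), sigma_init))
        # Noise buffer: resampled every forward pass, never trained directly
        self.register_buffer("epsilon", torch.zeros(out_features, in_features))

    def forward(self, x):
        self.epsilon.normal_()  # epsilon ~ N(0, 1)
        # weight ~ N(self.weight, sigma_weight) via mu + sigma * N(0, 1);
        # gradients flow into both self.weight (mu) and self.sigma_weight
        return F.linear(x, self.weight + self.sigma_weight * self.epsilon, self.bias)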

@@ -1,25 +1,43 @@
from copy import deepcopy
# Derived from ptan library

class TargetNetwork:
    """
-   Wrapper around model which provides copy of it instead of trained weights
+   Creates a clone of a network with syncing capabilities.

    Parameters
    ----------
    network
        The network to clone.
    device
        The device to put the cloned parameters in.
    """
    def __init__(self, network, device = None):
        self.model = network.model
        self.target_model = deepcopy(network.model)
-       if network.device is not None:
+       if device is not None:
+           self.target_model = self.target_model.to(device)
+       elif network.device is not None:
            self.target_model = self.target_model.to(network.device)

    def __call__(self, *args):
        return self.model(*args)

    def sync(self):
        """
        Perform a full state sync with the originating model.
        """
        self.target_model.load_state_dict(self.model.state_dict())

    def partial_sync(self, tau):
        """
-       Blend params of target net with params from the model
-       :param tau:
+       Partially move closer to the parameters of the originating
+       model by updating parameters to be a mix of the
+       originating and the clone models.

        Parameters
        ----------
        tau : number
            A number between 0 and 1 which indicates the proportion of the originator and clone in the new clone.
        """
        assert isinstance(tau, float)
        assert 0.0 < tau <= 1.0
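The hunk cuts off before the update itself; the tau-blend the docstring describes is commonly implemented as a Polyak average (a sketch, not the method's actual body):

import torch

def polyak_blend(model, target_model, tau):
    # Each clone parameter becomes: tau * originator + (1 - tau) * clone
    with torch.no_grad():
        for p, tp in zip(model.parameters(), target_model.parameters()):
            tp.mul_(1.0 - tau).add_(tau * p)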