Merge branch 'master' of github.com:Brandon-Rozek/rltorch

2020-03-23 19:57:21 -04:00 · 2020-03-23 19:57:21 -04:00 · a667b3734b
commit a667b3734b
parent 838062813a 4c6dc0a2ea
29 changed files with 536 additions and 78 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,3 +2,9 @@ __pycache__/
 *.py[cod]
 rlenv/
 runs/
+*.tox
+*.coverage
+.vscode/
+docs/build
+.mypy_cache/
+*egg-info*
--- a/docs/Makefile
+++ b/docs/Makefile
@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/docs/make.bat
+++ b/docs/make.bat
@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
--- a/docs/source/action_selector.rst
+++ b/docs/source/action_selector.rst
@ -0,0 +1,4 @@
+Action Selector
+===============
+.. automodule:: rltorch.action_selector
+   :members:
--- a/docs/source/agents.rst
+++ b/docs/source/agents.rst
@ -0,0 +1,4 @@
+Agents
+======
+.. automodule:: rltorch.agents
+   :members:
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -0,0 +1,58 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+
+project = 'RLTorch'
+copyright = '2020, Brandon Rozek'
+author = 'Brandon Rozek'
+
+# The full version, including alpha/beta/rc tags
+release = '0.1.0'
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    "sphinx.ext.autodoc",
+    'sphinx.ext.autosummary',
+    'sphinx.ext.napoleon',
+    "sphinx.ext.viewcode",
+    "sphinx.ext.mathjax",
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'alabaster'
+
+
+html_sidebars = {
+    '**': [
+        'about.html',
+        'navigation.html',
+        'searchbox.html',
+    ]
+}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
--- a/docs/source/env.rst
+++ b/docs/source/env.rst
@ -0,0 +1,5 @@
+Environment Utilities
+=====================
+.. automodule:: rltorch.env
+   :members:
+
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@ -0,0 +1,15 @@
+Welcome to RLTorch's documentation!
+===================================
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   action_selector
+   agents
+   env
+   memory
+   mp
+   network
+   scheduler
+   log
+   seed
--- a/docs/source/log.rst
+++ b/docs/source/log.rst
@ -0,0 +1,4 @@
+Logging
+=======
+.. automodule:: rltorch.log
+   :members:
--- a/docs/source/memory.rst
+++ b/docs/source/memory.rst
@ -0,0 +1,8 @@
+Memory Structures
+=================
+.. autoclass:: rltorch.memory.ReplayMemory
+   :members:
+.. autoclass:: rltorch.memory.PrioritizedReplayMemory
+   :members:
+.. autoclass:: rltorch.memory.EpisodeMemory
+   :members:
--- a/docs/source/mp.rst
+++ b/docs/source/mp.rst
@ -0,0 +1,4 @@
+Multiprocessing
+===============
+.. automodule:: rltorch.mp
+   :members:
--- a/docs/source/network.rst
+++ b/docs/source/network.rst
@ -0,0 +1,10 @@
+Neural Networks
+===============
+.. autoclass:: rltorch.network.Network
+   :members:
+.. autoclass:: rltorch.network.TargetNetwork
+   :members:
+.. autoclass:: rltorch.network.ESNetwork
+   :members:
+.. autoclass:: rltorch.network.NoisyLinear
+   :members:
--- a/docs/source/scheduler.rst
+++ b/docs/source/scheduler.rst
@ -0,0 +1,6 @@
+Hyperparameter Scheduling
+=========================
+.. autoclass:: rltorch.scheduler.LinearScheduler
+   :members:
+.. autoclass:: rltorch.scheduler.ExponentialScheduler
+   :members:
--- a/docs/source/seed.rst
+++ b/docs/source/seed.rst
@ -0,0 +1,4 @@
+Seeding
+=======
+.. automodule:: rltorch.seed
+   :members:
--- a/requirements.txt
+++ b/requirements.txt
@ -1,32 +0,0 @@
-absl-py==0.7.0
-astor==0.7.1
-atari-py==0.1.7
-certifi==2018.11.29
-chardet==3.0.4
-future==0.17.1
-gast==0.2.2
-grpcio==1.18.0
-gym==0.10.11
-h5py==2.9.0
-idna==2.8
-Keras-Applications==1.0.7
-Keras-Preprocessing==1.0.8
-Markdown==3.0.1
-numpy==1.16.0
-opencv-python==4.0.0.21
-Pillow==5.4.1
-pkg-resources==0.0.0
-protobuf==3.6.1
-pyglet==1.3.2
-PyOpenGL==3.1.0
-requests==2.21.0
-scipy==1.2.0
-six==1.12.0
-tensorboard==1.12.2
-tensorboardX==1.6
-tensorflow==1.12.0
-termcolor==1.1.0
-torch==1.0.0
-urllib3==1.24.1
-Werkzeug==0.14.1
-numba==0.42.1
--- a/rltorch/log.py
+++ b/rltorch/log.py
@ -3,6 +3,13 @@ import numpy as np
 import torch

 class Logger:
+    """
+    Keeps track of lists of items separated by tags.
+
+    Notes
+    -----
+    Logger is a dictionary of lists.
+    """
    def __init__(self):
        self.log = {}
    def append(self, tag, value):
@ -26,26 +33,22 @@ class Logger:
    def __reversed__(self):
        return reversed(self.log)

-# Workaround since we can't use SummaryWriter in a different process
-# class LogWriter:
-#     def __init__(self, logger, writer):
-#         self.logger = logger
-#         self.writer = writer
-#         self.steps = Counter()
-#     def write(self):
-#         for key in self.logger.keys():
-#             for value in self.logger[key]:
-#                 self.steps[key] += 1
-#                 if isinstance(value, int) or isinstance(value, float):
-#                     self.writer.add_scalar(key, value, self.steps[key])
-#                 if isinstance(value, np.ndarray) or isinstance(value, torch.Tensor):
-#                     self.writer.add_histogram(key, value, self.steps[key])
-#         self.logger.log = {}
-#     def close(self):
-#         self.writer.close()
-    
-
 class LogWriter:
+    """
+    Takes a logger and writes it to a writer,
+    while keeping track of the number of times it
+    has written a certain tag.
+
+    Notes
+    -----
+    Used to keep track of scalars and histograms in
+    Tensorboard.
+
+    Parameters
+    ----------
+    writer
+      The tensorboard writer.
+    """
    def __init__(self, writer):
        self.writer = writer
        self.steps = Counter()
--- a/rltorch/memory/EpisodeMemory.py
+++ b/rltorch/memory/EpisodeMemory.py
@ -5,22 +5,43 @@ Transition = namedtuple('Transition',
    ('state', 'action', 'reward', 'next_state', 'done'))

 class EpisodeMemory(object):
+    """
+    Memory structure that stores an entire episode and
+    the observation's associated log-based probabilities.
+    """
    def __init__(self):
        self.memory = []
        self.log_probs = []

    def append(self, *args):
-        """Saves a transition."""
+        """
+        Adds a transition to the memory.
+
+        Parameters
+        ----------
+          *args
+             The state, action, reward, next_state, done tuple
+        """
        self.memory.append(Transition(*args))
    
    def append_log_probs(self, logprob):
+        """
+        Adds a log-based probability to the observation.
+        """
        self.log_probs.append(logprob)

    def clear(self):
+        """
+        Clears the transitions and log-based probabilities.
+        """
        self.memory.clear()
        self.log_probs.clear()

    def recall(self):
+        """
+        Return a list of the transitions with their 
+        associated log-based probabilities.
+        """
        if len(self.memory) != len(self.log_probs):
            raise ValueError("Memory and recorded log probabilities must be the same length.")
        return list(zip(*tuple(zip(*self.memory)), self.log_probs))
--- a/rltorch/memory/PrioritizedReplayMemory.py
+++ b/rltorch/memory/PrioritizedReplayMemory.py
@ -147,7 +147,9 @@ class MinSegmentTree(SegmentTree):

 class PrioritizedReplayMemory(ReplayMemory):
    def __init__(self, capacity, alpha):
-        """Create Prioritized Replay buffer.
+        """
+        Create Prioritized Replay buffer.
+
        Parameters
        ----------
        capacity: int
@ -156,9 +158,6 @@ class PrioritizedReplayMemory(ReplayMemory):
        alpha: float
            how much prioritization is used
            (0 - no prioritization, 1 - full prioritization)
-        See Also
-        --------
-        ReplayBuffer.__init__
        """
        super(PrioritizedReplayMemory, self).__init__(capacity)
        assert alpha >= 0
@ -173,7 +172,14 @@ class PrioritizedReplayMemory(ReplayMemory):
        self._max_priority = 1.0

    def append(self, *args, **kwargs):
-        """See ReplayBuffer.store_effect"""
+        """
+        Adds a transition to the buffer and adds an initial prioritization.
+
+        Parameters
+        ----------
+          *args
+             The state, action, reward, next_state, done tuple
+        """
        idx = self.position
        super().append(*args, **kwargs)
        self._it_sum[idx] = self._max_priority ** self._alpha
@ -191,10 +197,11 @@ class PrioritizedReplayMemory(ReplayMemory):
        return res

    def sample(self, batch_size, beta):
-        """Sample a batch of experiences.
-        compared to ReplayBuffer.sample
-        it also returns importance weights and idxes
+        """
+        Sample a batch of experiences,
+        while returning importance weights and idxes
        of sampled experiences.
+
        Parameters
        ----------
        batch_size: int
@ -202,6 +209,7 @@ class PrioritizedReplayMemory(ReplayMemory):
        beta: float
            To what degree to use importance weights
            (0 - no corrections, 1 - full correction)
+        
        Returns
        -------
        weights: np.array
@ -232,6 +240,32 @@ class PrioritizedReplayMemory(ReplayMemory):
        return batch

    def sample_n_steps(self, batch_size, steps, beta):
+        r"""
+        Sample a batch of sequential experiences,
+        while returning importance weights and indexes
+        of sampled experiences.
+
+        Parameters
+        ----------
+        batch_size: int
+            How many transitions to sample.
+        beta: float
+            To what degree to use importance weights
+            (0 - no corrections, 1 - full correction)
+        
+        Notes
+        -----
+        The number of batches sampled is :math:`\lfloor\frac{batch\_size}{steps}\rfloor`.
+
+        Returns
+        -------
+        weights: np.array
+            Array of shape (batch_size,) and dtype np.float32
+            denoting importance weight of each sampled transition
+        idxes: np.array
+            Array of shape (batch_size,) and dtype np.int32
+            indexes in buffer of sampled experiences
+        """
        assert beta > 0

        sample_size = batch_size // steps
@ -262,9 +296,11 @@ class PrioritizedReplayMemory(ReplayMemory):
    
    @jit(forceobj = True)
    def update_priorities(self, idxes, priorities):
-        """Update priorities of sampled transitions.
+        """
+        Update priorities of sampled transitions.
        sets priority of transition at index idxes[i] in buffer
        to priorities[i].
+        
        Parameters
        ----------
        idxes: [int]
--- a/rltorch/memory/ReplayMemory.py
+++ b/rltorch/memory/ReplayMemory.py
@ -4,21 +4,38 @@ import torch
 Transition = namedtuple('Transition',
    ('state', 'action', 'reward', 'next_state', 'done'))

-# Implements a Ring Buffer
 class ReplayMemory(object):
+    """
+    Creates a ring buffer of a fixed size.
+
+    Parameters
+    ----------
+    capacity : int
+      The maximum size of the buffer
+    """
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def append(self, *args):
-        """Saves a transition."""
+        """
+        Adds a transition to the buffer.
+
+        Parameters
+        ----------
+        *args
+          The state, action, reward, next_state, done tuple
+        """
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def clear(self):
+        """
+        Clears the buffer.
+        """
        self.memory.clear()
        self.position = 0

@ -37,10 +54,35 @@ class ReplayMemory(object):


    def sample(self, batch_size):
+        """
+        Returns a random sample from the buffer.
+
+        Parameters
+        ----------
+        batch_size : int
+          The number of observations to sample.
+        """
        return random.sample(self.memory, batch_size)
    
    def sample_n_steps(self, batch_size, steps):
-        idxes = random.sample(range(len(self.memory) - steps), batch_size // steps)
+        r"""
+        Returns a random sample of sequential batches of size steps.
+
+        Notes
+        -----
+        The number of batches sampled is :math:`\lfloor\frac{batch\_size}{steps}\rfloor`.
+
+        Parameters
+        ----------
+        batch_size : int
+          The total number of observations to sample.
+        steps : int
+          The number of observations after the one selected to sample.
+        """
+        idxes = random.sample(
+            range(len(self.memory) - steps), 
+            batch_size // steps
+        )
        step_idxes = []
        for i in idxes:
            step_idxes += range(i, i + steps)
@ -56,10 +98,10 @@ class ReplayMemory(object):
        return value in self.memory

    def __getitem__(self, index):
-        return self.memory[index]
+        return self.memory[index % self.capacity]

    def __setitem__(self, index, value):
-        self.memory[index] = value
+        self.memory[index % self.capacity] = value

    def __reversed__(self):
        return reversed(self.memory)
--- a/rltorch/network/ESNetwork.py
+++ b/rltorch/network/ESNetwork.py
@ -7,9 +7,36 @@ from copy import deepcopy
 # What if we want to sometimes do gradient descent as well?
 class ESNetwork(Network):
    """
-    Network that functions from the paper Evolutionary Strategies (https://arxiv.org/abs/1703.03864)
-    fitness_fun := model, *args -> fitness_value (float)
-    We wish to find a model that maximizes the fitness function
+    Uses evolutionary techniques to optimize a neural network.
+
+    Notes
+    -----
+    Derived from the paper 
+    Evolutionary Strategies 
+    (https://arxiv.org/abs/1703.03864)
+
+    Parameters
+    ----------
+    model : nn.Module
+      A PyTorch nn.Module.
+    optimizer
+      A PyTorch optimizer from torch.optim.
+    population_size : int
+      The number of networks to evaluate each iteration.
+    fitness_fn : function
+      Function that evaluates a network and returns a higher
+      number for better performing networks.
+    sigma : number
+      The standard deviation of the Gaussian noise added to
+      the parameters when creating the population.
+    config : dict
+      A dictionary of configuration items.
+    device
+      A device to send the weights to.
+    logger
+      Keeps track of historical weights
+    name
+      For use in logger to differentiate in analysis.
    """
    def __init__(self, model, optimizer, population_size, fitness_fn, config, sigma = 0.05, device = None, logger = None, name = ""):
        super(ESNetwork, self).__init__(model, optimizer, config, device, logger, name)
@ -18,9 +45,15 @@ class ESNetwork(Network):
        self.sigma = sigma
        assert self.sigma > 0

-    # We're not going to be calculating gradients in the traditional way
-    # So there's no need to waste computation time keeping track
    def __call__(self, *args):
+        """
+        Notes
+        -----
+        Since gradients aren't going to be computed in the 
+        traditional fashion, there is no need to keep
+        track of the computations performed on the
+        tensors.
+        """
        with torch.no_grad():
            result = self.model(*args)
        return result
@ -48,6 +81,14 @@ class ESNetwork(Network):
        return candidate_solutions

    def calc_gradients(self, *args):
+        """
+        Calculate gradients by shifting parameters
+        towards the networks with the highest fitness value.
+
+        This is calculated by evaluating the fitness of multiple
+        networks according to the fitness function specified in
+        the class. 
+        """
        ## Generate Noise
        white_noise_dict, noise_dict = self._generate_noise_dicts()
        
--- a/rltorch/network/Network.py
+++ b/rltorch/network/Network.py
@ -1,6 +1,21 @@
 class Network:
    """
-    Wrapper around model which provides copy of it instead of trained weights
+    Wrapper around model and optimizer in PyTorch to abstract away common use cases.
+    
+    Parameters
+    ----------
+    model : nn.Module
+      A PyTorch nn.Module.
+    optimizer
+      A PyTorch optimizer from torch.optim.
+    config : dict
+      A dictionary of configuration items.
+    device
+      A device to send the weights to.
+    logger
+      Keeps track of historical weights
+    name
+      For use in logger to differentiate in analysis.
    """
    def __init__(self, model, optimizer, config, device = None, logger = None, name = ""):
        self.model = model
@ -18,14 +33,29 @@ class Network:
        return self.model(*args)

    def clamp_gradients(self, x = 1):
+        """
+        Forces gradients to stay within a certain interval
+        by setting them to the bound if they go beyond it.
+
+        Parameters
+        ----------
+        x : number > 0
+          Sets the interval to be [-x, x]
+        """
        assert x > 0
        for param in self.model.parameters():
            param.grad.data.clamp_(-x, x)
    
    def zero_grad(self):
+        """
+        Clears out gradients held in the model.
+        """
        self.model.zero_grad()

    def step(self):
+        """
+        Run a step of the optimizer on `model`.
+        """
        self.optimizer.step()
    
    def log_named_parameters(self):
--- a/rltorch/network/NoisyLinear.py
+++ b/rltorch/network/NoisyLinear.py
@ -6,6 +6,24 @@ import math
 # This class utilizes this property of the normal distribution
 # N(mu, sigma) = mu + sigma * N(0, 1)
 class NoisyLinear(nn.Linear):
+  """
+  Draws the parameters of nn.Linear from a normal distribution.
+  The parameters of the normal distribution are registered as 
+  learnable parameters in the neural network.
+
+  Parameters
+  ----------
+  in_features
+    Size of each input sample.
+  out_features
+    Size of each output sample.
+  sigma_init
+    The starting standard deviation of Gaussian noise.
+  bias
+     If set to False, the layer will not 
+     learn an additive bias.
+     Default: True
+  """
  def __init__(self, in_features, out_features, sigma_init = 0.017, bias = True):
    super(NoisyLinear, self).__init__(in_features, out_features, bias = bias)
    # One of the parameters the network is going to tune is the 
@ -27,6 +45,15 @@ class NoisyLinear(nn.Linear):
    nn.init.uniform_(self.bias, -std, std)
  
  def forward(self, x):
+    r"""
+    Calculates the output :math:`y` through the following:
+
+    :math:`weight \sim N(\mu_1, \sigma_1)`
+
+    :math:`bias \sim N(\mu_2, \sigma_2)`
+
+    :math:`y = weight \cdot x + bias`
+    """
    # Fill s_normal_weight with values from the standard normal distribution
    self.s_normal_weight.normal_()
    weight_noise = self.sigma_weight * self.s_normal_weight.clone().requires_grad_()
--- a/rltorch/network/TargetNetwork.py
+++ b/rltorch/network/TargetNetwork.py
@ -1,25 +1,43 @@
 from copy import deepcopy
-# Derived from ptan library
+
 class TargetNetwork:
    """
-    Wrapper around model which provides copy of it instead of trained weights
+    Creates a clone of a network with syncing capabilities.
+
+    Parameters
+    ----------
+    network
+      The network to clone.
+    device
+      The device to put the cloned parameters in.
    """
    def __init__(self, network, device = None):
        self.model = network.model
        self.target_model = deepcopy(network.model)
-        if network.device is not None:
+        if device is not None:
+            self.target_model = self.target_model.to(device)
+        elif network.device is not None:
            self.target_model = self.target_model.to(network.device)

    def __call__(self, *args):
        return self.model(*args)

    def sync(self):
+        """
+        Perform a full state sync with the originating model.
+        """
        self.target_model.load_state_dict(self.model.state_dict())

    def partial_sync(self, tau):
        """
-        Blend params of target net with params from the model
-        :param tau:
+        Partially move closer to the parameters of the originating
+        model by updating parameters to be a mix of the
+        originating and the clone models.
+        
+        Parameters
+        ----------
+        tau : number
+          A float in the interval (0, 1] which indicates the proportion of the originator and clone in the new clone.
        """
        assert isinstance(tau, float)
        assert 0.0 < tau <= 1.0
--- a/rltorch/scheduler/ExponentialScheduler.py
+++ b/rltorch/scheduler/ExponentialScheduler.py
@ -1,5 +1,32 @@
 from .Scheduler import Scheduler
 class ExponentialScheduler(Scheduler):
+    r"""
+    An exponential scheduler that given a certain number
+    of iterations, spaces the values between
+    a start and an end point in an exponential order.
+
+    Notes
+    -----
+    The formula used to produce the value :math:`y` is based on the number of
+    times you call `next`. (denoted as :math:`i`)
+
+    :math:`y(1) = initial\_value`
+
+    :math:`base = \sqrt[iterations]{\frac{end\_value}{initial\_value}}`
+
+    :math:`y(i) = y(1) \cdot base^{i - 1}`
+
+    Another property of these formulas is that :math:`y(iterations + 1) = end\_value`.
+
+    Parameters
+    ----------
+    initial_value : number
+      The first value returned in the schedule.
+    end_value: number
+      The value returned when the maximum number of iterations are reached
+    iterations: int
+      The total number of iterations
+    """
    def __init__(self, initial_value, end_value, iterations):
        super(ExponentialScheduler, self).__init__(initial_value, end_value, iterations)
        self.base = (end_value / initial_value) ** (1.0 / iterations)
--- a/rltorch/scheduler/LinearScheduler.py
+++ b/rltorch/scheduler/LinearScheduler.py
@ -1,5 +1,29 @@
 from .Scheduler import Scheduler
 class LinearScheduler(Scheduler):
+    r"""
+    A linear scheduler that given a certain number
+    of iterations, equally spaces the values between
+    a start and an end point.
+
+    Notes
+    -----
+    The formula used to produce the value :math:`y` is based on the number of
+    times you call `next`. (denoted as :math:`i`)
+
+    :math:`y(1) = initial\_value`
+
+    :math:`y(i) = slope(i - 1) + y(1)`
+    where :math:`slope = \frac{end\_value - initial\_value}{iterations}`.
+
+    Parameters
+    ----------
+    initial_value : number
+      The first value returned in the schedule.
+    end_value: number
+      The value returned when the maximum number of iterations are reached
+    iterations: int
+      The total number of iterations
+    """
    def __init__(self, initial_value, end_value, iterations):
        super(LinearScheduler, self).__init__(initial_value, end_value, iterations)
        self.slope = (end_value - initial_value) / iterations
--- a/rltorch/seed.py
+++ b/rltorch/seed.py
@ -4,6 +4,14 @@ import random
 import torch

 def set_seed(SEED):
+    """
+    Set the seed for repeatability purposes.
+
+    Parameters
+    ----------
+    SEED : int
+      The seed to set numpy, random, and torch to.
+    """
    # Set `PYTHONHASHSEED` environment variable at a fixed value
    environ['PYTHONHASHSEED'] = str(SEED)

--- a/setup.py
+++ b/setup.py
@ -12,4 +12,11 @@ setuptools.setup(
    description="Reinforcement Learning Framework for PyTorch",
    version="0.1",
    packages=setuptools.find_packages(),
-)
+    install_requires=[
+        "numpy~=1.16.0",
+        "opencv-python~=4.2.0.32",
+        "gym~=0.10.11",
+        "torch~=1.4.0",
+        "numba~=0.48.0"
+    ]
+)
--- a/tests/test.py
+++ b/tests/test.py
@ -0,0 +1,6 @@
+import rltorch
+import unittest
+
+class Test(unittest.TestCase):
+    def test(self):
+        pass
--- a/tox.ini
+++ b/tox.ini
@ -0,0 +1,17 @@
+[tox]
+envlist = 
+    py36
+    py37
+    py38
+
+[testenv]
+deps = coverage
+commands = 
+    coverage run --source=tests,rltorch -m unittest discover tests
+
+
+[testenv:py38]
+commands =
+    coverage run --source=tests,rltorch -m unittest discover tests
+    coverage report -m
+