From 324fa5c66738f28752841ee5e6783d3dad11e5b5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2019 18:14:40 +0000 Subject: [PATCH 01/12] Bump werkzeug from 0.14.1 to 0.15.3 Bumps [werkzeug](https://github.com/pallets/werkzeug) from 0.14.1 to 0.15.3. - [Release notes](https://github.com/pallets/werkzeug/releases) - [Changelog](https://github.com/pallets/werkzeug/blob/master/CHANGES.rst) - [Commits](https://github.com/pallets/werkzeug/compare/0.14.1...0.15.3) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9f97f26..7084bc2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,5 +28,5 @@ tensorflow==1.12.0 termcolor==1.1.0 torch==1.0.0 urllib3==1.24.1 -Werkzeug==0.14.1 +Werkzeug==0.15.3 numba==0.42.1 From 6b2b6da9e6ae5d4f8bcef19f4d9f52e5fcb4cd42 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2019 22:14:02 +0000 Subject: [PATCH 02/12] Bump pillow from 5.4.1 to 6.2.0 Bumps [pillow](https://github.com/python-pillow/Pillow) from 5.4.1 to 6.2.0. - [Release notes](https://github.com/python-pillow/Pillow/releases) - [Changelog](https://github.com/python-pillow/Pillow/blob/master/CHANGES.rst) - [Commits](https://github.com/python-pillow/Pillow/compare/5.4.1...6.2.0) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9f97f26..9143967 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ Keras-Preprocessing==1.0.8 Markdown==3.0.1 numpy==1.16.0 opencv-python==4.0.0.21 -Pillow==5.4.1 +Pillow==6.2.0 pkg-resources==0.0.0 protobuf==3.6.1 pyglet==1.3.2 From 35f57a3f220d566c2df0de257778b4c6f437382f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2019 21:27:03 +0000 Subject: [PATCH 03/12] Bump tensorflow from 1.12.0 to 1.15.0 Bumps [tensorflow](https://github.com/tensorflow/tensorflow) from 1.12.0 to 1.15.0. - [Release notes](https://github.com/tensorflow/tensorflow/releases) - [Changelog](https://github.com/tensorflow/tensorflow/blob/master/RELEASE.md) - [Commits](https://github.com/tensorflow/tensorflow/compare/v1.12.0...v1.15.0) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9f97f26..ace5e33 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ scipy==1.2.0 six==1.12.0 tensorboard==1.12.2 tensorboardX==1.6 -tensorflow==1.12.0 +tensorflow==1.15.0 termcolor==1.1.0 torch==1.0.0 urllib3==1.24.1 From 8323b1b073771572351d9db5c3c7bd30dc31e308 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 Jan 2020 04:48:59 +0000 Subject: [PATCH 04/12] Bump urllib3 from 1.24.1 to 1.24.2 Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.24.1 to 1.24.2. 
- [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/master/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.24.1...1.24.2) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ecfa52c..8b10420 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,6 +27,6 @@ tensorboardX==1.6 tensorflow==1.15.0 termcolor==1.1.0 torch==1.0.0 -urllib3==1.24.1 +urllib3==1.24.2 Werkzeug==0.15.3 numba==0.42.1 From c6ca2a4cfea83fc536716cfe285ce37f0f7fb1f9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Jan 2020 22:34:13 +0000 Subject: [PATCH 05/12] Bump tensorflow from 1.15.0 to 1.15.2 Bumps [tensorflow](https://github.com/tensorflow/tensorflow) from 1.15.0 to 1.15.2. - [Release notes](https://github.com/tensorflow/tensorflow/releases) - [Changelog](https://github.com/tensorflow/tensorflow/blob/master/RELEASE.md) - [Commits](https://github.com/tensorflow/tensorflow/compare/v1.15.0...v1.15.2) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8b10420..91331da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ scipy==1.2.0 six==1.12.0 tensorboard==1.12.2 tensorboardX==1.6 -tensorflow==1.15.0 +tensorflow==1.15.2 termcolor==1.1.0 torch==1.0.0 urllib3==1.24.2 From ea62ccf389687f601c555d28135d9ff7d9f9a414 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Sun, 15 Mar 2020 14:27:56 -0400 Subject: [PATCH 06/12] Added templates for unit testing and sphinx documentation --- .gitignore | 6 ++++ docs/Makefile | 20 ++++++++++++ docs/make.bat | 35 ++++++++++++++++++++ docs/source/action_selector.rst | 4 +++ docs/source/agents.rst | 4 +++ docs/source/conf.py | 58 +++++++++++++++++++++++++++++++++ docs/source/env.rst | 5 +++ docs/source/index.rst | 15 +++++++++ docs/source/log.rst | 4 +++ docs/source/memory.rst | 4 +++ docs/source/mp.rst | 4 +++ docs/source/network.rst | 4 +++ docs/source/scheduler.rst | 4 +++ docs/source/seed.rst | 4 +++ requirements.txt | 32 ------------------ setup.py | 9 ++++- tests/test.py | 6 ++++ tox.ini | 17 ++++++++++ 18 files changed, 202 insertions(+), 33 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/source/action_selector.rst create mode 100644 docs/source/agents.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/env.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/log.rst create mode 100644 docs/source/memory.rst create mode 100644 docs/source/mp.rst create mode 100644 docs/source/network.rst create mode 100644 docs/source/scheduler.rst create mode 100644 docs/source/seed.rst delete mode 100644 requirements.txt create mode 100644 tests/test.py create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore index 1f091d4..126808d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,9 @@ __pycache__/ *.py[cod] rlenv/ runs/ +*.tox +*.coverage +.vscode/ +docs/build +.mypy_cache/ +*egg-info* diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. 
+SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..6247f7e --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/action_selector.rst b/docs/source/action_selector.rst new file mode 100644 index 0000000..17cfdfb --- /dev/null +++ b/docs/source/action_selector.rst @@ -0,0 +1,4 @@ +Action Selector +=============== +.. automodule:: rltorch.action_selector + :members: diff --git a/docs/source/agents.rst b/docs/source/agents.rst new file mode 100644 index 0000000..76ef275 --- /dev/null +++ b/docs/source/agents.rst @@ -0,0 +1,4 @@ +Agents +====== +.. automodule:: rltorch.agents + :members: diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..5eef823 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,58 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- + +project = 'RLTorch' +copyright = '2020, Brandon Rozek' +author = 'Brandon Rozek' + +# The full version, including alpha/beta/rc tags +release = '0.1.0' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + 'sphinx.ext.autosummary', + 'sphinx.ext.napoleon', + "sphinx.ext.viewcode", + "sphinx.ext.mathjax", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. 
+# +html_theme = 'alabaster' + + +html_sidebars = { + '**': [ + 'about.html', + 'navigation.html', + 'searchbox.html', + ] +} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] diff --git a/docs/source/env.rst b/docs/source/env.rst new file mode 100644 index 0000000..80b6c53 --- /dev/null +++ b/docs/source/env.rst @@ -0,0 +1,5 @@ +Environment Utilities +===================== +.. automodule:: rltorch.env + :members: + diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..1f90744 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,15 @@ +Welcome to RLTorch's documentation! +=================================== +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + action_selector + agents + env + memory + mp + network + scheduler + log + seed diff --git a/docs/source/log.rst b/docs/source/log.rst new file mode 100644 index 0000000..658ecfc --- /dev/null +++ b/docs/source/log.rst @@ -0,0 +1,4 @@ +Logging +======= +.. automodule:: rltorch.log + :members: diff --git a/docs/source/memory.rst b/docs/source/memory.rst new file mode 100644 index 0000000..3446f3d --- /dev/null +++ b/docs/source/memory.rst @@ -0,0 +1,4 @@ +Memory Structures +================= +.. automodule:: rltorch.memory + :members: diff --git a/docs/source/mp.rst b/docs/source/mp.rst new file mode 100644 index 0000000..7fad488 --- /dev/null +++ b/docs/source/mp.rst @@ -0,0 +1,4 @@ +Multiprocessing +=============== +.. automodule:: rltorch.mp + :members: diff --git a/docs/source/network.rst b/docs/source/network.rst new file mode 100644 index 0000000..1e3d69b --- /dev/null +++ b/docs/source/network.rst @@ -0,0 +1,4 @@ +Neural Networks +=============== +.. automodule:: rltorch.network + :members: diff --git a/docs/source/scheduler.rst b/docs/source/scheduler.rst new file mode 100644 index 0000000..61fd10c --- /dev/null +++ b/docs/source/scheduler.rst @@ -0,0 +1,4 @@ +Hyperparameter Scheduling +========================= +.. automodule:: rltorch.scheduler + :members: diff --git a/docs/source/seed.rst b/docs/source/seed.rst new file mode 100644 index 0000000..933023b --- /dev/null +++ b/docs/source/seed.rst @@ -0,0 +1,4 @@ +Seeding +======= +.. 
automodule:: rltorch.seed + :members: diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 91331da..0000000 --- a/requirements.txt +++ /dev/null @@ -1,32 +0,0 @@ -absl-py==0.7.0 -astor==0.7.1 -atari-py==0.1.7 -certifi==2018.11.29 -chardet==3.0.4 -future==0.17.1 -gast==0.2.2 -grpcio==1.18.0 -gym==0.10.11 -h5py==2.9.0 -idna==2.8 -Keras-Applications==1.0.7 -Keras-Preprocessing==1.0.8 -Markdown==3.0.1 -numpy==1.16.0 -opencv-python==4.0.0.21 -Pillow==6.2.0 -pkg-resources==0.0.0 -protobuf==3.6.1 -pyglet==1.3.2 -PyOpenGL==3.1.0 -requests==2.21.0 -scipy==1.2.0 -six==1.12.0 -tensorboard==1.12.2 -tensorboardX==1.6 -tensorflow==1.15.2 -termcolor==1.1.0 -torch==1.0.0 -urllib3==1.24.2 -Werkzeug==0.15.3 -numba==0.42.1 diff --git a/setup.py b/setup.py index c0763be..66de91d 100644 --- a/setup.py +++ b/setup.py @@ -12,4 +12,11 @@ setuptools.setup( description="Reinforcement Learning Framework for PyTorch", version="0.1", packages=setuptools.find_packages(), -) \ No newline at end of file + install_requires=[ + "numpy~=1.16.0", + "opencv-python~=4.2.0.32", + "gym~=0.10.11", + "torch~=1.4.0", + "numba~=0.48.0" + ] +) diff --git a/tests/test.py b/tests/test.py new file mode 100644 index 0000000..22eea34 --- /dev/null +++ b/tests/test.py @@ -0,0 +1,6 @@ +import rltorch +import unittest + +class Test(unittest.TestCase): + def test(self): + pass diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..bf11c08 --- /dev/null +++ b/tox.ini @@ -0,0 +1,17 @@ +[tox] +envlist = + py36 + py37 + py38 + +[testenv] +deps = coverage +commands = + coverage run --source=tests,rltorch -m unittest discover tests + + +[testenv:py38] +commands = + coverage run --source=tests,rltorch -m unittest discover tests + coverage report -m + From 720bb1b051c9dcd5054606d076d2dca407ddffa2 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 17:59:56 -0400 Subject: [PATCH 07/12] Documented scheduler module --- docs/source/scheduler.rst | 4 +++- rltorch/scheduler/ExponentialScheduler.py | 25 +++++++++++++++++++++ rltorch/scheduler/LinearScheduler.py | 22 ++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/docs/source/scheduler.rst b/docs/source/scheduler.rst index 61fd10c..5f698d3 100644 --- a/docs/source/scheduler.rst +++ b/docs/source/scheduler.rst @@ -1,4 +1,6 @@ Hyperparameter Scheduling ========================= -.. automodule:: rltorch.scheduler +.. autoclass:: rltorch.scheduler.LinearScheduler + :members: +.. autoclass:: rltorch.scheduler.ExponentialScheduler :members: diff --git a/rltorch/scheduler/ExponentialScheduler.py b/rltorch/scheduler/ExponentialScheduler.py index ca8d162..f5d3c65 100644 --- a/rltorch/scheduler/ExponentialScheduler.py +++ b/rltorch/scheduler/ExponentialScheduler.py @@ -1,5 +1,30 @@ from .Scheduler import Scheduler class ExponentialScheduler(Scheduler): + r""" + An exponential scheduler that, given a certain number + of iterations, spaces the values between + a start and an end point in an exponential order. + + Notes + ----- + The formula used to produce the value :math:`y` is based on the number of + times you call `next`. (denoted as :math:`i`) + + :math:`y(1) = initial\_value` + :math:`y(i) = y(1) \cdot base^{i - 1}` + :math:`base = \sqrt[iterations]{\frac{end\_value}{initial\_value}}`. + + Another property is that :math:`y(iterations) = end\_value`. + + Parameters + ---------- + initial_value : number + The first value returned in the schedule.
+    end_value: number +        The value returned when the maximum number of iterations is reached +    iterations: int +        The total number of iterations +    """ def __init__(self, initial_value, end_value, iterations): super(ExponentialScheduler, self).__init__(initial_value, end_value, iterations) self.base = (end_value / initial_value) ** (1.0 / iterations) diff --git a/rltorch/scheduler/LinearScheduler.py b/rltorch/scheduler/LinearScheduler.py index 984f3eb..3637538 100644 --- a/rltorch/scheduler/LinearScheduler.py +++ b/rltorch/scheduler/LinearScheduler.py @@ -1,5 +1,27 @@ from .Scheduler import Scheduler class LinearScheduler(Scheduler): + r""" + A linear scheduler that, given a certain number + of iterations, equally spaces the values between + a start and an end point. + + Notes + ----- + The formula used to produce the value :math:`y` is based on the number of + times you call `next`. (denoted as :math:`i`) + + :math:`y(i) = slope \cdot (i - 1) + initial\_value` + where :math:`slope = \frac{end\_value - initial\_value)}{iterations}`. + + Parameters + ---------- + initial_value : number + The first value returned in the schedule. + end_value: number + The value returned when the maximum number of iterations is reached + iterations: int + The total number of iterations + """ def __init__(self, initial_value, end_value, iterations): super(LinearScheduler, self).__init__(initial_value, end_value, iterations) self.slope = (end_value - initial_value) / iterations From 711c2e8dd1f88bc03fde60d86715c9b1c65d3b9f Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 19:30:56 -0400 Subject: [PATCH 08/12] Cleaned up scheduler docs --- rltorch/scheduler/ExponentialScheduler.py | 4 +++- rltorch/scheduler/LinearScheduler.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/rltorch/scheduler/ExponentialScheduler.py b/rltorch/scheduler/ExponentialScheduler.py index f5d3c65..97a9ebd 100644 --- a/rltorch/scheduler/ExponentialScheduler.py +++ b/rltorch/scheduler/ExponentialScheduler.py @@ -11,8 +11,10 @@ class ExponentialScheduler(Scheduler): times you call `next`. (denoted as :math:`i`) :math:`y(1) = initial\_value` + + :math:`base = \sqrt[iterations]{\frac{end\_value}{initial\_value}}` + :math:`y(i) = y(1) \cdot base^{i - 1}` - :math:`base = \sqrt[iterations]{\frac{end\_value}{initial\_value}}`. Another property is that :math:`y(iterations) = end\_value`. diff --git a/rltorch/scheduler/LinearScheduler.py b/rltorch/scheduler/LinearScheduler.py index 3637538..c15534f 100644 --- a/rltorch/scheduler/LinearScheduler.py +++ b/rltorch/scheduler/LinearScheduler.py @@ -10,8 +10,10 @@ class LinearScheduler(Scheduler): The formula used to produce the value :math:`y` is based on the number of times you call `next`. (denoted as :math:`i`) - :math:`y(i) = slope \cdot (i - 1) + initial\_value` - where :math:`slope = \frac{end\_value - initial\_value)}{iterations}`. + :math:`y(1) = initial\_value` + + :math:`y(i) = slope(i - 1) + y(1)` + where :math:`slope = \frac{end\_value - initial\_value}{iterations}`. Parameters ---------- initial_value : number The first value returned in the schedule. end_value: number The value returned when the maximum number of iterations is reached iterations: int The total number of iterations
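The two scheduler docstrings above give closed-form values for the i-th call to `next`. As a standalone illustration of those formulas (the helper names below are made up for this sketch and are not part of rltorch):

    def linear_value(initial_value, end_value, iterations, i):
        # y(i) = slope * (i - 1) + y(1), with slope = (end_value - initial_value) / iterations
        slope = (end_value - initial_value) / iterations
        return slope * (i - 1) + initial_value

    def exponential_value(initial_value, end_value, iterations, i):
        # y(i) = y(1) * base^(i - 1), with base = (end_value / initial_value)^(1 / iterations)
        base = (end_value / initial_value) ** (1.0 / iterations)
        return initial_value * base ** (i - 1)

    # e.g. an exploration rate decayed from 1.0 toward 0.1 over 1000 calls to next()
    print(linear_value(1.0, 0.1, 1000, 1))         # 1.0
    print(exponential_value(1.0, 0.1, 1000, 500))  # roughly 0.32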
From 1cad98fcf9e79887f645f1b39231bb841ee845bf Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 19:31:09 -0400 Subject: [PATCH 09/12] Created documentation for memory module --- docs/source/memory.rst | 6 ++- rltorch/memory/EpisodeMemory.py | 23 +++++++++- rltorch/memory/PrioritizedReplayMemory.py | 54 +++++++++++++++++++---- rltorch/memory/ReplayMemory.py | 52 +++++++++++++++++++--- 4 files changed, 119 insertions(+), 16 deletions(-) diff --git a/docs/source/memory.rst b/docs/source/memory.rst index 3446f3d..7cacd28 100644 --- a/docs/source/memory.rst +++ b/docs/source/memory.rst @@ -1,4 +1,8 @@ Memory Structures ================= -.. automodule:: rltorch.memory +.. autoclass:: rltorch.memory.ReplayMemory + :members: +.. autoclass:: rltorch.memory.PrioritizedReplayMemory + :members: +.. autoclass:: rltorch.memory.EpisodeMemory :members: diff --git a/rltorch/memory/EpisodeMemory.py b/rltorch/memory/EpisodeMemory.py index 0957465..27efa69 100644 --- a/rltorch/memory/EpisodeMemory.py +++ b/rltorch/memory/EpisodeMemory.py @@ -5,22 +5,43 @@ Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state', 'done')) class EpisodeMemory(object): + """ + Memory structure that stores an entire episode and + the log probabilities associated with each observation. + """ def __init__(self): self.memory = [] self.log_probs = [] def append(self, *args): - """Saves a transition.""" + """ + Adds a transition to the memory. + + Parameters + ---------- + *args + The state, action, reward, next_state, done tuple + """ self.memory.append(Transition(*args)) def append_log_probs(self, logprob): + """ + Adds a log probability to the memory. + """ self.log_probs.append(logprob) def clear(self): + """ + Clears the transitions and log probabilities. + """ self.memory.clear() self.log_probs.clear() def recall(self): + """ + Returns a list of the transitions with their + associated log probabilities. + """ if len(self.memory) != len(self.log_probs): raise ValueError("Memory and recorded log probabilities must be the same length.") return list(zip(*tuple(zip(*self.memory)), self.log_probs)) diff --git a/rltorch/memory/PrioritizedReplayMemory.py b/rltorch/memory/PrioritizedReplayMemory.py index 58843e3..1bf153a 100644 --- a/rltorch/memory/PrioritizedReplayMemory.py +++ b/rltorch/memory/PrioritizedReplayMemory.py @@ -147,7 +147,9 @@ class MinSegmentTree(SegmentTree): class PrioritizedReplayMemory(ReplayMemory): def __init__(self, capacity, alpha): - """Create Prioritized Replay buffer. + """ + Create Prioritized Replay buffer. + Parameters ---------- capacity: int Max number of transitions to store in the buffer. When the buffer overflows the old memories are dropped. alpha: float how much prioritization is used (0 - no prioritization, 1 - full prioritization) - See Also - -------- - ReplayBuffer.__init__ """ super(PrioritizedReplayMemory, self).__init__(capacity) assert alpha >= 0 self._alpha = alpha it_capacity = 1 while it_capacity < capacity: it_capacity *= 2 self._it_sum = SumSegmentTree(it_capacity) self._it_min = MinSegmentTree(it_capacity) self._max_priority = 1.0 def append(self, *args, **kwargs): - """See ReplayBuffer.store_effect""" + """ + Adds a transition to the buffer and assigns it an initial priority. + + Parameters + ---------- + *args + The state, action, reward, next_state, done tuple + """ idx = self.position super().append(*args, **kwargs) self._it_sum[idx] = self._max_priority ** self._alpha self._it_min[idx] = self._max_priority ** self._alpha @jit(forceobj = True) def _sample_proportional(self, batch_size): res = [] for _ in range(batch_size): mass = random.random() * self._it_sum.sum(0, len(self.memory) - 1) idx = self._it_sum.find_prefixsum_idx(mass) res.append(idx) return res def sample(self, batch_size, beta): -        """Sample a batch of experiences.
- compared to ReplayBuffer.sample - it also returns importance weights and idxes + """ + Sample a batch of experiences + while returning the importance weights and indexes + of the sampled experiences. + Parameters ---------- batch_size: int How many transitions to sample. beta: float To what degree to use importance weights (0 - no corrections, 1 - full correction) + Returns ------- weights: np.array Array of shape (batch_size,) and dtype np.float32 denoting importance weight of each sampled transition idxes: np.array Array of shape (batch_size,) and dtype np.int32 indexes in the buffer of the sampled experiences """ assert beta > 0 idxes = self._sample_proportional(batch_size) weights = [] p_min = self._it_min.min() / self._it_sum.sum() max_weight = (p_min * len(self.memory)) ** (-beta) for idx in idxes: p_sample = self._it_sum[idx] / self._it_sum.sum() weight = (p_sample * len(self.memory)) ** (-beta) weights.append(weight / max_weight) weights = torch.tensor(weights, device = self.device) batch = self._encode_sample(idxes) batch.append(weights) batch.append(idxes) return batch def sample_n_steps(self, batch_size, steps, beta): + r""" + Sample a batch of sequential experiences + while returning the importance weights and indexes + of the sampled experiences. + + Parameters + ---------- + batch_size: int + How many transitions to sample. + beta: float + To what degree to use importance weights + (0 - no corrections, 1 - full correction) + + Notes + ----- + The number of batches sampled is :math:`\lfloor\frac{batch\_size}{steps}\rfloor`. + + Returns + ------- + weights: np.array + Array of shape (batch_size,) and dtype np.float32 + denoting importance weight of each sampled transition + idxes: np.array + Array of shape (batch_size,) and dtype np.int32 + indexes in the buffer of the sampled experiences + """ assert beta > 0 sample_size = batch_size // steps @jit(forceobj = True) def update_priorities(self, idxes, priorities): - """Update priorities of sampled transitions. + """ + Update priorities of sampled transitions. sets priority of transition at index idxes[i] in buffer to priorities[i]. + Parameters ---------- idxes: [int] diff --git a/rltorch/memory/ReplayMemory.py b/rltorch/memory/ReplayMemory.py index aa32ab7..5507185 100644 --- a/rltorch/memory/ReplayMemory.py +++ b/rltorch/memory/ReplayMemory.py @@ -4,21 +4,38 @@ import torch Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state', 'done')) -# Implements a Ring Buffer class ReplayMemory(object): + """ + Creates a ring buffer of a fixed size. + + Parameters + ---------- + capacity : int + The maximum size of the buffer + """ def __init__(self, capacity): self.capacity = capacity self.memory = [] self.position = 0 def append(self, *args): - """Saves a transition.""" + """ + Adds a transition to the buffer. + + Parameters + ---------- + *args + The state, action, reward, next_state, done tuple + """ if len(self.memory) < self.capacity: self.memory.append(None) self.memory[self.position] = Transition(*args) self.position = (self.position + 1) % self.capacity def clear(self): + """ + Clears the buffer. + """ self.memory.clear() self.position = 0 def sample(self, batch_size): + """ + Returns a random sample from the buffer. + + Parameters + ---------- + batch_size : int + The number of observations to sample. + """ return random.sample(self.memory, batch_size) def sample_n_steps(self, batch_size, steps): - idxes = random.sample(range(len(self.memory) - steps), batch_size // steps) + r""" + Returns a random sample of sequential batches of size steps. + + Notes + ----- + The number of batches sampled is :math:`\lfloor\frac{batch\_size}{steps}\rfloor`. + + Parameters + ---------- + batch_size : int + The total number of observations to sample. + steps : int + The number of consecutive observations to sample starting from each selected index. +    """ +    idxes = random.sample( +        range(len(self.memory) - steps), +        batch_size // steps +    ) step_idxes = [] for i in idxes: step_idxes += range(i, i + steps) @@ -56,10 +98,10 @@ class ReplayMemory(object): return value in self.memory def __getitem__(self, index): - return self.memory[index] + return self.memory[index % self.capacity] def __setitem__(self, index, value): - self.memory[index] = value + self.memory[index % self.capacity] = value def __reversed__(self): return reversed(self.memory)
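A minimal usage sketch of the ring buffer documented above, assuming rltorch is installed and that ReplayMemory is importable from rltorch.memory as the docs in this patch declare; the transition values are toy numbers:

    from rltorch.memory import ReplayMemory

    memory = ReplayMemory(capacity=100)

    # Transitions are (state, action, reward, next_state, done) tuples
    for t in range(10):
        memory.append(t, 0, 1.0, t + 1, False)

    batch = memory.sample(batch_size=4)                  # 4 random Transition tuples
    runs = memory.sample_n_steps(batch_size=4, steps=2)  # 2 runs of 2 consecutive transitions
    print(batch[0].reward)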
From a47f3f603768f2c7de178e583bb44ed311b138f6 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 19:38:35 -0400 Subject: [PATCH 10/12] Seed documentation --- rltorch/seed.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rltorch/seed.py b/rltorch/seed.py index 74d685b..1b943b7 100644 --- a/rltorch/seed.py +++ b/rltorch/seed.py @@ -4,6 +4,14 @@ import random import torch def set_seed(SEED): + """ + Set the seed for repeatability purposes. + + Parameters + ---------- + SEED : int + The seed to set numpy, random, and torch to. + """ # Set `PYTHONHASHSEED` environment variable at a fixed value environ['PYTHONHASHSEED'] = str(SEED) From 5e7de5bed7fa26ef4457b9c21db79d375de762d4 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 19:38:45 -0400 Subject: [PATCH 11/12] Documentation for logging utility --- rltorch/log.py | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/rltorch/log.py b/rltorch/log.py index efb4ae2..d24a08f 100644 --- a/rltorch/log.py +++ b/rltorch/log.py @@ -3,6 +3,13 @@ import numpy as np import torch class Logger: + """ + Keeps track of lists of items separated by tags. + + Notes + ----- + Logger is a dictionary of lists. + """ def __init__(self): self.log = {} def append(self, tag, value): @@ -26,26 +33,22 @@ class Logger: def __reversed__(self): return reversed(self.log) -# Workaround since we can't use SummaryWriter in a different process -# class LogWriter: -# def __init__(self, logger, writer): -# self.logger = logger -# self.writer = writer -# self.steps = Counter() -# def write(self): -# for key in self.logger.keys(): -# for value in self.logger[key]: -# self.steps[key] += 1 -# if isinstance(value, int) or isinstance(value, float): -# self.writer.add_scalar(key, value, self.steps[key]) -# if isinstance(value, np.ndarray) or isinstance(value, torch.Tensor): -# self.writer.add_histogram(key, value, self.steps[key]) -# self.logger.log = {} -# def close(self): -# self.writer.close() - - class LogWriter: + """ + Takes a logger and writes it to a writer, + while keeping track of the number of times it + has written a certain tag. + + Notes + ----- + Used to keep track of scalars and histograms in + Tensorboard. + + Parameters + ---------- + writer + The tensorboard writer.
+    """ def __init__(self, writer): self.writer = writer self.steps = Counter() From 4c6dc0a2ea090da99e733ed7f000f04016e31651 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 20:16:29 -0400 Subject: [PATCH 12/12] Added network documentation --- docs/source/network.rst | 8 ++++- rltorch/network/ESNetwork.py | 51 ++++++++++++++++++++++++++++---- rltorch/network/Network.py | 32 +++++++++++++++++++- rltorch/network/NoisyLinear.py | 27 +++++++++++++++++ rltorch/network/TargetNetwork.py | 28 ++++++++++++++---- 5 files changed, 134 insertions(+), 12 deletions(-) diff --git a/docs/source/network.rst b/docs/source/network.rst index 1e3d69b..d6e4ef1 100644 --- a/docs/source/network.rst +++ b/docs/source/network.rst @@ -1,4 +1,10 @@ Neural Networks =============== -.. automodule:: rltorch.network +.. autoclass:: rltorch.network.Network + :members: +.. autoclass:: rltorch.network.TargetNetwork + :members: +.. autoclass:: rltorch.network.ESNetwork + :members: +.. autoclass:: rltorch.network.NoisyLinear :members: diff --git a/rltorch/network/ESNetwork.py b/rltorch/network/ESNetwork.py index 7d6d8d0..6c83def 100644 --- a/rltorch/network/ESNetwork.py +++ b/rltorch/network/ESNetwork.py @@ -7,9 +7,36 @@ from copy import deepcopy # What if we want to sometimes do gradient descent as well? class ESNetwork(Network): """ - Network that functions from the paper Evolutionary Strategies (https://arxiv.org/abs/1703.03864) - fitness_fun := model, *args -> fitness_value (float) - We wish to find a model that maximizes the fitness function + Uses evolutionary techniques to optimize a neural network. + + Notes + ----- + Derived from the paper + Evolutionary Strategies + (https://arxiv.org/abs/1703.03864) + + Parameters + ---------- + model : nn.Module + A PyTorch nn.Module. + optimizer + A PyTorch optimizer from torch.optim. + population_size : int + The number of networks to evaluate each iteration. + fitness_fn : function + Function that evaluates a network and returns a higher + number for better-performing networks. + sigma : number + The standard deviation of the Gaussian noise added to + the parameters when creating the population. + config : dict + A dictionary of configuration items. + device + A device to send the weights to. + logger + Keeps track of historical weights. + name + Name used in the logger to differentiate this network in analysis. """ def __init__(self, model, optimizer, population_size, fitness_fn, config, sigma = 0.05, device = None, logger = None, name = ""): super(ESNetwork, self).__init__(model, optimizer, config, device, logger, name) self.population_size = population_size self.fitness = fitness_fn self.sigma = sigma assert self.sigma > 0 - # We're not going to be calculating gradients in the traditional way - # So there's no need to waste computation time keeping track def __call__(self, *args): + """ + Notes + ----- + Since gradients aren't going to be computed in the + traditional fashion, there is no need to keep + track of the computations performed on the + tensors. + """ with torch.no_grad(): result = self.model(*args) return result @@ -48,6 +81,14 @@ class ESNetwork(Network): return candidate_solutions def calc_gradients(self, *args): + """ + Calculate gradients by shifting parameters + towards the networks with the highest fitness value. + + This is calculated by evaluating the fitness of multiple + networks according to the fitness function specified in + the class.
+    """ ## Generate Noise white_noise_dict, noise_dict = self._generate_noise_dicts() diff --git a/rltorch/network/Network.py b/rltorch/network/Network.py index eeafae9..d16d436 100644 --- a/rltorch/network/Network.py +++ b/rltorch/network/Network.py @@ -1,6 +1,21 @@ class Network: """ - Wrapper around model which provides copy of it instead of trained weights + Wrapper around model and optimizer in PyTorch to abstract away common use cases. + + Parameters + ---------- + model : nn.Module + A PyTorch nn.Module. + optimizer + A PyTorch optimizer from torch.optim. + config : dict + A dictionary of configuration items. + device + A device to send the weights to. + logger + Keeps track of historical weights. + name + Name used in the logger to differentiate this network in analysis. """ def __init__(self, model, optimizer, config, device = None, logger = None, name = ""): self.model = model self.optimizer = optimizer return self.model(*args) def clamp_gradients(self, x = 1): + """ + Forces gradients to stay within the interval [-x, x] + by clamping any component that exceeds the bound. + + Parameters + ---------- + x : number > 0 + Sets the interval to be [-x, x] + """ assert x > 0 for param in self.model.parameters(): param.grad.data.clamp_(-x, x) def zero_grad(self): + """ + Clears out gradients held in the model. + """ self.model.zero_grad() def step(self): + """ + Run a step of the optimizer on `model`. + """ self.optimizer.step() def log_named_parameters(self): diff --git a/rltorch/network/NoisyLinear.py b/rltorch/network/NoisyLinear.py index 578457b..cd8b905 100644 --- a/rltorch/network/NoisyLinear.py +++ b/rltorch/network/NoisyLinear.py @@ -6,6 +6,24 @@ import math # This class utilizes this property of the normal distribution # N(mu, sigma) = mu + sigma * N(0, 1) class NoisyLinear(nn.Linear): + """ + Draws the parameters of nn.Linear from a normal distribution. + The parameters of the normal distribution are registered as + learnable parameters in the neural network. + + Parameters + ---------- + in_features + Size of each input sample. + out_features + Size of each output sample. + sigma_init + The starting standard deviation of Gaussian noise. + bias + If set to False, the layer will not + learn an additive bias. + Default: True + """ def __init__(self, in_features, out_features, sigma_init = 0.017, bias = True): super(NoisyLinear, self).__init__(in_features, out_features, bias = bias) # One of the parameters the network is going to tune is the self.s_normal_weight = nn.init.uniform_(self.bias, -std, std) def forward(self, x): + r""" + Calculates the output :math:`y` through the following: + + :math:`weight \sim N(\mu_1, \sigma_1)` + + :math:`bias \sim N(\mu_2, \sigma_2)` + + :math:`y = weight \cdot x + bias` + """ # Fill s_normal_weight with values from the standard normal distribution self.s_normal_weight.normal_() weight_noise = self.sigma_weight * self.s_normal_weight.clone().requires_grad_() diff --git a/rltorch/network/TargetNetwork.py b/rltorch/network/TargetNetwork.py index dd80365..3339bdd 100644 --- a/rltorch/network/TargetNetwork.py +++ b/rltorch/network/TargetNetwork.py @@ -1,25 +1,43 @@ from copy import deepcopy -# Derived from ptan library class TargetNetwork: """ - Wrapper around model which provides copy of it instead of trained weights + Creates a clone of a network with syncing capabilities. + + Parameters + ---------- + network + The network to clone. + device + The device to put the cloned parameters in.
""" def __init__(self, network, device = None): self.model = network.model self.target_model = deepcopy(network.model) - if network.device is not None: + if device is not None: + self.target_model = self.target_model.to(device) + elif network.device is not None: self.target_model = self.target_model.to(network.device) def __call__(self, *args): return self.model(*args) def sync(self): + """ + Perform a full state sync with the originating model. + """ self.target_model.load_state_dict(self.model.state_dict()) def partial_sync(self, tau): """ - Blend params of target net with params from the model - :param tau: + Partially move closer to the parameters of the originating + model by updating parameters to be a mix of the + originating and the clone models. + + Parameters + ---------- + tau : number + A number between 0-1 which indicates the proportion of the originator and clone in the new clone. """ assert isinstance(tau, float) assert 0.0 < tau <= 1.0