From 324fa5c66738f28752841ee5e6783d3dad11e5b5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2019 18:14:40 +0000 Subject: [PATCH 01/12] Bump werkzeug from 0.14.1 to 0.15.3 Bumps [werkzeug](https://github.com/pallets/werkzeug) from 0.14.1 to 0.15.3. - [Release notes](https://github.com/pallets/werkzeug/releases) - [Changelog](https://github.com/pallets/werkzeug/blob/master/CHANGES.rst) - [Commits](https://github.com/pallets/werkzeug/compare/0.14.1...0.15.3) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9f97f26..7084bc2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,5 +28,5 @@ tensorflow==1.12.0 termcolor==1.1.0 torch==1.0.0 urllib3==1.24.1 -Werkzeug==0.14.1 +Werkzeug==0.15.3 numba==0.42.1 From 6b2b6da9e6ae5d4f8bcef19f4d9f52e5fcb4cd42 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2019 22:14:02 +0000 Subject: [PATCH 02/12] Bump pillow from 5.4.1 to 6.2.0 Bumps [pillow](https://github.com/python-pillow/Pillow) from 5.4.1 to 6.2.0. - [Release notes](https://github.com/python-pillow/Pillow/releases) - [Changelog](https://github.com/python-pillow/Pillow/blob/master/CHANGES.rst) - [Commits](https://github.com/python-pillow/Pillow/compare/5.4.1...6.2.0) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9f97f26..9143967 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ Keras-Preprocessing==1.0.8 Markdown==3.0.1 numpy==1.16.0 opencv-python==4.0.0.21 -Pillow==5.4.1 +Pillow==6.2.0 pkg-resources==0.0.0 protobuf==3.6.1 pyglet==1.3.2 From 35f57a3f220d566c2df0de257778b4c6f437382f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2019 21:27:03 +0000 Subject: [PATCH 03/12] Bump tensorflow from 1.12.0 to 1.15.0 Bumps [tensorflow](https://github.com/tensorflow/tensorflow) from 1.12.0 to 1.15.0. - [Release notes](https://github.com/tensorflow/tensorflow/releases) - [Changelog](https://github.com/tensorflow/tensorflow/blob/master/RELEASE.md) - [Commits](https://github.com/tensorflow/tensorflow/compare/v1.12.0...v1.15.0) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9f97f26..ace5e33 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ scipy==1.2.0 six==1.12.0 tensorboard==1.12.2 tensorboardX==1.6 -tensorflow==1.12.0 +tensorflow==1.15.0 termcolor==1.1.0 torch==1.0.0 urllib3==1.24.1 From 8323b1b073771572351d9db5c3c7bd30dc31e308 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 Jan 2020 04:48:59 +0000 Subject: [PATCH 04/12] Bump urllib3 from 1.24.1 to 1.24.2 Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.24.1 to 1.24.2. 
- [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/master/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.24.1...1.24.2) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ecfa52c..8b10420 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,6 +27,6 @@ tensorboardX==1.6 tensorflow==1.15.0 termcolor==1.1.0 torch==1.0.0 -urllib3==1.24.1 +urllib3==1.24.2 Werkzeug==0.15.3 numba==0.42.1 From c6ca2a4cfea83fc536716cfe285ce37f0f7fb1f9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Jan 2020 22:34:13 +0000 Subject: [PATCH 05/12] Bump tensorflow from 1.15.0 to 1.15.2 Bumps [tensorflow](https://github.com/tensorflow/tensorflow) from 1.15.0 to 1.15.2. - [Release notes](https://github.com/tensorflow/tensorflow/releases) - [Changelog](https://github.com/tensorflow/tensorflow/blob/master/RELEASE.md) - [Commits](https://github.com/tensorflow/tensorflow/compare/v1.15.0...v1.15.2) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8b10420..91331da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ scipy==1.2.0 six==1.12.0 tensorboard==1.12.2 tensorboardX==1.6 -tensorflow==1.15.0 +tensorflow==1.15.2 termcolor==1.1.0 torch==1.0.0 urllib3==1.24.2 From ea62ccf389687f601c555d28135d9ff7d9f9a414 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Sun, 15 Mar 2020 14:27:56 -0400 Subject: [PATCH 06/12] Added templates for unit testing and sphinx documentation --- .gitignore | 6 ++++ docs/Makefile | 20 ++++++++++++ docs/make.bat | 35 ++++++++++++++++++++ docs/source/action_selector.rst | 4 +++ docs/source/agents.rst | 4 +++ docs/source/conf.py | 58 +++++++++++++++++++++++++++++++++ docs/source/env.rst | 5 +++ docs/source/index.rst | 15 +++++++++ docs/source/log.rst | 4 +++ docs/source/memory.rst | 4 +++ docs/source/mp.rst | 4 +++ docs/source/network.rst | 4 +++ docs/source/scheduler.rst | 4 +++ docs/source/seed.rst | 4 +++ requirements.txt | 32 ------------------ setup.py | 9 ++++- tests/test.py | 6 ++++ tox.ini | 17 ++++++++++ 18 files changed, 202 insertions(+), 33 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/source/action_selector.rst create mode 100644 docs/source/agents.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/env.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/log.rst create mode 100644 docs/source/memory.rst create mode 100644 docs/source/mp.rst create mode 100644 docs/source/network.rst create mode 100644 docs/source/scheduler.rst create mode 100644 docs/source/seed.rst delete mode 100644 requirements.txt create mode 100644 tests/test.py create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore index 1f091d4..126808d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,9 @@ __pycache__/ *.py[cod] rlenv/ runs/ +*.tox +*.coverage +.vscode/ +docs/build +.mypy_cache/ +*egg-info* diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. 
+SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..6247f7e --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/action_selector.rst b/docs/source/action_selector.rst new file mode 100644 index 0000000..17cfdfb --- /dev/null +++ b/docs/source/action_selector.rst @@ -0,0 +1,4 @@ +Action Selector +=============== +.. automodule:: rltorch.action_selector + :members: diff --git a/docs/source/agents.rst b/docs/source/agents.rst new file mode 100644 index 0000000..76ef275 --- /dev/null +++ b/docs/source/agents.rst @@ -0,0 +1,4 @@ +Agents +====== +.. automodule:: rltorch.agents + :members: diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..5eef823 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,58 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- + +project = 'RLTorch' +copyright = '2020, Brandon Rozek' +author = 'Brandon Rozek' + +# The full version, including alpha/beta/rc tags +release = '0.1.0' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + 'sphinx.ext.autosummary', + 'sphinx.ext.napoleon', + "sphinx.ext.viewcode", + "sphinx.ext.mathjax", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. 
+# +html_theme = 'alabaster' + + +html_sidebars = { + '**': [ + 'about.html', + 'navigation.html', + 'searchbox.html', + ] +} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] diff --git a/docs/source/env.rst b/docs/source/env.rst new file mode 100644 index 0000000..80b6c53 --- /dev/null +++ b/docs/source/env.rst @@ -0,0 +1,5 @@ +Environment Utilities +===================== +.. automodule:: rltorch.env + :members: + diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..1f90744 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,15 @@ +Welcome to RLTorch's documentation! +=================================== +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + action_selector + agents + env + memory + mp + network + scheduler + log + seed diff --git a/docs/source/log.rst b/docs/source/log.rst new file mode 100644 index 0000000..658ecfc --- /dev/null +++ b/docs/source/log.rst @@ -0,0 +1,4 @@ +Logging +======= +.. automodule:: rltorch.log + :members: diff --git a/docs/source/memory.rst b/docs/source/memory.rst new file mode 100644 index 0000000..3446f3d --- /dev/null +++ b/docs/source/memory.rst @@ -0,0 +1,4 @@ +Memory Structures +================= +.. automodule:: rltorch.memory + :members: diff --git a/docs/source/mp.rst b/docs/source/mp.rst new file mode 100644 index 0000000..7fad488 --- /dev/null +++ b/docs/source/mp.rst @@ -0,0 +1,4 @@ +Multiprocessing +=============== +.. automodule:: rltorch.mp + :members: diff --git a/docs/source/network.rst b/docs/source/network.rst new file mode 100644 index 0000000..1e3d69b --- /dev/null +++ b/docs/source/network.rst @@ -0,0 +1,4 @@ +Neural Networks +=============== +.. automodule:: rltorch.network + :members: diff --git a/docs/source/scheduler.rst b/docs/source/scheduler.rst new file mode 100644 index 0000000..61fd10c --- /dev/null +++ b/docs/source/scheduler.rst @@ -0,0 +1,4 @@ +Hyperparameter Scheduling +========================= +.. automodule:: rltorch.scheduler + :members: diff --git a/docs/source/seed.rst b/docs/source/seed.rst new file mode 100644 index 0000000..933023b --- /dev/null +++ b/docs/source/seed.rst @@ -0,0 +1,4 @@ +Seeding +======= +.. 
automodule:: rltorch.seed + :members: diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 91331da..0000000 --- a/requirements.txt +++ /dev/null @@ -1,32 +0,0 @@ -absl-py==0.7.0 -astor==0.7.1 -atari-py==0.1.7 -certifi==2018.11.29 -chardet==3.0.4 -future==0.17.1 -gast==0.2.2 -grpcio==1.18.0 -gym==0.10.11 -h5py==2.9.0 -idna==2.8 -Keras-Applications==1.0.7 -Keras-Preprocessing==1.0.8 -Markdown==3.0.1 -numpy==1.16.0 -opencv-python==4.0.0.21 -Pillow==6.2.0 -pkg-resources==0.0.0 -protobuf==3.6.1 -pyglet==1.3.2 -PyOpenGL==3.1.0 -requests==2.21.0 -scipy==1.2.0 -six==1.12.0 -tensorboard==1.12.2 -tensorboardX==1.6 -tensorflow==1.15.2 -termcolor==1.1.0 -torch==1.0.0 -urllib3==1.24.2 -Werkzeug==0.15.3 -numba==0.42.1 diff --git a/setup.py b/setup.py index c0763be..66de91d 100644 --- a/setup.py +++ b/setup.py @@ -12,4 +12,11 @@ setuptools.setup( description="Reinforcement Learning Framework for PyTorch", version="0.1", packages=setuptools.find_packages(), -) \ No newline at end of file + install_requires=[ + "numpy~=1.16.0", + "opencv-python~=4.2.0.32", + "gym~=0.10.11", + "torch~=1.4.0", + "numba~=0.48.0" + ] +) diff --git a/tests/test.py b/tests/test.py new file mode 100644 index 0000000..22eea34 --- /dev/null +++ b/tests/test.py @@ -0,0 +1,6 @@ +import rltorch +import unittest + +class Test(unittest.TestCase): + def test(self): + pass diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..bf11c08 --- /dev/null +++ b/tox.ini @@ -0,0 +1,17 @@ +[tox] +envlist = + py36 + py37 + py38 + +[testenv] +deps = coverage +commands = + coverage run --source=tests,rltorch -m unittest discover tests + + +[testenv:py38] +commands = + coverage run --source=tests,rltorch -m unittest discover tests + coverage report -m + From 720bb1b051c9dcd5054606d076d2dca407ddffa2 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 17:59:56 -0400 Subject: [PATCH 07/12] Documented scheduler module --- docs/source/scheduler.rst | 4 +++- rltorch/scheduler/ExponentialScheduler.py | 25 +++++++++++++++++++++ rltorch/scheduler/LinearScheduler.py | 22 ++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/docs/source/scheduler.rst b/docs/source/scheduler.rst index 61fd10c..5f698d3 100644 --- a/docs/source/scheduler.rst +++ b/docs/source/scheduler.rst @@ -1,4 +1,6 @@ Hyperparameter Scheduling ========================= -.. automodule:: rltorch.scheduler +.. autoclass:: rltorch.scheduler.LinearScheduler + :members: +.. autoclass:: rltorch.scheduler.ExponentialScheduler :members: diff --git a/rltorch/scheduler/ExponentialScheduler.py b/rltorch/scheduler/ExponentialScheduler.py index ca8d162..f5d3c65 100644 --- a/rltorch/scheduler/ExponentialScheduler.py +++ b/rltorch/scheduler/ExponentialScheduler.py @@ -1,5 +1,30 @@ from .Scheduler import Scheduler class ExponentialScheduler(Scheduler): + r""" + An exponential scheduler that, given a certain number + of iterations, spaces the values between + a start and an end point in an exponential order. + + Notes + ----- + The formula used to produce the value :math:`y` is based on the number of + times you call `next`. (denoted as :math:`i`) + + :math:`y(1) = initial\_value` + :math:`y(i) = y(1) \cdot base^{i - 1}` + :math:`base = \sqrt[iterations]{\frac{end\_value}{initial\_value}}`. + + Another property is that :math:`y(iterations) = end\_value`. + + Parameters + ---------- + initial_value : number + The first value returned in the schedule.
+    end_value: number +        The value returned when the maximum number of iterations is reached +    iterations: int +        The total number of iterations +    """ def __init__(self, initial_value, end_value, iterations): super(ExponentialScheduler, self).__init__(initial_value, end_value, iterations) self.base = (end_value / initial_value) ** (1.0 / iterations) diff --git a/rltorch/scheduler/LinearScheduler.py b/rltorch/scheduler/LinearScheduler.py index 984f3eb..3637538 100644 --- a/rltorch/scheduler/LinearScheduler.py +++ b/rltorch/scheduler/LinearScheduler.py @@ -1,5 +1,27 @@ from .Scheduler import Scheduler class LinearScheduler(Scheduler): + r""" + A linear scheduler that, given a certain number + of iterations, equally spaces the values between + a start and an end point. + + Notes + ----- + The formula used to produce the value :math:`y` is based on the number of + times you call `next`. (denoted as :math:`i`) + + :math:`y(i) = slope \cdot (i - 1) + initial\_value` + where :math:`slope = \frac{end\_value - initial\_value)}{iterations}`. + + Parameters + ---------- + initial_value : number + The first value returned in the schedule. + end_value: number + The value returned when the maximum number of iterations is reached + iterations: int + The total number of iterations + """ def __init__(self, initial_value, end_value, iterations): super(LinearScheduler, self).__init__(initial_value, end_value, iterations) self.slope = (end_value - initial_value) / iterations From 711c2e8dd1f88bc03fde60d86715c9b1c65d3b9f Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 19:30:56 -0400 Subject: [PATCH 08/12] Cleaned up scheduler docs --- rltorch/scheduler/ExponentialScheduler.py | 4 +++- rltorch/scheduler/LinearScheduler.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/rltorch/scheduler/ExponentialScheduler.py b/rltorch/scheduler/ExponentialScheduler.py index f5d3c65..97a9ebd 100644 --- a/rltorch/scheduler/ExponentialScheduler.py +++ b/rltorch/scheduler/ExponentialScheduler.py @@ -11,8 +11,10 @@ class ExponentialScheduler(Scheduler): times you call `next`. (denoted as :math:`i`) :math:`y(1) = initial\_value` + + :math:`base = \sqrt[iterations]{\frac{end\_value}{initial\_value}}` + :math:`y(i) = y(1) \cdot base^{i - 1}` - :math:`base = \sqrt[iterations]{\frac{end\_value}{initial\_value}}`. Another property is that :math:`y(iterations) = end\_value`. diff --git a/rltorch/scheduler/LinearScheduler.py b/rltorch/scheduler/LinearScheduler.py index 3637538..c15534f 100644 --- a/rltorch/scheduler/LinearScheduler.py +++ b/rltorch/scheduler/LinearScheduler.py @@ -10,8 +10,10 @@ class LinearScheduler(Scheduler): The formula used to produce the value :math:`y` is based on the number of times you call `next`. (denoted as :math:`i`) - :math:`y(i) = slope \cdot (i - 1) + initial\_value` - where :math:`slope = \frac{end\_value - initial\_value)}{iterations}`. + :math:`y(1) = initial\_value` + + :math:`y(i) = slope(i - 1) + y(1)` + where :math:`slope = \frac{end\_value - initial\_value}{iterations}`. Parameters ---------- initial_value : number The first value returned in the schedule. end_value: number The value returned when the maximum number of iterations is reached iterations: int The total number of iterations
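The two scheduler docstrings above give closed-form values for the i-th call to `next`. As a standalone illustration of those formulas (the helper names below are made up for this sketch and are not part of rltorch):

    def linear_value(initial_value, end_value, iterations, i):
        # y(i) = slope * (i - 1) + y(1), with slope = (end_value - initial_value) / iterations
        slope = (end_value - initial_value) / iterations
        return slope * (i - 1) + initial_value

    def exponential_value(initial_value, end_value, iterations, i):
        # y(i) = y(1) * base^(i - 1), with base = (end_value / initial_value)^(1 / iterations)
        base = (end_value / initial_value) ** (1.0 / iterations)
        return initial_value * base ** (i - 1)

    # e.g. an exploration rate decayed from 1.0 toward 0.1 over 1000 calls to next()
    print(linear_value(1.0, 0.1, 1000, 1))         # 1.0
    print(exponential_value(1.0, 0.1, 1000, 500))  # roughly 0.32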
From 1cad98fcf9e79887f645f1b39231bb841ee845bf Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 19:31:09 -0400 Subject: [PATCH 09/12] Created documentation for memory module --- docs/source/memory.rst | 6 ++- rltorch/memory/EpisodeMemory.py | 23 +++++++++- rltorch/memory/PrioritizedReplayMemory.py | 54 +++++++++++++++++++---- rltorch/memory/ReplayMemory.py | 52 +++++++++++++++++++--- 4 files changed, 119 insertions(+), 16 deletions(-) diff --git a/docs/source/memory.rst b/docs/source/memory.rst index 3446f3d..7cacd28 100644 --- a/docs/source/memory.rst +++ b/docs/source/memory.rst @@ -1,4 +1,8 @@ Memory Structures ================= -.. automodule:: rltorch.memory +.. autoclass:: rltorch.memory.ReplayMemory + :members: +.. autoclass:: rltorch.memory.PrioritizedReplayMemory + :members: +.. autoclass:: rltorch.memory.EpisodeMemory :members: diff --git a/rltorch/memory/EpisodeMemory.py b/rltorch/memory/EpisodeMemory.py index 0957465..27efa69 100644 --- a/rltorch/memory/EpisodeMemory.py +++ b/rltorch/memory/EpisodeMemory.py @@ -5,22 +5,43 @@ Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state', 'done')) class EpisodeMemory(object): + """ + Memory structure that stores an entire episode and + the log probabilities associated with each observation. + """ def __init__(self): self.memory = [] self.log_probs = [] def append(self, *args): - """Saves a transition.""" + """ + Adds a transition to the memory. + + Parameters + ---------- + *args + The state, action, reward, next_state, done tuple + """ self.memory.append(Transition(*args)) def append_log_probs(self, logprob): + """ + Adds a log probability to the memory. + """ self.log_probs.append(logprob) def clear(self): + """ + Clears the transitions and log probabilities. + """ self.memory.clear() self.log_probs.clear() def recall(self): + """ + Returns a list of the transitions with their + associated log probabilities. + """ if len(self.memory) != len(self.log_probs): raise ValueError("Memory and recorded log probabilities must be the same length.") return list(zip(*tuple(zip(*self.memory)), self.log_probs)) diff --git a/rltorch/memory/PrioritizedReplayMemory.py b/rltorch/memory/PrioritizedReplayMemory.py index 58843e3..1bf153a 100644 --- a/rltorch/memory/PrioritizedReplayMemory.py +++ b/rltorch/memory/PrioritizedReplayMemory.py @@ -147,7 +147,9 @@ class MinSegmentTree(SegmentTree): class PrioritizedReplayMemory(ReplayMemory): def __init__(self, capacity, alpha): - """Create Prioritized Replay buffer. + """ + Create Prioritized Replay buffer. + Parameters ---------- capacity: int Max number of transitions to store in the buffer. When the buffer overflows the old memories are dropped. alpha: float how much prioritization is used (0 - no prioritization, 1 - full prioritization) - See Also - -------- - ReplayBuffer.__init__ """ super(PrioritizedReplayMemory, self).__init__(capacity) assert alpha >= 0 self._alpha = alpha it_capacity = 1 while it_capacity < capacity: it_capacity *= 2 self._it_sum = SumSegmentTree(it_capacity) self._it_min = MinSegmentTree(it_capacity) self._max_priority = 1.0 def append(self, *args, **kwargs): - """See ReplayBuffer.store_effect""" + """ + Adds a transition to the buffer and assigns it an initial priority. + + Parameters + ---------- + *args + The state, action, reward, next_state, done tuple + """ idx = self.position super().append(*args, **kwargs) self._it_sum[idx] = self._max_priority ** self._alpha self._it_min[idx] = self._max_priority ** self._alpha @jit(forceobj = True) def _sample_proportional(self, batch_size): res = [] for _ in range(batch_size): mass = random.random() * self._it_sum.sum(0, len(self.memory) - 1) idx = self._it_sum.find_prefixsum_idx(mass) res.append(idx) return res def sample(self, batch_size, beta): -        """Sample a batch of experiences.
- compared to ReplayBuffer.sample - it also returns importance weights and idxes + """ + Sample a batch of experiences + while returning the importance weights and indexes + of the sampled experiences. + Parameters ---------- batch_size: int How many transitions to sample. beta: float To what degree to use importance weights (0 - no corrections, 1 - full correction) + Returns ------- weights: np.array Array of shape (batch_size,) and dtype np.float32 denoting importance weight of each sampled transition idxes: np.array Array of shape (batch_size,) and dtype np.int32 indexes in the buffer of the sampled experiences """ assert beta > 0 idxes = self._sample_proportional(batch_size) weights = [] p_min = self._it_min.min() / self._it_sum.sum() max_weight = (p_min * len(self.memory)) ** (-beta) for idx in idxes: p_sample = self._it_sum[idx] / self._it_sum.sum() weight = (p_sample * len(self.memory)) ** (-beta) weights.append(weight / max_weight) weights = torch.tensor(weights, device = self.device) batch = self._encode_sample(idxes) batch.append(weights) batch.append(idxes) return batch def sample_n_steps(self, batch_size, steps, beta): + r""" + Sample a batch of sequential experiences + while returning the importance weights and indexes + of the sampled experiences. + + Parameters + ---------- + batch_size: int + How many transitions to sample. + beta: float + To what degree to use importance weights + (0 - no corrections, 1 - full correction) + + Notes + ----- + The number of batches sampled is :math:`\lfloor\frac{batch\_size}{steps}\rfloor`. + + Returns + ------- + weights: np.array + Array of shape (batch_size,) and dtype np.float32 + denoting importance weight of each sampled transition + idxes: np.array + Array of shape (batch_size,) and dtype np.int32 + indexes in the buffer of the sampled experiences + """ assert beta > 0 sample_size = batch_size // steps @jit(forceobj = True) def update_priorities(self, idxes, priorities): - """Update priorities of sampled transitions. + """ + Update priorities of sampled transitions. sets priority of transition at index idxes[i] in buffer to priorities[i]. + Parameters ---------- idxes: [int] diff --git a/rltorch/memory/ReplayMemory.py b/rltorch/memory/ReplayMemory.py index aa32ab7..5507185 100644 --- a/rltorch/memory/ReplayMemory.py +++ b/rltorch/memory/ReplayMemory.py @@ -4,21 +4,38 @@ import torch Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state', 'done')) -# Implements a Ring Buffer class ReplayMemory(object): + """ + Creates a ring buffer of a fixed size. + + Parameters + ---------- + capacity : int + The maximum size of the buffer + """ def __init__(self, capacity): self.capacity = capacity self.memory = [] self.position = 0 def append(self, *args): - """Saves a transition.""" + """ + Adds a transition to the buffer. + + Parameters + ---------- + *args + The state, action, reward, next_state, done tuple + """ if len(self.memory) < self.capacity: self.memory.append(None) self.memory[self.position] = Transition(*args) self.position = (self.position + 1) % self.capacity def clear(self): + """ + Clears the buffer. + """ self.memory.clear() self.position = 0 def sample(self, batch_size): + """ + Returns a random sample from the buffer. + + Parameters + ---------- + batch_size : int + The number of observations to sample. + """ return random.sample(self.memory, batch_size) def sample_n_steps(self, batch_size, steps): - idxes = random.sample(range(len(self.memory) - steps), batch_size // steps) + r""" + Returns a random sample of sequential batches of size steps. + + Notes + ----- + The number of batches sampled is :math:`\lfloor\frac{batch\_size}{steps}\rfloor`. + + Parameters + ---------- + batch_size : int + The total number of observations to sample. + steps : int + The number of consecutive observations to sample starting from each selected index. +    """ +    idxes = random.sample( +        range(len(self.memory) - steps), +        batch_size // steps +    ) step_idxes = [] for i in idxes: step_idxes += range(i, i + steps) @@ -56,10 +98,10 @@ class ReplayMemory(object): return value in self.memory def __getitem__(self, index): - return self.memory[index] + return self.memory[index % self.capacity] def __setitem__(self, index, value): - self.memory[index] = value + self.memory[index % self.capacity] = value def __reversed__(self): return reversed(self.memory)
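A minimal usage sketch of the ring buffer documented above, assuming rltorch is installed and that ReplayMemory is importable from rltorch.memory as the docs in this patch declare; the transition values are toy numbers:

    from rltorch.memory import ReplayMemory

    memory = ReplayMemory(capacity=100)

    # Transitions are (state, action, reward, next_state, done) tuples
    for t in range(10):
        memory.append(t, 0, 1.0, t + 1, False)

    batch = memory.sample(batch_size=4)                  # 4 random Transition tuples
    runs = memory.sample_n_steps(batch_size=4, steps=2)  # 2 runs of 2 consecutive transitions
    print(batch[0].reward)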
From a47f3f603768f2c7de178e583bb44ed311b138f6 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 19:38:35 -0400 Subject: [PATCH 10/12] Seed documentation --- rltorch/seed.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rltorch/seed.py b/rltorch/seed.py index 74d685b..1b943b7 100644 --- a/rltorch/seed.py +++ b/rltorch/seed.py @@ -4,6 +4,14 @@ import random import torch def set_seed(SEED): + """ + Set the seed for repeatability purposes. + + Parameters + ---------- + SEED : int + The seed to set numpy, random, and torch to. + """ # Set `PYTHONHASHSEED` environment variable at a fixed value environ['PYTHONHASHSEED'] = str(SEED) From 5e7de5bed7fa26ef4457b9c21db79d375de762d4 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 19:38:45 -0400 Subject: [PATCH 11/12] Documentation for logging utility --- rltorch/log.py | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/rltorch/log.py b/rltorch/log.py index efb4ae2..d24a08f 100644 --- a/rltorch/log.py +++ b/rltorch/log.py @@ -3,6 +3,13 @@ import numpy as np import torch class Logger: + """ + Keeps track of lists of items separated by tags. + + Notes + ----- + Logger is a dictionary of lists. + """ def __init__(self): self.log = {} def append(self, tag, value): @@ -26,26 +33,22 @@ class Logger: def __reversed__(self): return reversed(self.log) -# Workaround since we can't use SummaryWriter in a different process -# class LogWriter: -# def __init__(self, logger, writer): -# self.logger = logger -# self.writer = writer -# self.steps = Counter() -# def write(self): -# for key in self.logger.keys(): -# for value in self.logger[key]: -# self.steps[key] += 1 -# if isinstance(value, int) or isinstance(value, float): -# self.writer.add_scalar(key, value, self.steps[key]) -# if isinstance(value, np.ndarray) or isinstance(value, torch.Tensor): -# self.writer.add_histogram(key, value, self.steps[key]) -# self.logger.log = {} -# def close(self): -# self.writer.close() - - class LogWriter: + """ + Takes a logger and writes it to a writer, + while keeping track of the number of times it + has written a certain tag. + + Notes + ----- + Used to keep track of scalars and histograms in + Tensorboard. + + Parameters + ---------- + writer + The tensorboard writer.
+    """ def __init__(self, writer): self.writer = writer self.steps = Counter() From 4c6dc0a2ea090da99e733ed7f000f04016e31651 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Fri, 20 Mar 2020 20:16:29 -0400 Subject: [PATCH 12/12] Added network documentation --- docs/source/network.rst | 8 ++++- rltorch/network/ESNetwork.py | 51 ++++++++++++++++++++++++++++---- rltorch/network/Network.py | 32 +++++++++++++++++++- rltorch/network/NoisyLinear.py | 27 +++++++++++++++++ rltorch/network/TargetNetwork.py | 28 ++++++++++++++---- 5 files changed, 134 insertions(+), 12 deletions(-) diff --git a/docs/source/network.rst b/docs/source/network.rst index 1e3d69b..d6e4ef1 100644 --- a/docs/source/network.rst +++ b/docs/source/network.rst @@ -1,4 +1,10 @@ Neural Networks =============== -.. automodule:: rltorch.network +.. autoclass:: rltorch.network.Network + :members: +.. autoclass:: rltorch.network.TargetNetwork + :members: +.. autoclass:: rltorch.network.ESNetwork + :members: +.. autoclass:: rltorch.network.NoisyLinear :members: diff --git a/rltorch/network/ESNetwork.py b/rltorch/network/ESNetwork.py index 7d6d8d0..6c83def 100644 --- a/rltorch/network/ESNetwork.py +++ b/rltorch/network/ESNetwork.py @@ -7,9 +7,36 @@ from copy import deepcopy # What if we want to sometimes do gradient descent as well? class ESNetwork(Network): """ - Network that functions from the paper Evolutionary Strategies (https://arxiv.org/abs/1703.03864) - fitness_fun := model, *args -> fitness_value (float) - We wish to find a model that maximizes the fitness function + Uses evolutionary techniques to optimize a neural network. + + Notes + ----- + Derived from the paper + Evolutionary Strategies + (https://arxiv.org/abs/1703.03864) + + Parameters + ---------- + model : nn.Module + A PyTorch nn.Module. + optimizer + A PyTorch optimizer from torch.optim. + population_size : int + The number of networks to evaluate each iteration. + fitness_fn : function + Function that evaluates a network and returns a higher + number for better-performing networks. + sigma : number + The standard deviation of the Gaussian noise added to + the parameters when creating the population. + config : dict + A dictionary of configuration items. + device + A device to send the weights to. + logger + Keeps track of historical weights. + name + Name used in the logger to differentiate this network in analysis. """ def __init__(self, model, optimizer, population_size, fitness_fn, config, sigma = 0.05, device = None, logger = None, name = ""): super(ESNetwork, self).__init__(model, optimizer, config, device, logger, name) self.population_size = population_size self.fitness = fitness_fn self.sigma = sigma assert self.sigma > 0 - # We're not going to be calculating gradients in the traditional way - # So there's no need to waste computation time keeping track def __call__(self, *args): + """ + Notes + ----- + Since gradients aren't going to be computed in the + traditional fashion, there is no need to keep + track of the computations performed on the + tensors. + """ with torch.no_grad(): result = self.model(*args) return result @@ -48,6 +81,14 @@ class ESNetwork(Network): return candidate_solutions def calc_gradients(self, *args): + """ + Calculate gradients by shifting parameters + towards the networks with the highest fitness value. + + This is calculated by evaluating the fitness of multiple + networks according to the fitness function specified in + the class.
+    """ ## Generate Noise white_noise_dict, noise_dict = self._generate_noise_dicts() diff --git a/rltorch/network/Network.py b/rltorch/network/Network.py index eeafae9..d16d436 100644 --- a/rltorch/network/Network.py +++ b/rltorch/network/Network.py @@ -1,6 +1,21 @@ class Network: """ - Wrapper around model which provides copy of it instead of trained weights + Wrapper around model and optimizer in PyTorch to abstract away common use cases. + + Parameters + ---------- + model : nn.Module + A PyTorch nn.Module. + optimizer + A PyTorch optimizer from torch.optim. + config : dict + A dictionary of configuration items. + device + A device to send the weights to. + logger + Keeps track of historical weights. + name + Name used in the logger to differentiate this network in analysis. """ def __init__(self, model, optimizer, config, device = None, logger = None, name = ""): self.model = model self.optimizer = optimizer return self.model(*args) def clamp_gradients(self, x = 1): + """ + Forces gradients to stay within the interval [-x, x] + by clamping any component that exceeds the bound. + + Parameters + ---------- + x : number > 0 + Sets the interval to be [-x, x] + """ assert x > 0 for param in self.model.parameters(): param.grad.data.clamp_(-x, x) def zero_grad(self): + """ + Clears out gradients held in the model. + """ self.model.zero_grad() def step(self): + """ + Run a step of the optimizer on `model`. + """ self.optimizer.step() def log_named_parameters(self): diff --git a/rltorch/network/NoisyLinear.py b/rltorch/network/NoisyLinear.py index 578457b..cd8b905 100644 --- a/rltorch/network/NoisyLinear.py +++ b/rltorch/network/NoisyLinear.py @@ -6,6 +6,24 @@ import math # This class utilizes this property of the normal distribution # N(mu, sigma) = mu + sigma * N(0, 1) class NoisyLinear(nn.Linear): + """ + Draws the parameters of nn.Linear from a normal distribution. + The parameters of the normal distribution are registered as + learnable parameters in the neural network. + + Parameters + ---------- + in_features + Size of each input sample. + out_features + Size of each output sample. + sigma_init + The starting standard deviation of Gaussian noise. + bias + If set to False, the layer will not + learn an additive bias. + Default: True + """ def __init__(self, in_features, out_features, sigma_init = 0.017, bias = True): super(NoisyLinear, self).__init__(in_features, out_features, bias = bias) # One of the parameters the network is going to tune is the self.s_normal_weight = nn.init.uniform_(self.bias, -std, std) def forward(self, x): + r""" + Calculates the output :math:`y` through the following: + + :math:`weight \sim N(\mu_1, \sigma_1)` + + :math:`bias \sim N(\mu_2, \sigma_2)` + + :math:`y = weight \cdot x + bias` + """ # Fill s_normal_weight with values from the standard normal distribution self.s_normal_weight.normal_() weight_noise = self.sigma_weight * self.s_normal_weight.clone().requires_grad_() diff --git a/rltorch/network/TargetNetwork.py b/rltorch/network/TargetNetwork.py index dd80365..3339bdd 100644 --- a/rltorch/network/TargetNetwork.py +++ b/rltorch/network/TargetNetwork.py @@ -1,25 +1,43 @@ from copy import deepcopy -# Derived from ptan library class TargetNetwork: """ - Wrapper around model which provides copy of it instead of trained weights + Creates a clone of a network with syncing capabilities. + + Parameters + ---------- + network + The network to clone. + device + The device to put the cloned parameters in.
""" def __init__(self, network, device = None): self.model = network.model self.target_model = deepcopy(network.model) - if network.device is not None: + if device is not None: + self.target_model = self.target_model.to(device) + elif network.device is not None: self.target_model = self.target_model.to(network.device) def __call__(self, *args): return self.model(*args) def sync(self): + """ + Perform a full state sync with the originating model. + """ self.target_model.load_state_dict(self.model.state_dict()) def partial_sync(self, tau): """ - Blend params of target net with params from the model - :param tau: + Partially move closer to the parameters of the originating + model by updating parameters to be a mix of the + originating and the clone models. + + Parameters + ---------- + tau : number + A number between 0-1 which indicates the proportion of the originator and clone in the new clone. """ assert isinstance(tau, float) assert 0.0 < tau <= 1.0