Created documentation for memory module
parent 711c2e8dd1
commit 1cad98fcf9
4 changed files with 119 additions and 16 deletions
@@ -1,4 +1,8 @@
Memory Structures
=================
.. automodule:: rltorch.memory
.. autoclass:: rltorch.memory.ReplayMemory
    :members:
.. autoclass:: rltorch.memory.PrioritizedReplayMemory
    :members:
.. autoclass:: rltorch.memory.EpisodeMemory
    :members:
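The three autoclass entries assume that ReplayMemory, PrioritizedReplayMemory, and EpisodeMemory are all exposed from the rltorch.memory package. A quick sanity check of that assumption (a sketch, requiring the rltorch package from this repository to be installed):

# Sketch: confirm the targets of the autoclass directives resolve.
# Assumes the rltorch package from this repository is installed.
from rltorch.memory import EpisodeMemory, PrioritizedReplayMemory, ReplayMemory

for cls in (ReplayMemory, PrioritizedReplayMemory, EpisodeMemory):
    # Sphinx autodoc renders each class docstring on the generated page.
    print(cls.__name__, bool(cls.__doc__))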
@@ -5,22 +5,43 @@ Transition = namedtuple('Transition',
    ('state', 'action', 'reward', 'next_state', 'done'))

class EpisodeMemory(object):
    """
    Memory structure that stores an entire episode and
    the observations' associated log-based probabilities.
    """
    def __init__(self):
        self.memory = []
        self.log_probs = []

    def append(self, *args):
        """Saves a transition."""
        """
        Adds a transition to the memory.

        Parameters
        ----------
        *args
            The state, action, reward, next_state, done tuple
        """
        self.memory.append(Transition(*args))

    def append_log_probs(self, logprob):
        """
        Adds a log-based probability to the observation.
        """
        self.log_probs.append(logprob)

    def clear(self):
        """
        Clears the transitions and log-based probabilities.
        """
        self.memory.clear()
        self.log_probs.clear()

    def recall(self):
        """
        Return a list of the transitions with their
        associated log-based probabilities.
        """
        if len(self.memory) != len(self.log_probs):
            raise ValueError("Memory and recorded log probabilities must be the same length.")
        return list(zip(*tuple(zip(*self.memory)), self.log_probs))
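As a usage sketch of the EpisodeMemory API above (placeholder transition values; it assumes the rltorch package from this repository is importable):

# Sketch: store one short episode and recall it with its log probabilities.
from rltorch.memory import EpisodeMemory

memory = EpisodeMemory()
for t in range(3):
    # placeholder values standing in for real environment data
    state, action, reward, next_state, done = t, 0, 1.0, t + 1, (t == 2)
    memory.append(state, action, reward, next_state, done)
    memory.append_log_probs(-0.5)   # log probability of the chosen action

# recall() zips each stored Transition back together with its log probability,
# so each entry is (state, action, reward, next_state, done, log_prob).
episode = memory.recall()
print(episode[0])

memory.clear()   # drop the episode once it has been consumed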
@@ -147,7 +147,9 @@ class MinSegmentTree(SegmentTree):

class PrioritizedReplayMemory(ReplayMemory):
    def __init__(self, capacity, alpha):
        """Create Prioritized Replay buffer.
        """
        Create Prioritized Replay buffer.

        Parameters
        ----------
        capacity: int
@@ -156,9 +158,6 @@ class PrioritizedReplayMemory(ReplayMemory):
        alpha: float
            how much prioritization is used
            (0 - no prioritization, 1 - full prioritization)
        See Also
        --------
        ReplayBuffer.__init__
        """
        super(PrioritizedReplayMemory, self).__init__(capacity)
        assert alpha >= 0
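A minimal construction sketch for the constructor above, assuming the rltorch package from this repository is importable; the capacity and alpha values are arbitrary illustrations:

# Sketch: alpha controls how strongly priorities skew sampling
# (0 behaves like the uniform ReplayMemory, 1 samples proportionally to priority).
from rltorch.memory import PrioritizedReplayMemory

per_memory = PrioritizedReplayMemory(capacity=1000, alpha=0.6)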
@@ -173,7 +172,14 @@ class PrioritizedReplayMemory(ReplayMemory):
        self._max_priority = 1.0

    def append(self, *args, **kwargs):
        """See ReplayBuffer.store_effect"""
        """
        Adds a transition to the buffer and assigns it an initial prioritization.

        Parameters
        ----------
        *args
            The state, action, reward, next_state, done tuple
        """
        idx = self.position
        super().append(*args, **kwargs)
        self._it_sum[idx] = self._max_priority ** self._alpha
@@ -191,10 +197,11 @@ class PrioritizedReplayMemory(ReplayMemory):
        return res

    def sample(self, batch_size, beta):
        """Sample a batch of experiences.
        compared to ReplayBuffer.sample
        it also returns importance weights and idxes
        """
        Sample a batch of experiences,
        while returning importance weights and idxes
        of sampled experiences.

        Parameters
        ----------
        batch_size: int
@@ -202,6 +209,7 @@ class PrioritizedReplayMemory(ReplayMemory):
        beta: float
            To what degree to use importance weights
            (0 - no corrections, 1 - full correction)

        Returns
        -------
        weights: np.array
@@ -232,6 +240,32 @@ class PrioritizedReplayMemory(ReplayMemory):
        return batch

    def sample_n_steps(self, batch_size, steps, beta):
        r"""
        Sample a batch of sequential experiences,
        while returning importance weights and idxes
        of sampled experiences.

        Parameters
        ----------
        batch_size: int
            How many transitions to sample.
        beta: float
            To what degree to use importance weights
            (0 - no corrections, 1 - full correction)

        Notes
        -----
        The number of batches sampled is :math:`\lfloor\frac{batch\_size}{steps}\rfloor`.

        Returns
        -------
        weights: np.array
            Array of shape (batch_size,) and dtype np.float32
            denoting importance weight of each sampled transition
        idxes: np.array
            Array of shape (batch_size,) and dtype np.int32
            indexes in buffer of sampled experiences
        """
        assert beta > 0

        sample_size = batch_size // steps
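A worked instance of the Notes formula above, with illustrative numbers only:

# Worked example: batch_size = 30 and steps = 4 give floor(30 / 4) = 7 batches,
# i.e. 7 sequences of 4 consecutive transitions.
batch_size, steps = 30, 4
sample_size = batch_size // steps    # floor(30 / 4)
print(sample_size)                   # 7 batches sampled
print(sample_size * steps)           # 28 transitions across those batches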
@@ -262,9 +296,11 @@ class PrioritizedReplayMemory(ReplayMemory):

    @jit(forceobj = True)
    def update_priorities(self, idxes, priorities):
        """Update priorities of sampled transitions.
        """
        Update priorities of sampled transitions,
        setting the priority of the transition at index idxes[i] in the buffer
        to priorities[i].

        Parameters
        ----------
        idxes: [int]
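Taken together with append and sample above, a rough sketch of the prioritized-replay cycle (assuming the rltorch package from this repository is importable; the transition values, indices, and priorities below are placeholders, and the layout of the object returned by sample follows the Returns section above rather than anything shown here):

# Sketch of the prioritized replay cycle with placeholder values.
from rltorch.memory import PrioritizedReplayMemory

per_memory = PrioritizedReplayMemory(capacity=100, alpha=0.6)
for t in range(100):
    # state, action, reward, next_state, done placeholders
    per_memory.append(t, 0, 1.0, t + 1, False)

# Draw a batch; beta controls how strongly importance weights correct for
# the non-uniform sampling (0 = no correction, 1 = full correction).
batch = per_memory.sample(batch_size=8, beta=0.4)

# After computing TD errors for the sampled transitions, refresh their
# priorities so transitions with large errors are revisited sooner.
# The indices and priorities here are placeholders, not computed values.
sampled_idxes = list(range(8))
new_priorities = [0.5] * 8
per_memory.update_priorities(sampled_idxes, new_priorities)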
@@ -4,21 +4,38 @@ import torch
Transition = namedtuple('Transition',
    ('state', 'action', 'reward', 'next_state', 'done'))

# Implements a Ring Buffer
class ReplayMemory(object):
    """
    Creates a ring buffer of a fixed size.

    Parameters
    ----------
    capacity : int
        The maximum size of the buffer
    """
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def append(self, *args):
        """Saves a transition."""
        """
        Adds a transition to the buffer.

        Parameters
        ----------
        *args
            The state, action, reward, next_state, done tuple
        """
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def clear(self):
        """
        Clears the buffer.
        """
        self.memory.clear()
        self.position = 0
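A short sketch of the ring-buffer behaviour implemented by append above (assuming the rltorch package from this repository is importable; the transition values are placeholders):

# Sketch: once capacity transitions are stored, the oldest slots are overwritten.
from rltorch.memory import ReplayMemory

memory = ReplayMemory(capacity=3)
for t in range(5):
    # state, action, reward, next_state, done placeholders
    memory.append(t, 0, 1.0, t + 1, False)

# Slots 0 and 1 were reused for t = 3 and t = 4; slot 2 still holds t = 2.
print(memory[0].state, memory[1].state, memory[2].state)   # 3 4 2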
@@ -37,10 +54,35 @@ class ReplayMemory(object):

    def sample(self, batch_size):
        """
        Returns a random sample from the buffer.

        Parameters
        ----------
        batch_size : int
            The number of observations to sample.
        """
        return random.sample(self.memory, batch_size)

    def sample_n_steps(self, batch_size, steps):
        idxes = random.sample(range(len(self.memory) - steps), batch_size // steps)
        r"""
        Returns a random sample of sequential batches of size steps.

        Notes
        -----
        The number of batches sampled is :math:`\lfloor\frac{batch\_size}{steps}\rfloor`.

        Parameters
        ----------
        batch_size : int
            The total number of observations to sample.
        steps : int
            The number of observations after the one selected to sample.
        """
        idxes = random.sample(
            range(len(self.memory) - steps),
            batch_size // steps
        )
        step_idxes = []
        for i in idxes:
            step_idxes += range(i, i + steps)
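The index expansion in sample_n_steps can be illustrated in isolation; this standalone sketch mirrors the lines above with illustrative sizes:

# Sketch: each sampled start index i contributes the run i, i+1, ..., i+steps-1,
# so batch_size // steps starting points yield batch_size transitions here.
import random

memory_len, batch_size, steps = 20, 8, 4
idxes = random.sample(range(memory_len - steps), batch_size // steps)
step_idxes = []
for i in idxes:
    step_idxes += range(i, i + steps)
print(step_idxes)   # e.g. [3, 4, 5, 6, 11, 12, 13, 14]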
@@ -56,10 +98,10 @@ class ReplayMemory(object):
        return value in self.memory

    def __getitem__(self, index):
        return self.memory[index]
        return self.memory[index % self.capacity]

    def __setitem__(self, index, value):
        self.memory[index] = value
        self.memory[index % self.capacity] = value

    def __reversed__(self):
        return reversed(self.memory)
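The change to __getitem__ and __setitem__ makes indexing wrap at the buffer capacity; a small sketch (placeholder values, assuming the rltorch package from this repository is importable):

# Sketch: indices are reduced modulo capacity, so index 3 aliases index 0 here.
from rltorch.memory import ReplayMemory

buffer = ReplayMemory(capacity=3)
for t in range(3):
    buffer.append(t, 0, 1.0, t + 1, False)

print(buffer[3] is buffer[0])   # True, because 3 % 3 == 0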