From b2ab2ee132e38195e8ec5491098c06c1a33c5faf Mon Sep 17 00:00:00 2001
From: Brandon Rozek
Date: Sun, 10 Feb 2019 23:56:21 -0500
Subject: [PATCH] Implemented epsilon as a scheduler

---
  Reviewer notes (below the three-dash line, so not part of the commit message):
  * act() now compares the random draw against the sampled `eps`; the
    previous revision compared against an undefined name `epsilon`.
  * A schedule is detected by the presence of `__next__`, which is exactly
    what next() requires, instead of `collections.Iterable` (module was not
    imported, and that alias no longer exists in Python 3.10+).
  * The post-image blob id on the `index` line predates these fixes.

 rltorch/action_selector/EpsilonGreedySelector.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/rltorch/action_selector/EpsilonGreedySelector.py b/rltorch/action_selector/EpsilonGreedySelector.py
index d196c1b..7dbc4ae 100644
--- a/rltorch/action_selector/EpsilonGreedySelector.py
+++ b/rltorch/action_selector/EpsilonGreedySelector.py
@@ -1,15 +1,13 @@
 from .ArgMaxSelector import ArgMaxSelector
 import numpy as np
 class EpsilonGreedySelector(ArgMaxSelector):
-    def __init__(self, model, action_size, device = None, epsilon = 0.1, epsilon_decay = 1, epsilon_min = 0.1):
+    def __init__(self, model, action_size, device = None, epsilon = 0.1):
         super(EpsilonGreedySelector, self).__init__(model, action_size, device = device)
         self.epsilon = epsilon
-        self.epsilon_decay = epsilon_decay
-        self.epsilon_min = epsilon_min
     # random_act is already implemented in ArgMaxSelector
     # best_act is already implemented in ArgMaxSelector
     def act(self, state):
-        action = self.random_act() if np.random.rand() < self.epsilon else self.best_act(state)
-        if self.epsilon > self.epsilon_min:
-            self.epsilon = self.epsilon * self.epsilon_decay
+        # epsilon is either a fixed number or an iterator-style schedule; a schedule is advanced once per call
+        eps = next(self.epsilon) if hasattr(self.epsilon, "__next__") else self.epsilon
+        action = self.random_act() if np.random.rand() < eps else self.best_act(state)
         return action
\ No newline at end of file