Implemented epsilon as a scheduler
This commit is contained in:
		
							parent
							
								
									04e54cddc2
								
							
						
					
					
						commit
						b2ab2ee132
					
				
					 1 changed files with 3 additions and 6 deletions
				
			
		| 
						 | 
				
			
			@ -1,15 +1,12 @@
 | 
			
		|||
from .ArgMaxSelector import ArgMaxSelector
 | 
			
		||||
import numpy as np 
 | 
			
		||||
class EpsilonGreedySelector(ArgMaxSelector):
 | 
			
		||||
    def __init__(self, model, action_size, device = None, epsilon = 0.1, epsilon_decay = 1, epsilon_min = 0.1):
 | 
			
		||||
    def __init__(self, model, action_size, device = None, epsilon = 0.1):
 | 
			
		||||
        super(EpsilonGreedySelector, self).__init__(model, action_size, device = device)
 | 
			
		||||
        self.epsilon = epsilon
 | 
			
		||||
        self.epsilon_decay = epsilon_decay
 | 
			
		||||
        self.epsilon_min = epsilon_min
 | 
			
		||||
    # random_act is already implemented in ArgMaxSelector
 | 
			
		||||
    # best_act is already implemented in ArgMaxSelector
 | 
			
		||||
    def act(self, state):
 | 
			
		||||
        action = self.random_act() if np.random.rand() < self.epsilon else self.best_act(state)
 | 
			
		||||
        if self.epsilon > self.epsilon_min:
 | 
			
		||||
            self.epsilon = self.epsilon * self.epsilon_decay
 | 
			
		||||
        eps = next(self.epsilon) if isinstance(self.epsilon, collections.Iterable) else self.epsilon
 | 
			
		||||
        action = self.random_act() if np.random.rand() < epsilon else self.best_act(state)
 | 
			
		||||
        return action
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue