Made sure everything went to its appropriate device
This commit is contained in:
		
							parent
							
								
									39643f04e1
								
							
						
					
					
						commit
						9cd3625fd3
					
				
					 5 changed files with 14 additions and 9 deletions
				
			
		| 
						 | 
				
			
			@ -90,11 +90,11 @@ logwriter = rltorch.log.LogWriter(logger, SummaryWriter())
 | 
			
		|||
# Setting up the networks
 | 
			
		||||
device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
 | 
			
		||||
net = rn.Network(Value(state_size, action_size), 
 | 
			
		||||
                    torch.optim.Adam, config, logger = logger, name = "DQN")
 | 
			
		||||
target_net = rn.TargetNetwork(net)
 | 
			
		||||
                    torch.optim.Adam, config, device = device, logger = logger, name = "DQN")
 | 
			
		||||
target_net = rn.TargetNetwork(net, device = device)
 | 
			
		||||
 | 
			
		||||
# Actor takes a net and uses it to produce actions from given states
 | 
			
		||||
actor = ArgMaxSelector(net, action_size)
 | 
			
		||||
actor = ArgMaxSelector(net, action_size, device = device)
 | 
			
		||||
# Memory stores experiences for later training
 | 
			
		||||
memory = M.ReplayMemory(capacity = config['memory_size'])
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -107,11 +107,11 @@ logwriter = rltorch.log.LogWriter(logger, SummaryWriter())
 | 
			
		|||
# Setting up the networks
 | 
			
		||||
device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
 | 
			
		||||
net = rn.Network(Value(state_size, action_size), 
 | 
			
		||||
                    torch.optim.Adam, config, logger = logger, name = "DQN")
 | 
			
		||||
target_net = rn.TargetNetwork(net)
 | 
			
		||||
                    torch.optim.Adam, config, device = device, logger = logger, name = "DQN")
 | 
			
		||||
target_net = rn.TargetNetwork(net, device = device)
 | 
			
		||||
 | 
			
		||||
# Actor takes a network and uses it to produce actions from given states
 | 
			
		||||
actor = ArgMaxSelector(net, action_size)
 | 
			
		||||
actor = ArgMaxSelector(net, action_size, device = device)
 | 
			
		||||
# Memory stores experiences for later training
 | 
			
		||||
memory = M.ReplayMemory(capacity = config['memory_size'])
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -10,7 +10,7 @@ class ArgMaxSelector:
 | 
			
		|||
    def best_act(self, state):
 | 
			
		||||
        with torch.no_grad():
 | 
			
		||||
            if self.device is not None:
 | 
			
		||||
                self.device.to(self.device)
 | 
			
		||||
                state = state.to(self.device)
 | 
			
		||||
            action_values = self.model(state).squeeze(0)
 | 
			
		||||
            action = self.random_act() if (action_values[0] == action_values).all() else action_values.argmax().item()
 | 
			
		||||
        return action
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,11 +2,14 @@ class Network:
 | 
			
		|||
    """
 | 
			
		||||
    Wrapper around model which provides copy of it instead of trained weights
 | 
			
		||||
    """
 | 
			
		||||
    def __init__(self, model, optimizer, config, logger = None, name = ""):
 | 
			
		||||
    def __init__(self, model, optimizer, config, device = None, logger = None, name = ""):
 | 
			
		||||
        self.model = model
 | 
			
		||||
        self.optimizer = optimizer(model.parameters(), lr = config['learning_rate'], weight_decay = config['weight_decay'])
 | 
			
		||||
        self.logger = logger
 | 
			
		||||
        self.name = name
 | 
			
		||||
        self.device = device
 | 
			
		||||
        if self.device is not None:
 | 
			
		||||
            self.model = self.model.to(device)
 | 
			
		||||
 | 
			
		||||
    def __call__(self, *args):
 | 
			
		||||
        return self.model(*args)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,9 +4,11 @@ class TargetNetwork:
 | 
			
		|||
    """
 | 
			
		||||
    Wrapper around model which provides copy of it instead of trained weights
 | 
			
		||||
    """
 | 
			
		||||
    def __init__(self, network):
 | 
			
		||||
    def __init__(self, network, device = None):
 | 
			
		||||
        self.model = network.model
 | 
			
		||||
        self.target_model = deepcopy(network.model)
 | 
			
		||||
        if network.device is not None:
 | 
			
		||||
            self.target_model = self.target_model.to(network.device)
 | 
			
		||||
 | 
			
		||||
    def __call__(self, *args):
 | 
			
		||||
        return self.model(*args)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue