Made sure everything is moved to its appropriate device
This commit is contained in:
parent
39643f04e1
commit
9cd3625fd3
5 changed files with 14 additions and 9 deletions
|
@ -90,11 +90,11 @@ logwriter = rltorch.log.LogWriter(logger, SummaryWriter())
|
||||||
# Setting up the networks
|
# Setting up the networks
|
||||||
device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
|
device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
|
||||||
net = rn.Network(Value(state_size, action_size),
|
net = rn.Network(Value(state_size, action_size),
|
||||||
torch.optim.Adam, config, logger = logger, name = "DQN")
|
torch.optim.Adam, config, device = device, logger = logger, name = "DQN")
|
||||||
target_net = rn.TargetNetwork(net)
|
target_net = rn.TargetNetwork(net, device = device)
|
||||||
|
|
||||||
# Actor takes a net and uses it to produce actions from given states
|
# Actor takes a net and uses it to produce actions from given states
|
||||||
actor = ArgMaxSelector(net, action_size)
|
actor = ArgMaxSelector(net, action_size, device = device)
|
||||||
# Memory stores experiences for later training
|
# Memory stores experiences for later training
|
||||||
memory = M.ReplayMemory(capacity = config['memory_size'])
|
memory = M.ReplayMemory(capacity = config['memory_size'])
|
||||||
|
|
||||||
|
|
|
@ -107,11 +107,11 @@ logwriter = rltorch.log.LogWriter(logger, SummaryWriter())
|
||||||
# Setting up the networks
|
# Setting up the networks
|
||||||
device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
|
device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
|
||||||
net = rn.Network(Value(state_size, action_size),
|
net = rn.Network(Value(state_size, action_size),
|
||||||
torch.optim.Adam, config, logger = logger, name = "DQN")
|
torch.optim.Adam, config, device = device, logger = logger, name = "DQN")
|
||||||
target_net = rn.TargetNetwork(net)
|
target_net = rn.TargetNetwork(net, device = device)
|
||||||
|
|
||||||
# Actor takes a network and uses it to produce actions from given states
|
# Actor takes a network and uses it to produce actions from given states
|
||||||
actor = ArgMaxSelector(net, action_size)
|
actor = ArgMaxSelector(net, action_size, device = device)
|
||||||
# Memory stores experiences for later training
|
# Memory stores experiences for later training
|
||||||
memory = M.ReplayMemory(capacity = config['memory_size'])
|
memory = M.ReplayMemory(capacity = config['memory_size'])
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ class ArgMaxSelector:
|
||||||
def best_act(self, state):
|
def best_act(self, state):
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
if self.device is not None:
|
if self.device is not None:
|
||||||
self.device.to(self.device)
|
state = state.to(self.device)
|
||||||
action_values = self.model(state).squeeze(0)
|
action_values = self.model(state).squeeze(0)
|
||||||
action = self.random_act() if (action_values[0] == action_values).all() else action_values.argmax().item()
|
action = self.random_act() if (action_values[0] == action_values).all() else action_values.argmax().item()
|
||||||
return action
|
return action
|
||||||
|
|
|
@ -2,11 +2,14 @@ class Network:
|
||||||
"""
|
"""
|
||||||
Wrapper around model which provides copy of it instead of trained weights
|
Wrapper around model which provides copy of it instead of trained weights
|
||||||
"""
|
"""
|
||||||
def __init__(self, model, optimizer, config, logger = None, name = ""):
|
def __init__(self, model, optimizer, config, device = None, logger = None, name = ""):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.optimizer = optimizer(model.parameters(), lr = config['learning_rate'], weight_decay = config['weight_decay'])
|
self.optimizer = optimizer(model.parameters(), lr = config['learning_rate'], weight_decay = config['weight_decay'])
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.name = name
|
self.name = name
|
||||||
|
self.device = device
|
||||||
|
if self.device is not None:
|
||||||
|
self.model = self.model.to(device)
|
||||||
|
|
||||||
def __call__(self, *args):
|
def __call__(self, *args):
|
||||||
return self.model(*args)
|
return self.model(*args)
|
||||||
|
|
|
@ -4,9 +4,11 @@ class TargetNetwork:
|
||||||
"""
|
"""
|
||||||
Wrapper around model which provides copy of it instead of trained weights
|
Wrapper around model which provides copy of it instead of trained weights
|
||||||
"""
|
"""
|
||||||
def __init__(self, network):
|
def __init__(self, network, device = None):
|
||||||
self.model = network.model
|
self.model = network.model
|
||||||
self.target_model = deepcopy(network.model)
|
self.target_model = deepcopy(network.model)
|
||||||
|
if network.device is not None:
|
||||||
|
self.target_model = self.target_model.to(network.device)
|
||||||
|
|
||||||
def __call__(self, *args):
|
def __call__(self, *args):
|
||||||
return self.model(*args)
|
return self.model(*args)
|
||||||
|
|
Loading…
Add table
Reference in a new issue