diff --git a/score_test.py b/score_test.py index ff4a94c5e1b82c90eec0c5bf129bad496046e595..846c39994cb839452176dc48f56384e9b37f26ef 100644 --- a/score_test.py +++ b/score_test.py @@ -22,7 +22,7 @@ test_times = [] test_dones = [] # Load agent # agent = Agent(state_size, action_size, "FC", 0) -# agent.qnetwork_local.load_state_dict(torch.load('./torch_training/Nets/avoid_checkpoint1700.pth')) +# agent.qnetwork_local.load_state_dict(torch.load('./torch_training/Nets/avoid_checkpoint500.pth')) agent = RandomAgent(state_size, action_size) start_time_scoring = time.time() test_idx = 0 diff --git a/scoring/score_test.py b/scoring/score_test.py index fa65f4c15f52a9a963a7f22d428b6c9258eadf64..4baee4a176e80a3c947cbd9402a78b76c735f839 100644 --- a/scoring/score_test.py +++ b/scoring/score_test.py @@ -28,8 +28,8 @@ test_dones = [] sequential_agent_test = False # Load your agent -agent = Agent(state_size, action_size, 0) -agent.qnetwork_local.load_state_dict(torch.load('../torch_training/Nets/avoid_checkpoint60000.pth')) +agent = Agent(state_size, action_size) +agent.qnetwork_local.load_state_dict(torch.load('../torch_training/Nets/avoid_checkpoint500.pth')) # Load the necessary Observation Builder and Predictor predictor = ShortestPathPredictorForRailEnv() diff --git a/torch_training/Nets/avoid_checkpoint15000.pth b/torch_training/Nets/avoid_checkpoint15000.pth deleted file mode 100644 index e1daf228b7f1f6b108329715c3cdbd67805e28ae..0000000000000000000000000000000000000000 Binary files a/torch_training/Nets/avoid_checkpoint15000.pth and /dev/null differ diff --git a/torch_training/Nets/avoid_checkpoint30000.pth b/torch_training/Nets/avoid_checkpoint30000.pth deleted file mode 100644 index 0e2c1b28c1655bc16c9339066b8d105282f14418..0000000000000000000000000000000000000000 Binary files a/torch_training/Nets/avoid_checkpoint30000.pth and /dev/null differ diff --git a/torch_training/Nets/avoid_checkpoint60000.pth b/torch_training/Nets/avoid_checkpoint60000.pth deleted file mode 100644 index b4fef60542f50419353047721fae31f5382e7bd4..0000000000000000000000000000000000000000 Binary files a/torch_training/Nets/avoid_checkpoint60000.pth and /dev/null differ diff --git a/torch_training/dueling_double_dqn.py b/torch_training/dueling_double_dqn.py index 5dfa5c18f16e61af56bc0b49b8ad8eb0aa228615..b7bb4bcdc7c72fa09b352fdf5cf99258f8f9ad0c 100644 --- a/torch_training/dueling_double_dqn.py +++ b/torch_training/dueling_double_dqn.py @@ -16,38 +16,33 @@ GAMMA = 0.99 # discount factor 0.99 TAU = 1e-3 # for soft update of target parameters LR = 0.5e-4 # learning rate 0.5e-4 works UPDATE_EVERY = 10 # how often to update the network -double_dqn = True # If using double dqn algorithm -input_channels = 5 # Number of Input channels device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -#device = torch.device("cpu") print(device) class Agent: """Interacts with and learns from the environment.""" - def __init__(self, state_size, action_size, seed, double_dqn=True, input_channels=5): + def __init__(self, state_size, action_size, double_dqn=True): """Initialize an Agent object. 
Params ====== state_size (int): dimension of each state action_size (int): dimension of each action - seed (int): random seed """ self.state_size = state_size self.action_size = action_size - self.seed = random.seed(seed) self.double_dqn = double_dqn # Q-Network - self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device) + self.qnetwork_local = QNetwork(state_size, action_size).to(device) self.qnetwork_target = copy.deepcopy(self.qnetwork_local) self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR) # Replay memory - self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed) + self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE) # Initialize time step (for updating every UPDATE_EVERY steps) self.t_step = 0 @@ -147,7 +142,7 @@ class Agent: class ReplayBuffer: """Fixed-size buffer to store experience tuples.""" - def __init__(self, action_size, buffer_size, batch_size, seed): + def __init__(self, action_size, buffer_size, batch_size): """Initialize a ReplayBuffer object. Params @@ -155,13 +150,11 @@ class ReplayBuffer: action_size (int): dimension of each action buffer_size (int): maximum size of buffer batch_size (int): size of each training batch - seed (int): random seed """ self.action_size = action_size self.memory = deque(maxlen=buffer_size) self.batch_size = batch_size self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"]) - self.seed = random.seed(seed) def add(self, state, action, reward, next_state, done): """Add a new experience to memory.""" @@ -183,7 +176,7 @@ class ReplayBuffer: dones = torch.from_numpy(self.__v_stack_impr([e.done for e in experiences if e is not None]).astype(np.uint8)) \ .float().to(device) - return (states, actions, rewards, next_states, dones) + return states, actions, rewards, next_states, dones def __len__(self): """Return the current size of internal memory.""" diff --git a/torch_training/model.py b/torch_training/model.py index f7bc3d58aff541670fc239cbff6bc076cf18f945..9a5afccfda50e63271b1f5d8ed5a2d74b5e169e7 100644 --- a/torch_training/model.py +++ b/torch_training/model.py @@ -3,7 +3,7 @@ import torch.nn.functional as F class QNetwork(nn.Module): - def __init__(self, state_size, action_size, seed, hidsize1=128, hidsize2=128): + def __init__(self, state_size, action_size, hidsize1=128, hidsize2=128): super(QNetwork, self).__init__() self.fc1_val = nn.Linear(state_size, hidsize1) diff --git a/torch_training/multi_agent_inference.py b/torch_training/multi_agent_inference.py index 96c8ee74211987803ad1308d9f458ee5dda0d335..580886b1db73ba34d539e14968deea384b5b98be 100644 --- a/torch_training/multi_agent_inference.py +++ b/torch_training/multi_agent_inference.py @@ -87,8 +87,8 @@ dones_list = [] action_prob = [0] * action_size agent_obs = [None] * env.get_num_agents() agent_next_obs = [None] * env.get_num_agents() -agent = Agent(state_size, action_size, 0) -with path(torch_training.Nets, "avoid_checkpoint100.pth") as file_in: +agent = Agent(state_size, action_size) +with path(torch_training.Nets, "avoid_checkpoint500.pth") as file_in: agent.qnetwork_local.load_state_dict(torch.load(file_in)) record_images = False diff --git a/torch_training/multi_agent_training.py b/torch_training/multi_agent_training.py index 67474600de423051959da4110f61a145b4226c9e..fe9e27969f129313c19f54fd36738188ba376082 100644 --- a/torch_training/multi_agent_training.py +++ b/torch_training/multi_agent_training.py @@ -121,11 +121,11 @@ def main(argv): observation_radius = 10 # 
Initialize the agent - agent = Agent(state_size, action_size, 0) + agent = Agent(state_size, action_size) # Here you can pre-load an agent if False: - with path(torch_training.Nets, "avoid_checkpoint2400.pth") as file_in: + with path(torch_training.Nets, "avoid_checkpoint500.pth") as file_in: agent.qnetwork_local.load_state_dict(torch.load(file_in)) # Do training over n_episodes diff --git a/torch_training/multi_agent_two_time_step_training.py b/torch_training/multi_agent_two_time_step_training.py index 2687cd5999a86f3708bb83fabf6b75779c29862f..57f4a619d3bd3ae797fa7fe9aff7c799064bc6f6 100644 --- a/torch_training/multi_agent_two_time_step_training.py +++ b/torch_training/multi_agent_two_time_step_training.py @@ -7,16 +7,16 @@ from collections import deque import matplotlib.pyplot as plt import numpy as np import torch -from importlib_resources import path - -# Import Torch and utility functions to normalize observation -import torch_training.Nets from flatland.envs.observations import TreeObsForRailEnv from flatland.envs.predictions import ShortestPathPredictorForRailEnv from flatland.envs.rail_env import RailEnv from flatland.envs.rail_generators import complex_rail_generator # Import Flatland/ Observations and Predictors from flatland.envs.schedule_generators import complex_schedule_generator +from importlib_resources import path + +# Import Torch and utility functions to normalize observation +import torch_training.Nets from torch_training.dueling_double_dqn import Agent from utils.observation_utils import norm_obs_clip, split_tree @@ -41,7 +41,7 @@ def main(argv): n_agents = np.random.randint(3, 8) n_goals = n_agents + np.random.randint(0, 3) min_dist = int(0.75 * min(x_dim, y_dim)) - tree_depth = 3 + tree_depth = 2 print("main2") demo = False @@ -60,7 +60,6 @@ def main(argv): handle = env.get_agent_handles() features_per_node = env.obs_builder.observation_dim - tree_depth = 2 nr_nodes = 0 for i in range(tree_depth + 1): nr_nodes += np.power(4, i) @@ -87,11 +86,11 @@ def main(argv): agent_obs = [None] * env.get_num_agents() agent_next_obs = [None] * env.get_num_agents() # Initialize the agent - agent = Agent(state_size, action_size, 0) + agent = Agent(state_size, action_size) # Here you can pre-load an agent if False: - with path(torch_training.Nets, "avoid_checkpoint30000.pth") as file_in: + with path(torch_training.Nets, "avoid_checkpoint500.pth") as file_in: agent.qnetwork_local.load_state_dict(torch.load(file_in)) # Do training over n_episodes @@ -132,7 +131,7 @@ def main(argv): # Build agent specific observations for a in range(env.get_num_agents()): data, distance, agent_data = split_tree(tree=np.array(obs[a]), - num_features_per_node=11, + num_features_per_node=features_per_node, current_depth=0) data = norm_obs_clip(data) distance = norm_obs_clip(distance) @@ -165,6 +164,7 @@ def main(argv): next_obs, all_rewards, done, _ = env.step(action_dict) for a in range(env.get_num_agents()): data, distance, agent_data = split_tree(tree=np.array(next_obs[a]), + num_features_per_node=features_per_node, current_depth=0) data = norm_obs_clip(data) distance = norm_obs_clip(distance) diff --git a/torch_training/render_agent_behavior.py b/torch_training/render_agent_behavior.py index 4befcd0d2b156f2bf7ff2a2d010ee83b4e29b4b6..2649a2367367e17e39328ca8c28cc9c2f1fc0172 100644 --- a/torch_training/render_agent_behavior.py +++ b/torch_training/render_agent_behavior.py @@ -101,8 +101,8 @@ dones_list = [] action_prob = [0] * action_size agent_obs = [None] * env.get_num_agents() agent_next_obs = [None] 
* env.get_num_agents() -agent = Agent(state_size, action_size, 0) -with path(torch_training.Nets, "navigator_checkpoint10700.pth") as file_in: +agent = Agent(state_size, action_size) +with path(torch_training.Nets, "avoid_checkpoint500.pth") as file_in: agent.qnetwork_local.load_state_dict(torch.load(file_in)) record_images = False diff --git a/torch_training/training_navigation.py b/torch_training/training_navigation.py index 80e4767fd1e46365c8860dc2d2acd22052c8bc54..bd221ae4b912b89c1d5bb242676d4f75819cfd90 100644 --- a/torch_training/training_navigation.py +++ b/torch_training/training_navigation.py @@ -117,7 +117,7 @@ def main(argv): cummulated_reward = np.zeros(env.get_num_agents()) # Now we load a Double dueling DQN agent - agent = Agent(state_size, action_size, "FC", 0) + agent = Agent(state_size, action_size) for trials in range(1, n_trials + 1):
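
A minimal usage sketch of the constructors as they look after this diff: `Agent`, `QNetwork`, and `ReplayBuffer` no longer accept a `seed` argument, so any reproducibility seeding has to happen before the agent is created. The seed value, `state_size`, and checkpoint path below are illustrative assumptions, not values confirmed by the repository.

```python
# Sketch only: shows the post-diff call signatures, not a canonical script.
import random

import numpy as np
import torch

from torch_training.dueling_double_dqn import Agent

# Seeding is no longer done inside Agent/ReplayBuffer, so do it here if needed
# (seed value 0 is an arbitrary choice for illustration).
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

state_size = 231   # depends on the observation builder and tree depth in use
action_size = 5

# New signature: no seed argument; double_dqn still defaults to True.
agent = Agent(state_size, action_size)

# Optionally restore weights from a checkpoint (path is an assumption and
# depends on the working directory).
checkpoint = "./torch_training/Nets/avoid_checkpoint500.pth"
agent.qnetwork_local.load_state_dict(
    torch.load(checkpoint, map_location=torch.device("cpu"))
)
```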