diff --git a/torch_training/bla.py b/torch_training/bla.py
deleted file mode 100644
index 584e50287ab6d59119b936cce7266f05c586c613..0000000000000000000000000000000000000000
--- a/torch_training/bla.py
+++ /dev/null
@@ -1,229 +0,0 @@
-import getopt
-import random
-import sys
-from collections import deque
-
-import matplotlib.pyplot as plt
-import numpy as np
-import torch
-from importlib_resources import path
-
-import torch_training.Nets
-from flatland.envs.generators import complex_rail_generator
-from flatland.envs.observations import TreeObsForRailEnv
-from flatland.envs.predictions import ShortestPathPredictorForRailEnv
-from flatland.envs.rail_env import RailEnv
-from flatland.utils.rendertools import RenderTool
-from torch_training.dueling_double_dqn import Agent
-from utils.observation_utils import norm_obs_clip, split_tree
-
-print("multi_agent_trainging.py (1)")
-
-def main(argv):
-    try:
-        opts, args = getopt.getopt(argv, "n:", ["n_trials="])
-    except getopt.GetoptError:
-        print('training_navigation.py -n <n_trials>')
-        sys.exit(2)
-    for opt, arg in opts:
-        if opt in ('-n', '--n_trials'):
-            n_trials = int(arg)
-    print("main1")
-    random.seed(1)
-    np.random.seed(1)
-
-    """
-    env = RailEnv(width=10,
-                  height=20, obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()))
-    env.load("./railway/complex_scene.pkl")
-    file_load = True
-    """
-
-    x_dim = np.random.randint(8, 20)
-    y_dim = np.random.randint(8, 20)
-    n_agents = np.random.randint(3, 8)
-    n_goals = n_agents + np.random.randint(0, 3)
-    min_dist = int(0.75 * min(x_dim, y_dim))
-    print("main2")
-
-    env = RailEnv(width=x_dim,
-                  height=y_dim,
-                  rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
-                                                        max_dist=99999,
-                                                        seed=0),
-                  obs_builder_object=TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv()),
-                  number_of_agents=n_agents)
-    env.reset(True, True)
-    file_load = False
-    observation_helper = TreeObsForRailEnv(max_depth=3, predictor=ShortestPathPredictorForRailEnv())
-    env_renderer = RenderTool(env, gl="PILSVG", )
-    handle = env.get_agent_handles()
-    features_per_node = 9
-    state_size = features_per_node * 85 * 2
-    action_size = 5
-
-    print("main3")
-
-    # We set the number of episodes we would like to train on
-    if 'n_trials' not in locals():
-        n_trials = 30000
-    max_steps = int(3 * (env.height + env.width))
-    eps = 1.
-    eps_end = 0.005
-    eps_decay = 0.9995
-    action_dict = dict()
-    final_action_dict = dict()
-    scores_window = deque(maxlen=100)
-    done_window = deque(maxlen=100)
-    time_obs = deque(maxlen=2)
-    scores = []
-    dones_list = []
-    action_prob = [0] * action_size
-    agent_obs = [None] * env.get_num_agents()
-    agent_next_obs = [None] * env.get_num_agents()
-    agent = Agent(state_size, action_size, "FC", 0)
-    with path(torch_training.Nets, "avoid_checkpoint30000.pth") as file_in:
-        agent.qnetwork_local.load_state_dict(torch.load(file_in))
-
-    demo = False
-    record_images = False
-    frame_step = 0
-
-    print("Going to run training for {} trials...".format(n_trials))
-    for trials in range(1, n_trials + 1):
-
-        if trials % 50 == 0 and not demo:
-            x_dim = np.random.randint(8, 20)
-            y_dim = np.random.randint(8, 20)
-            n_agents = np.random.randint(3, 8)
-            n_goals = n_agents + np.random.randint(0, 3)
-            min_dist = int(0.75 * min(x_dim, y_dim))
-            env = RailEnv(width=x_dim,
-                          height=y_dim,
-                          rail_generator=complex_rail_generator(nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
-                                                                max_dist=99999,
-                                                                seed=0),
-                          obs_builder_object=TreeObsForRailEnv(max_depth=3,
-                                                               predictor=ShortestPathPredictorForRailEnv()),
-                          number_of_agents=n_agents)
-            env.reset(True, True)
-            max_steps = int(3 * (env.height + env.width))
-            agent_obs = [None] * env.get_num_agents()
-            agent_next_obs = [None] * env.get_num_agents()
-        # Reset environment
-        if file_load:
-            obs = env.reset(False, False)
-        else:
-            obs = env.reset(True, True)
-        if demo:
-            env_renderer.set_new_rail()
-        obs_original = obs.copy()
-        final_obs = obs.copy()
-        final_obs_next = obs.copy()
-        for a in range(env.get_num_agents()):
-            data, distance, agent_data = split_tree(tree=np.array(obs[a]),
-                                                    current_depth=0)
-            data = norm_obs_clip(data)
-            distance = norm_obs_clip(distance)
-            agent_data = np.clip(agent_data, -1, 1)
-            obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-            agent_data = env.agents[a]
-            speed = 1  # np.random.randint(1,5)
-            agent_data.speed_data['speed'] = 1. / speed
-
-        for i in range(2):
-            time_obs.append(obs)
-        # env.obs_builder.util_print_obs_subtree(tree=obs[0], num_elements_per_node=5)
-        for a in range(env.get_num_agents()):
-            agent_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-
-        score = 0
-        env_done = 0
-        # Run episode
-        for step in range(max_steps):
-            if demo:
-                env_renderer.renderEnv(show=True, show_observations=False)
-                # observation_helper.util_print_obs_subtree(obs_original[0])
-                if record_images:
-                    env_renderer.gl.saveImage("./Images/flatland_frame_{:04d}.bmp".format(frame_step))
-                    frame_step += 1
-            # print(step)
-            # Action
-            for a in range(env.get_num_agents()):
-                if demo:
-                    eps = 0
-                # action = agent.act(np.array(obs[a]), eps=eps)
-                print("before act")
-                #action = agent.act(agent_obs[a], eps=eps)
-                action = 0
-                print("after act")
-                action_prob[action] += 1
-                action_dict.update({a: action})
-            # Environment step
-
-            next_obs, all_rewards, done, _ = env.step(action_dict)
-            # print(all_rewards,action)
-            obs_original = next_obs.copy()
-            for a in range(env.get_num_agents()):
-                a = 5
-                # data, distance, agent_data = split_tree(tree=np.array(next_obs[a]),
-                #                                         current_depth=0)
-                # data = norm_obs_clip(data)
-                # distance = norm_obs_clip(distance)
-                # agent_data = np.clip(agent_data, -1, 1)
-                # next_obs[a] = np.concatenate((np.concatenate((data, distance)), agent_data))
-            time_obs.append(next_obs)
-            #
-            # # Update replay buffer and train agent
-            # for a in range(env.get_num_agents()):
-            #     agent_next_obs[a] = np.concatenate((time_obs[0][a], time_obs[1][a]))
-            #     if done[a]:
-            #         final_obs[a] = agent_obs[a].copy()
-            #         final_obs_next[a] = agent_next_obs[a].copy()
-            #         final_action_dict.update({a: action_dict[a]})
-            #     if not demo and not done[a]:
-            #         agent.step(agent_obs[a], action_dict[a], all_rewards[a], agent_next_obs[a], done[a])
-            #     score += all_rewards[a] / env.get_num_agents()
-            #
-            # agent_obs = agent_next_obs.copy()
-            # if done['__all__']:
-            #     env_done = 1
-            #     for a in range(env.get_num_agents()):
-            #         agent.step(final_obs[a], final_action_dict[a], all_rewards[a], final_obs_next[a], done[a])
-            #     break
-        # # Epsilon decay
-        # eps = max(eps_end, eps_decay * eps)  # decrease epsilon
-        #
-        # done_window.append(env_done)
-        # scores_window.append(score / max_steps)  # save most recent score
-        # scores.append(np.mean(scores_window))
-        # dones_list.append((np.mean(done_window)))
-
-        print(
-            '\rTraining {} Agents on ({},{}).\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-                env.get_num_agents(), x_dim, y_dim,
-                trials,
-                np.mean(scores_window),
-                100 * np.mean(done_window),
-                eps, action_prob / np.sum(action_prob)), end=" ")
-
-        if trials % 100 == 0:
-            print(
-                '\rTraining {} Agents.\t Episode {}\t Average Score: {:.3f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
-                    env.get_num_agents(),
-                    trials,
-                    np.mean(scores_window),
-                    100 * np.mean(done_window),
-                    eps,
-                    action_prob / np.sum(action_prob)))
-            torch.save(agent.qnetwork_local.state_dict(),
-                       './Nets/avoid_checkpoint' + str(trials) + '.pth')
-            action_prob = [1] * action_size
-
-print("multi_agent_trainging.py (2)")
-
-if __name__ == '__main__':
-    print("main")
-    main(sys.argv[1:])
-
-print("multi_agent_trainging.py (3)")
\ No newline at end of file
diff --git a/torch_training/multi_agent_training.py b/torch_training/multi_agent_training.py
index 4f823be331850668e18bfdf66d35a915e3f6ccdc..280841c9e918896133290f625e69fc948fa0b52a 100644
--- a/torch_training/multi_agent_training.py
+++ b/torch_training/multi_agent_training.py
@@ -1,10 +1,10 @@
-import getopt
-import random
 import sys
 from collections import deque
 
+import getopt
 import matplotlib.pyplot as plt
 import numpy as np
+import random
 import torch
 from importlib_resources import path
 
@@ -17,8 +17,6 @@ from flatland.utils.rendertools import RenderTool
 from torch_training.dueling_double_dqn import Agent
 from utils.observation_utils import norm_obs_clip, split_tree
 
-print("multi_agent_trainging.py (1)")
-
 
 def main(argv):
     try:
@@ -29,7 +27,6 @@ def main(argv):
     for opt, arg in opts:
         if opt in ('-n', '--n_trials'):
            n_trials = int(arg)
-    print("main1")
     random.seed(1)
     np.random.seed(1)
 
@@ -64,8 +61,6 @@ def main(argv):
     state_size = features_per_node * 85 * 2
     action_size = 5
 
-    print("main3")
-
     # We set the number of episodes we would like to train on
     if 'n_trials' not in locals():
         n_trials = 30000
@@ -91,7 +86,6 @@ def main(argv):
     record_images = False
     frame_step = 0
 
-    print("Going to run training for {} trials...".format(n_trials))
     for trials in range(1, n_trials + 1):
 
         if trials % 50 == 0 and not demo:
@@ -221,10 +215,5 @@ def main(argv):
     plt.show()
 
 
-print("multi_agent_trainging.py (2)")
-
 if __name__ == '__main__':
-    print("main")
     main(sys.argv[1:])
-
-print("multi_agent_trainging.py (3)")
diff --git a/tox.ini b/tox.ini
index 36b7c10a081b629375b202ad1e642e7366db2abe..da528553c74be43bc54ce7858bb057e117cee11a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -22,8 +22,7 @@ passenv =
 deps =
     -r{toxinidir}/requirements_torch_training.txt
 commands =
-    python -m pip install -r requirements_torch_training.txt
-    python torch_training/bla.py --n_trials=10
+    python torch_training/multi_agent_training.py --n_trials=10
 
 [flake8]
 max-line-length = 120