diff --git a/.idea/.gitignore b/.idea/.gitignore
index e7e9d11d4bf243bffe4bb60b4ac1f9392297f4bf..d6fd6f21b4f8d95e972cf75f62bebcdc4537a139 100644
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -1,2 +1,2 @@
-# Default ignored files
-/workspace.xml
+# Default ignored files
+/workspace.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 65531ca992813bbfedbe43dfae5a5f4337168ed8..5417b684e7619b0b44d2f9d8be364ac8fb576783 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
index 925278cb163e3cc6c725cda433b8df8b625c3f0b..c9b6e2db97b91b3f14bb3798b10f26f334334bb6 100644
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -1,8 +1,8 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/neurips2020-flatland-starter-kit.iml" filepath="$PROJECT_DIR$/.idea/neurips2020-flatland-starter-kit.iml" />
-    </modules>
-  </component>
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/neurips2020-flatland-starter-kit.iml" filepath="$PROJECT_DIR$/.idea/neurips2020-flatland-starter-kit.iml" />
+    </modules>
+  </component>
 </project>
\ No newline at end of file
diff --git a/.idea/neurips2020-flatland-starter-kit.iml b/.idea/neurips2020-flatland-starter-kit.iml
index 8dc09e5476bcb840206461450ae44f23421d964a..951c9286734f053453803b04b5335bb575715344 100644
--- a/.idea/neurips2020-flatland-starter-kit.iml
+++ b/.idea/neurips2020-flatland-starter-kit.iml
@@ -1,11 +1,11 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-  <component name="TestRunnerService">
-    <option name="PROJECT_TEST_RUNNER" value="pytest" />
-  </component>
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="pytest" />
+  </component>
 </module>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
index 94a25f7f4cb416c083d265558da75d457237d671..9661ac713428efbad557d3ba3a62216b5bb7d226 100644
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -1,6 +1,6 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$" vcs="Git" />
-  </component>
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
 </project>
\ No newline at end of file
diff --git a/README.md b/README.md
index 22846ae8c532fa537e79ca807fe71860100f1f23..8232bb1e569f62e96028faf3bc31adc12c9c201e 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,28 @@
 
+
+# Round 1 - 3rd best RL solution
+
+## Used agent
+* [PPO Agent -> Mitchell Goff](https://github.com/mitchellgoffpc/flatland-training)
+
+## LICENCE for the Observation EXTRA.py
+
+The observation can be used freely and reused for further submissions. The author only needs to be
+credited/mentioned in any submission that uses the entire observation, parts of it, or its main idea.
+
+Author: Adrian Egli (adrian.egli@gmail.com)
+
+[Linkedin](https://www.linkedin.com/in/adrian-egli-733a9544/)
+[Researchgate](https://www.researchgate.net/profile/Adrian_Egli2)
+
+
+
 Main links
 ---
-
+* [Submit in 10 minutes](https://flatland.aicrowd.com/getting-started/first-submission.html?_ga=2.175036450.1456714032.1596434204-43124944.1552486604)
 * [Flatland documentation](https://flatland.aicrowd.com/)
 * [NeurIPS 2020 Challenge](https://www.aicrowd.com/challenges/neurips-2020-flatland-challenge/)
diff --git a/checkpoints/ppo/model_checkpoint.meta b/checkpoints/ppo/model_checkpoint.meta
index e998bcda155cbd11e6f1b70e77966ed92812930c..31959a4680ed59613594bfb9418b1e3497ad6ce1 100644
Binary files a/checkpoints/ppo/model_checkpoint.meta and b/checkpoints/ppo/model_checkpoint.meta differ
diff --git a/checkpoints/ppo/model_checkpoint.optimizer b/checkpoints/ppo/model_checkpoint.optimizer
index 0fbd49e2e1fa62c34dff663c9d47bd53f61128f3..89630787199743db98bebfd8f3132c681a60a099 100644
Binary files a/checkpoints/ppo/model_checkpoint.optimizer and b/checkpoints/ppo/model_checkpoint.optimizer differ
diff --git a/checkpoints/ppo/model_checkpoint.policy b/checkpoints/ppo/model_checkpoint.policy
index 4868691cf2f12669df395b845b2a903c1d917336..6049b699289690113a6eed1bca3545cd12bf4c4e 100644
Binary files a/checkpoints/ppo/model_checkpoint.policy and b/checkpoints/ppo/model_checkpoint.policy differ
diff --git a/dump.rdb b/dump.rdb
index ba7e97e6ff27c20f75b47463e01777453f724a57..bcb60c2ec208cac6ff4ea41cc5bd2d73a8e3e945 100644
Binary files a/dump.rdb and b/dump.rdb differ
diff --git a/run.py b/run.py
index 27c3107896fde363315d78eadd778ffe2ac99f7e..08f32e4b1c10916b44ed0a5363ea3d8470ffa5f0 100644
--- a/run.py
+++ b/run.py
@@ -30,7 +30,7 @@ def my_controller(extra: Extra, observation, info):
 # the example here :
 # https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/envs/observations.py#L14
 #####################################################################
-my_observation_builder = Extra(max_depth=2)
+my_observation_builder = Extra(max_depth=20)
 
 # Or if you want to use your own approach to build the observation from the env_step,
 # please feel free to pass a DummyObservationBuilder() object as mentioned below,
@@ -112,6 +112,8 @@
     print("w : ", extra.env.width)
     print("h : ", extra.env.height)
 
+    old_total_done = 0
+    old_total_active = 0
     while True:
         #####################################################################
         # Evaluation of a single episode
@@ -141,7 +143,11 @@
             x = (local_env.agents[a].status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED])
             total_done += int(x)
             total_active += int(local_env.agents[a].status == RailAgentStatus.ACTIVE)
-        # print("total_done:", total_done, "\ttotal_active", total_active, "\t num agents", local_env.get_num_agents())
+        if old_total_done != total_done or old_total_active != total_active:
+            print("total_done:", total_done, "\ttotal_active", total_active, "\t num agents",
+                  local_env.get_num_agents())
+            old_total_done = total_done
+            old_total_active = total_active
 
         if done['__all__']:
            print("Reward : ", sum(list(all_rewards.values())))
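
Annotation (not part of the patch): the run.py hunks above raise the observation depth passed to `Extra` from 2 to 20 and replace the commented-out per-step print with logging that only fires when the done/active counters change. The snippet below is a minimal, self-contained sketch of that change-triggered logging pattern; the `make_progress_logger` helper and the simulated counts are illustrative and not part of the repository.

```python
# Sketch of the change-triggered progress logging introduced in run.py:
# print only when the (total_done, total_active) pair differs from the
# last values that were printed.
def make_progress_logger():
    last = {"done": None, "active": None}

    def log_progress(total_done, total_active, num_agents):
        if total_done != last["done"] or total_active != last["active"]:
            print("total_done:", total_done,
                  "\ttotal_active", total_active,
                  "\t num agents", num_agents)
            last["done"] = total_done
            last["active"] = total_active

    return log_progress


if __name__ == "__main__":
    log = make_progress_logger()
    # Simulated per-step counts; only the three distinct states get printed.
    for done, active in [(0, 3), (0, 3), (1, 2), (1, 2), (3, 0)]:
        log(done, active, num_agents=3)
```
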
: ", sum(list(all_rewards.values()))) diff --git a/src/extra.py b/src/extra.py index e5a75c8d6eb82997aff52f7b14263e78edc39693..0d948bd4796888fdbcbe361d14379f5db31b3b41 100644 --- a/src/extra.py +++ b/src/extra.py @@ -19,7 +19,7 @@ # # Private submission # http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/8 - +# import numpy as np from flatland.core.env_observation_builder import ObservationBuilder from flatland.core.grid.grid4_utils import get_new_position @@ -66,9 +66,7 @@ def fast_count_nonzero(possible_transitions: (int, int, int, int)): class Extra(ObservationBuilder): def __init__(self, max_depth): - self.max_depth = max_depthmodel_checkpoint.meta -model_checkpoint.optimizer -model_checkpoint.policy + self.max_depth = max_depth self.observation_dim = 22 self.agent = None @@ -190,6 +188,9 @@ model_checkpoint.policy return obsData def is_collision(self, obsData): + return False + + def intern_is_collision(self, obsData): if np.sum(obsData[10:14]) == 0: return False if np.sum(obsData[10:14]) == np.sum(obsData[14:18]): @@ -244,15 +245,20 @@ model_checkpoint.policy return has_opp_agent, has_same_agent, visited if agents_on_switch: + pt = 0 for dir_loop in range(4): if possible_transitions[dir_loop] == 1: + pt += 1 hoa, hsa, v = self._explore(handle, get_new_position(new_position, dir_loop), dir_loop, depth + 1) visited.append(v) - has_opp_agent = 0.5 * (has_opp_agent + hoa) - has_same_agent = 0.5 * (has_same_agent + hsa) + has_opp_agent += hoa + has_same_agent + hsa + if pt > 0: + has_opp_agent /= pt + has_same_agent /= pt return has_opp_agent, has_same_agent, visited else: new_direction = fast_argmax(possible_transitions) @@ -267,7 +273,7 @@ model_checkpoint.policy # observation[3] : 1 path towards target (direction 3) / otherwise 0 -> path is longer or there is no path # observation[4] : int(agent.status == RailAgentStatus.READY_TO_DEPART) # observation[5] : int(agent.status == RailAgentStatus.ACTIVE) - # observation[6] : int(agent.status == RailAgentStatus.DONE or agent.status == RailAgentStatus.DONE_REMOVED) + # observation[6] : deadlock estimated (collision) 1 otherwise 0 # observation[7] : current agent is located at a switch, where it can take a routing decision # observation[8] : current agent is located at a cell, where it has to take a stop-or-go decision # observation[9] : current agent is located one step before/after a switch @@ -284,7 +290,7 @@ model_checkpoint.policy # observation[20] : If there is a path with step (direction 2) and there is a agent with same direction -> 1 # observation[21] : If there is a path with step (direction 3) and there is a agent with same direction -> 1 - observation = np.zeros(self.observation_dim) + observation = np.zeros(self.observation_dim) - 1 visited = [] agent = self.env.agents[handle] @@ -296,7 +302,6 @@ model_checkpoint.policy agent_virtual_position = agent.position observation[5] = 1 else: - observation[6] = 1 agent_virtual_position = (-1, -1) agent_done = True @@ -335,6 +340,8 @@ model_checkpoint.policy observation[8] = int(agents_near_to_switch) observation[9] = int(agents_near_to_switch_all) + observation[6] = int(self.intern_is_collision(observation)) + self.env.dev_obs_dict.update({handle: visited}) return observation @@ -351,6 +358,26 @@ model_checkpoint.policy return action_dict + def rl_agent_act_X(self, observation, info, eps=0.0): + self.loadAgent() + action_dict = {} + active_cnt = 0 + for a in range(self.env.get_num_agents()): + if active_cnt < 1 or self.env.agents[a].status == 
@@ -351,6 +358,26 @@ model_checkpoint.policy
 
         return action_dict
 
+    def rl_agent_act_X(self, observation, info, eps=0.0):
+        self.loadAgent()
+        action_dict = {}
+        active_cnt = 0
+        for a in range(self.env.get_num_agents()):
+            if active_cnt < 1 or self.env.agents[a].status == RailAgentStatus.ACTIVE:
+                if observation[a][6] == 0:
+                    active_cnt += int(self.env.agents[a].status < RailAgentStatus.DONE)
+                    if info['action_required'][a]:
+                        action_dict[a] = self.agent.act(observation[a], eps=eps)
+                        # action_dict[a] = np.random.randint(5)
+                    else:
+                        action_dict[a] = RailEnvActions.MOVE_FORWARD
+                else:
+                    action_dict[a] = RailEnvActions.STOP_MOVING
+            else:
+                action_dict[a] = RailEnvActions.STOP_MOVING
+
+        return action_dict
+
     def loadAgent(self):
         if self.agent is not None:
             return
@@ -359,4 +386,4 @@
         print("action_size: ", self.action_size)
         print("state_size: ", self.state_size)
         self.agent = Agent(self.state_size, self.action_size, 0)
-        self.agent.load('./checkpoints/', 0, 1.0)
\ No newline at end of file
+        self.agent.load('./checkpoints/', 0, 1.0)
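
Annotation (not part of the patch): the new `rl_agent_act_X` helper sits next to the existing act method and appears to be an experimental variant. Per agent it applies three gates: agents that are already ACTIVE (or the first not-yet-finished agent encountered) are considered, agents whose observation flags a predicted deadlock (`observation[a][6] != 0`) are stopped, and the loaded policy is only queried when flatland reports `action_required`; otherwise the agent simply keeps moving forward. The sketch below condenses that decision order into one pure function; the `Action` enum and the `policy_act` callback are stand-ins, not the flatland API.

```python
from enum import IntEnum


class Action(IntEnum):
    # Stand-in action codes; the real project uses flatland's RailEnvActions.
    MOVE_FORWARD = 2
    STOP_MOVING = 4


def choose_action(is_candidate, collision_flag, action_required, policy_act):
    """Condensed decision order of rl_agent_act_X for a single agent.

    is_candidate    -- already ACTIVE, or the per-call dispatch budget still allows it
    collision_flag  -- observation[6] of that agent (non-zero = predicted deadlock)
    action_required -- flatland's info['action_required'] entry for the agent
    policy_act      -- callback returning the loaded policy's action
    """
    if not is_candidate:
        return Action.STOP_MOVING   # hold back: dispatch budget already used
    if collision_flag != 0:
        return Action.STOP_MOVING   # predicted deadlock: stop instead of acting
    if action_required:
        return policy_act()         # let the loaded policy decide
    return Action.MOVE_FORWARD      # nothing to decide: keep rolling


if __name__ == "__main__":
    print(choose_action(True, 0, True, lambda: Action.MOVE_FORWARD))   # policy acts
    print(choose_action(True, 1, True, lambda: Action.MOVE_FORWARD))   # stopped
    print(choose_action(False, 0, True, lambda: Action.MOVE_FORWARD))  # stopped
```
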