Commit bbc168ab authored by Adrian Egli

.

parent 8b162e9c
.idea/.gitignore
# Default ignored files
/workspace.xml
.idea/misc.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
.idea/modules.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/neurips2020-flatland-starter-kit.iml" filepath="$PROJECT_DIR$/.idea/neurips2020-flatland-starter-kit.iml" />
</modules>
</component>
</project>
\ No newline at end of file
.idea/neurips2020-flatland-starter-kit.iml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="pytest" />
</component>
</module>
\ No newline at end of file
.idea/vcs.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
README.md
@@ -7,9 +7,28 @@
![flatland](https://i.imgur.com/0rnbSLY.gif)
# Round 1 - 3rd best RL solution
## Used agent
* [PPO Agent -> Mitchell Goff](https://github.com/mitchellgoffpc/flatland-training)
## LICENCE for the Observation EXTRA.py
The observation can be used freely and reused in further submissions. The author only needs to be credited/mentioned in any submission that uses the entire observation, parts of it, or its main idea.
Author: Adrian Egli (adrian.egli@gmail.com)
[LinkedIn](https://www.linkedin.com/in/adrian-egli-733a9544/)
[ResearchGate](https://www.researchgate.net/profile/Adrian_Egli2)
Main links
---
* [Submit in 10 minutes](https://flatland.aicrowd.com/getting-started/first-submission.html?_ga=2.175036450.1456714032.1596434204-43124944.1552486604)
* [Flatland documentation](https://flatland.aicrowd.com/)
* [NeurIPS 2020 Challenge](https://www.aicrowd.com/challenges/neurips-2020-flatland-challenge/)
No preview for binary checkpoint files (model_checkpoint.meta, model_checkpoint.optimizer, model_checkpoint.policy)
run.py
@@ -30,7 +30,7 @@ def my_controller(extra: Extra, observation, info):
# the example here :
# https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/envs/observations.py#L14
#####################################################################
my_observation_builder = Extra(max_depth=2)
my_observation_builder = Extra(max_depth=20)
# Or if you want to use your own approach to build the observation from the env_step,
# please feel free to pass a DummyObservationBuilder() object as mentioned below,
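For readers unfamiliar with how the builder above is consumed, here is a minimal sketch of how an observation builder such as `Extra` plugs into a `RailEnv`, assuming the flatland-rl 2.x API used for the NeurIPS 2020 challenge and that `Extra` is importable as in run.py; the generator settings are illustrative, not values from this commit:

```python
# Minimal sketch (flatland-rl 2.x API assumed); generator settings are illustrative.
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator

my_observation_builder = Extra(max_depth=20)  # deeper lookahead after this commit

env = RailEnv(
    width=30,
    height=30,
    rail_generator=sparse_rail_generator(max_num_cities=3),
    schedule_generator=sparse_schedule_generator(),
    number_of_agents=5,
    obs_builder_object=my_observation_builder,  # the custom builder is plugged in here
)
observations, info = env.reset()
```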
@@ -112,6 +112,8 @@ while True:
print("w : ", extra.env.width)
print("h : ", extra.env.height)
old_total_done = 0
old_total_active = 0
while True:
#####################################################################
# Evaluation of a single episode
@@ -141,7 +143,11 @@
x = (local_env.agents[a].status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED])
total_done += int(x)
total_active += int(local_env.agents[a].status == RailAgentStatus.ACTIVE)
# print("total_done:", total_done, "\ttotal_active", total_active, "\t num agents", local_env.get_num_agents())
if old_total_done != total_done or old_total_active != total_active:
print("total_done:", total_done, "\ttotal_active", total_active, "\t num agents",
local_env.get_num_agents())
old_total_done = total_done
old_total_active = total_active
if done['__all__']:
print("Reward : ", sum(list(all_rewards.values())))
EXTRA.py
@@ -19,7 +19,7 @@
#
# Private submission
# http://gitlab.aicrowd.com/adrian_egli/neurips2020-flatland-starter-kit/issues/8
#
import numpy as np
from flatland.core.env_observation_builder import ObservationBuilder
from flatland.core.grid.grid4_utils import get_new_position
@@ -66,9 +66,7 @@ def fast_count_nonzero(possible_transitions: (int, int, int, int)):
class Extra(ObservationBuilder):
def __init__(self, max_depth):
self.max_depth = max_depth
self.observation_dim = 22
self.agent = None
@@ -190,6 +188,9 @@
return obsData
def is_collision(self, obsData):
return False
def intern_is_collision(self, obsData):
if np.sum(obsData[10:14]) == 0:
return False
if np.sum(obsData[10:14]) == np.sum(obsData[14:18]):
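The hunk above is cut off by the diff view before the `True` branch returns. Based on the observation layout documented further down in this diff (slots 10-13 appear to flag a usable path per direction and slots 14-17 an opposing agent on that path; the latter mapping is an inference, not stated in the commit), the heuristic plausibly reads as the sketch below, not as verbatim commit code:

```python
import numpy as np

def intern_is_collision_sketch(obsData):
    # Hedged reconstruction of the truncated hunk above; slot meanings are
    # inferred from the observation-layout comments later in this diff.
    if np.sum(obsData[10:14]) == 0:
        return False  # no usable path in any direction -> nothing to collide with
    if np.sum(obsData[10:14]) == np.sum(obsData[14:18]):
        return True   # every usable path carries an opposing agent -> deadlock
    return False
```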
@@ -244,15 +245,20 @@
return has_opp_agent, has_same_agent, visited
if agents_on_switch:
pt = 0
for dir_loop in range(4):
if possible_transitions[dir_loop] == 1:
pt += 1
hoa, hsa, v = self._explore(handle,
get_new_position(new_position, dir_loop),
dir_loop,
depth + 1)
visited.append(v)
has_opp_agent = 0.5 * (has_opp_agent + hoa)
has_same_agent = 0.5 * (has_same_agent + hsa)
has_opp_agent += hoa
has_same_agent += hsa
if pt > 0:
has_opp_agent /= pt
has_same_agent /= pt
return has_opp_agent, has_same_agent, visited
else:
new_direction = fast_argmax(possible_transitions)
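The change above swaps the old pairwise running average `0.5 * (x + new)` for an arithmetic mean over all open branches of the switch. A self-contained sketch of that aggregation, where the `explore` callback is a hypothetical stand-in for the recursive `self._explore` call:

```python
def average_over_branches(possible_transitions, explore):
    """Mean opposing/same-agent score over every open branch of a switch.

    `explore(direction)` is assumed to return (has_opp_agent, has_same_agent)
    for the branch entered in that direction, mirroring self._explore.
    """
    has_opp_agent, has_same_agent, branches = 0.0, 0.0, 0
    for dir_loop in range(4):
        if possible_transitions[dir_loop] == 1:
            branches += 1
            hoa, hsa = explore(dir_loop)
            has_opp_agent += hoa
            has_same_agent += hsa
    if branches > 0:                # normalize by the number of explored branches
        has_opp_agent /= branches
        has_same_agent /= branches
    return has_opp_agent, has_same_agent
```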
@@ -267,7 +273,7 @@
# observation[3] : 1 path towards target (direction 3) / otherwise 0 -> path is longer or there is no path
# observation[4] : int(agent.status == RailAgentStatus.READY_TO_DEPART)
# observation[5] : int(agent.status == RailAgentStatus.ACTIVE)
# observation[6] : int(agent.status == RailAgentStatus.DONE or agent.status == RailAgentStatus.DONE_REMOVED)
# observation[6] : estimated deadlock (collision) -> 1, otherwise 0
# observation[7] : current agent is located at a switch, where it can take a routing decision
# observation[8] : current agent is located at a cell, where it has to take a stop-or-go decision
# observation[9] : current agent is located one step before/after a switch
@@ -284,7 +290,7 @@
# observation[20] : If there is a path with step (direction 2) and there is an agent with same direction -> 1
# observation[21] : If there is a path with step (direction 3) and there is an agent with same direction -> 1
observation = np.zeros(self.observation_dim)
observation = np.zeros(self.observation_dim) - 1
visited = []
agent = self.env.agents[handle]
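Because the 22-dim vector above is addressed with bare integer indices, a small map of the documented slots can help when reading the rest of the diff. The constant names below are hypothetical (not part of the commit), and the meanings of slots 10-17 are inferred from the surrounding comments:

```python
import numpy as np

# Hypothetical named indices for the 22-dim observation documented above.
OBS_PATH_TOWARDS_TARGET = slice(0, 4)   # shortest-path flag per direction 0..3
OBS_READY_TO_DEPART = 4
OBS_ACTIVE = 5
OBS_DEADLOCK_ESTIMATED = 6              # slot redefined by this commit
OBS_AT_DECISION_SWITCH = 7
OBS_STOP_OR_GO_CELL = 8
OBS_NEAR_SWITCH = 9
OBS_PATH_EXISTS = slice(10, 14)         # inferred: usable path per direction
OBS_OPP_AGENT_ON_PATH = slice(14, 18)   # inferred: opposing agent per direction
OBS_SAME_AGENT_ON_PATH = slice(18, 22)  # agent with same direction per direction

obs = np.zeros(22) - 1                  # unset slots now start at -1, per this commit
deadlocked = obs[OBS_DEADLOCK_ESTIMATED] == 1
```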
@@ -296,7 +302,7 @@
agent_virtual_position = agent.position
observation[5] = 1
else:
observation[6] = 1
agent_virtual_position = (-1, -1)
agent_done = True
@@ -335,6 +340,8 @@
observation[8] = int(agents_near_to_switch)
observation[9] = int(agents_near_to_switch_all)
observation[6] = int(self.intern_is_collision(observation))
self.env.dev_obs_dict.update({handle: visited})
return observation
@@ -351,6 +358,26 @@
return action_dict
def rl_agent_act_X(self, observation, info, eps=0.0):
self.loadAgent()
action_dict = {}
active_cnt = 0
for a in range(self.env.get_num_agents()):
if active_cnt < 1 or self.env.agents[a].status == RailAgentStatus.ACTIVE:
if observation[a][6] == 0:
active_cnt += int(self.env.agents[a].status < RailAgentStatus.DONE)
if info['action_required'][a]:
action_dict[a] = self.agent.act(observation[a], eps=eps)
# action_dict[a] = np.random.randint(5)
else:
action_dict[a] = RailEnvActions.MOVE_FORWARD
else:
action_dict[a] = RailEnvActions.STOP_MOVING
else:
action_dict[a] = RailEnvActions.STOP_MOVING
return action_dict
def loadAgent(self):
if self.agent is not None:
return
@@ -359,4 +386,4 @@
print("action_size: ", self.action_size)
print("state_size: ", self.state_size)
self.agent = Agent(self.state_size, self.action_size, 0)
self.agent.load('./checkpoints/', 0, 1.0)
\ No newline at end of file
self.agent.load('./checkpoints/', 0, 1.0)
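The dispatch logic added in `rl_agent_act_X` above throttles departures: at most one not-yet-ACTIVE agent is released per step, and any agent whose observation predicts a deadlock (slot 6) is parked. A minimal sketch of that gate, using flatland's `RailAgentStatus`/`RailEnvActions` enums but a hypothetical `policy_action` argument standing in for `self.agent.act(...)`:

```python
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.rail_env import RailEnvActions

def gate_action(agent_status, obs, active_cnt, policy_action):
    # Release an agent only if it is already ACTIVE or nothing else has been
    # released this step (active_cnt < 1); otherwise hold it at its source.
    if active_cnt < 1 or agent_status == RailAgentStatus.ACTIVE:
        if obs[6] == 0:                    # no deadlock predicted for this agent
            return policy_action           # act with the trained policy
        return RailEnvActions.STOP_MOVING  # predicted deadlock: hold position
    return RailEnvActions.STOP_MOVING      # throttle additional departures
```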