Commit 81147e7e authored by Egli Adrian (IT-SCI-API-PFI)


Merge branch 'master' of gitlab.aicrowd.com:adrian_egli/neurips2020-flatland-starter-kit into feature/DeadLockAvoidance_RL_PostChallenge

# Conflicts:
#	.gitignore
#	README.md
#	aicrowd.json
#	apt.txt
#	environment.yml
#	my_observation_builder.py
#	reinforcement_learning/dddqn_policy.py
#	reinforcement_learning/evaluate_agent.py
#	reinforcement_learning/model.py
#	reinforcement_learning/multi_agent_training.py
#	reinforcement_learning/ordered_policy.py
#	reinforcement_learning/policy.py
#	reinforcement_learning/ppo/ppo_agent.py
#	reinforcement_learning/single_agent_training.py
#	run.py
#	utils/dead_lock_avoidance_agent.py
#	utils/deadlock_check.py
#	utils/observation_utils.py
#	utils/timer.py
parents 5ceb9c41 3f2468c2
Showing with 226 additions and 18 deletions
.idea/.gitignore 0 → 100644
# Default ignored files
/workspace.xml
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
.idea/misc.xml 0 → 100644
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
.idea/modules.xml 0 → 100644
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/neurips2020-flatland-starter-kit.iml" filepath="$PROJECT_DIR$/.idea/neurips2020-flatland-starter-kit.iml" />
    </modules>
  </component>
</project>
\ No newline at end of file
.idea/neurips2020-flatland-starter-kit.iml 0 → 100644
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="TestRunnerService">
    <option name="PROJECT_TEST_RUNNER" value="pytest" />
  </component>
</module>
\ No newline at end of file
.idea/vcs.xml 0 → 100644
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
\ No newline at end of file
PPO checkpoints will be saved here
File added
File added
File added
#!/bin/bash
if [ -e environ_secret.sh ]
then
    echo "Note: Gathering environment variables from environ_secret.sh"
    source environ_secret.sh
else
    echo "Note: Gathering environment variables from environ.sh"
    source environ.sh
fi

# Expected Env variables : in environ.sh
sudo docker run \
    --net=host \
    -v ./scratch/test-envs:/flatland_envs:z \
    -it ${IMAGE_NAME}:${IMAGE_TAG} \
    /home/aicrowd/run.sh
dump.rdb 0 → 100644
File added
File added
File added
File added
File added
import numpy as np

from flatland.envs.rail_env import RailEnvActions
from reinforcement_learning.policy import Policy
from reinforcement_learning.ppo.ppo_agent import PPOAgent
from utils.dead_lock_avoidance_agent import DeadLockAvoidanceAgent
from utils.extra import ExtraPolicy


class MultiPolicy(Policy):
    """Combines a heuristic ExtraPolicy with a PPO agent: the PPO agent's
    observation is the raw state extended by a one-hot encoding of the
    ExtraPolicy's chosen action."""

    def __init__(self, state_size, action_size, n_agents, env):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = []
        self.loss = 0
        self.extra_policy = ExtraPolicy(state_size, action_size)
        self.ppo_policy = PPOAgent(state_size + action_size, action_size, n_agents, env)

    def load(self, filename):
        self.ppo_policy.load(filename)
        self.extra_policy.load(filename)

    def save(self, filename):
        self.ppo_policy.save(filename)
        self.extra_policy.save(filename)

    def step(self, handle, state, action, reward, next_state, done):
        # Query the heuristic policy greedily (eps=0.0) for both states and
        # append its action as a one-hot vector to each observation.
        action_extra_state = self.extra_policy.act(handle, state, 0.0)
        action_extra_next_state = self.extra_policy.act(handle, next_state, 0.0)
        extended_state = np.copy(state)
        for action_itr in np.arange(self.action_size):
            extended_state = np.append(extended_state, [int(action_extra_state == action_itr)])
        extended_next_state = np.copy(next_state)
        for action_itr in np.arange(self.action_size):
            extended_next_state = np.append(extended_next_state, [int(action_extra_next_state == action_itr)])
        self.extra_policy.step(handle, state, action, reward, next_state, done)
        self.ppo_policy.step(handle, extended_state, action, reward, extended_next_state, done)

    def act(self, handle, state, eps=0.):
        action_extra_state = self.extra_policy.act(handle, state, 0.0)
        extended_state = np.copy(state)
        for action_itr in np.arange(self.action_size):
            extended_state = np.append(extended_state, [int(action_extra_state == action_itr)])
        action_ppo = self.ppo_policy.act(handle, extended_state, eps)
        self.loss = self.ppo_policy.loss
        return action_ppo

    def reset(self):
        self.ppo_policy.reset()
        self.extra_policy.reset()

    def test(self):
        self.ppo_policy.test()
        self.extra_policy.test()

    def start_step(self):
        self.extra_policy.start_step()
        self.ppo_policy.start_step()

    def end_step(self):
        self.extra_policy.end_step()
        self.ppo_policy.end_step()
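Note on the state extension above: appending a one-hot encoding of the ExtraPolicy's greedy action to the raw observation is why PPOAgent is constructed with state_size + action_size inputs. A minimal sketch of that encoding (illustrative only; extend_state is not a function in this commit):

import numpy as np

def extend_state(state, extra_action, action_size):
    # One-hot encode the heuristic's action and append it to the observation.
    one_hot = (np.arange(action_size) == extra_action).astype(float)
    return np.append(state, one_hot)

# A state of length state_size becomes length state_size + action_size,
# matching PPOAgent(state_size + action_size, ...) above.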
import sys

import numpy as np

from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from flatland.utils.rendertools import RenderTool
from pathlib import Path

base_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(base_dir))

from reinforcement_learning.ordered_policy import OrderedPolicy

np.random.seed(2)

x_dim = 20  # np.random.randint(8, 20)
y_dim = 20  # np.random.randint(8, 20)
n_agents = 10  # np.random.randint(3, 8)
n_goals = n_agents + np.random.randint(0, 3)
min_dist = int(0.75 * min(x_dim, y_dim))

env = RailEnv(width=x_dim,
              height=y_dim,
              rail_generator=complex_rail_generator(
                  nr_start_goal=n_goals, nr_extra=5, min_dist=min_dist,
                  max_dist=99999,
                  seed=0
              ),
              schedule_generator=complex_schedule_generator(),
              obs_builder_object=TreeObsForRailEnv(max_depth=1, predictor=ShortestPathPredictorForRailEnv()),
              number_of_agents=n_agents)
env.reset(True, True)

tree_depth = 1
observation_helper = TreeObsForRailEnv(max_depth=tree_depth, predictor=ShortestPathPredictorForRailEnv())
env_renderer = RenderTool(env, gl="PGL")
handle = env.get_agent_handles()
n_episodes = 1
max_steps = 100 * (env.height + env.width)
record_images = False
policy = OrderedPolicy()
action_dict = dict()

for trials in range(1, n_episodes + 1):
    # Reset environment
    obs, info = env.reset(True, True)
    done = env.dones
    env_renderer.reset()
    frame_step = 0

    # Run episode
    for step in range(max_steps):
        env_renderer.render_env(show=True, show_observations=False, show_predictions=True)

        if record_images:
            env_renderer.gl.save_image("./Images/flatland_frame_{:04d}.bmp".format(frame_step))
            frame_step += 1

        # Action
        acting_agent = 0
        for a in range(env.get_num_agents()):
            if done[a]:
                acting_agent += 1
            if a == acting_agent:
                action = policy.act(obs[a])
            else:
                action = 4
            action_dict.update({a: action})

        # Environment step
        obs, all_rewards, done, _ = env.step(action_dict)
        if done['__all__']:
            break
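For context on the hard-coded action = 4 above: in Flatland's action space this integer corresponds to RailEnvActions.STOP_MOVING, so every agent except the current acting agent is held in place while OrderedPolicy routes one train at a time. A quick check (a sketch for illustration, not part of the commit):

from flatland.envs.rail_env import RailEnvActions

# Integer 4 in the action dict is the STOP_MOVING action.
assert int(RailEnvActions.STOP_MOVING) == 4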
#!/bin/bash
# manually install submodules.
python ./run.py
from time import time

import numpy as np

from flatland.envs.rail_env import fast_isclose


def print_timing(label, start_time, end_time):
    print("{:>10.4f}ms".format(1000 * (end_time - start_time)) + "\t" + label)


def check_isclose(nbr=100000):
    s = time()
    for x in range(nbr):
        fast_isclose(x, 0.0, rtol=1e-03)
    e = time()
    print_timing("fast_isclose", start_time=s, end_time=e)

    s = time()
    for x in range(nbr):
        np.isclose(x, 0.0, rtol=1e-03)
    e = time()
    print_timing("np.isclose", start_time=s, end_time=e)


if __name__ == "__main__":
    check_isclose()
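The benchmark above times per-scalar calls, the pattern Flatland's fast_isclose targets; np.isclose pays Python-level and array-creation overhead on every such call. A minimal sketch of the vectorized counterpart (an illustrative addition, not part of the commit), where NumPy amortizes that overhead over the whole array:

import numpy as np
from time import time

# One vectorized np.isclose call over 100000 values
# instead of 100000 scalar calls.
xs = np.arange(100000, dtype=float)
s = time()
np.isclose(xs, 0.0, rtol=1e-03)
e = time()
print("{:>10.4f}ms".format(1000 * (e - s)) + "\t" + "np.isclose (vectorized)")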