Commit 684fefd3 authored by MasterScrat's avatar MasterScrat

Fixes and hacks to allow the use of a single policy with multiple agents

parent 7d742c53
......@@ -49,7 +49,9 @@ class FlatlandRllibWrapper(object):
for agent, done in dones.items():
if agent != '__all__' and not agent in obs:
continue # skip agent if there is no observation
if agent not in self._agents_done:
# FIXME the check below should be kept in MARL training
#if agent not in self._agents_done:
if True or agent not in self._agents_done:
if agent != '__all__':
if done:
self._agents_done.append(agent)
......
......@@ -73,10 +73,15 @@ class FlatlandSingle(gym.Env):
return env
def step(self, action_dict):
def step(self, action_list):
# print("="*50)
# print(action_dict)
step_r = self._env.step({0: action_dict})
action_dict = {}
for i, action in enumerate(action_list):
action_dict[i] = action
step_r = self._env.step(action_dict)
# print(step_r)
# print("="*50)
......@@ -95,7 +100,7 @@ class FlatlandSingle(gym.Env):
# print(foo)
# print("="*50)
return [step for step in foo.obs.values()],
return [step for step in foo.values()]
#return foo
@property
......
......@@ -45,7 +45,8 @@ def on_episode_end(info):
if agent_info["agent_done"]:
episode_done_agents += 1
assert len(episode._agent_to_last_info) == episode_num_agents
# Not a valid check when considering a single policy for multiple agents
#assert len(episode._agent_to_last_info) == episode_num_agents
norm_factor = 1.0 / (episode_max_steps + episode_num_agents)
percentage_complete = float(episode_done_agents) / episode_num_agents
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment