From 08d70296cf03ca920444c09bea16de55d3539353 Mon Sep 17 00:00:00 2001
From: Erik Nygren <erik.nygren@sbb.ch>
Date: Sun, 1 Sep 2019 11:19:41 -0400
Subject: [PATCH] only store observations in buffer when you are allowed to take an action!

---
 torch_training/multi_agent_training.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/torch_training/multi_agent_training.py b/torch_training/multi_agent_training.py
index 4822704..ba30d46 100644
--- a/torch_training/multi_agent_training.py
+++ b/torch_training/multi_agent_training.py
@@ -175,6 +175,10 @@ def main(argv):
 
             # Action
             for a in range(env.get_num_agents()):
+                if env.agents[a].speed_data['position_fraction'] == 0.:
+                    register_action_state[a] = True
+                else:
+                    register_action_state[a] = False
                 action = agent.act(agent_obs[a], eps=eps)
                 action_prob[action] += 1
                 action_dict.update({a: action})
@@ -192,7 +196,7 @@ def main(argv):
                     final_obs[a] = agent_obs[a].copy()
                     final_obs_next[a] = agent_next_obs[a].copy()
                     final_action_dict.update({a: action_dict[a]})
-                if not done[a]:
+                if not done[a] and register_action_state[a]:
                     agent.step(agent_obs[a], action_dict[a], all_rewards[a],
                                agent_next_obs[a], done[a])
                 score += all_rewards[a] / env.get_num_agents()
--
GitLab
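
For context, a minimal sketch of the gating pattern this patch introduces, assuming the surrounding training loop of multi_agent_training.py (the names env, agent, agent_obs, agent_next_obs, action_dict, all_rewards, done and eps are taken from that script; the two helper functions themselves are hypothetical, not repository code): a transition is stored in the replay buffer only for agents whose speed_data['position_fraction'] was 0. when the action was chosen, i.e. agents that were actually allowed to pick a new action at that step.

# Hypothetical helpers illustrating the idea of the patch; not code from the repository.

def choose_actions_and_mark_decision_points(env, agent, agent_obs, eps):
    """Pick an action per agent and record which agents could actually act."""
    action_dict = {}
    register_action_state = {}
    for a in range(env.get_num_agents()):
        # position_fraction == 0. means the agent sits exactly on a cell, so a
        # newly chosen action takes effect; otherwise the previous move is
        # still in progress and the action is ignored by the environment.
        register_action_state[a] = env.agents[a].speed_data['position_fraction'] == 0.
        action_dict[a] = agent.act(agent_obs[a], eps=eps)
    return action_dict, register_action_state


def store_transitions(env, agent, agent_obs, action_dict, all_rewards,
                      agent_next_obs, done, register_action_state):
    """Add only real decision points to the replay buffer; return the step score."""
    score = 0.0
    for a in range(env.get_num_agents()):
        if not done[a] and register_action_state[a]:
            # Only transitions where the chosen action could influence the
            # environment are stored as (s, a, r, s', done) training samples.
            agent.step(agent_obs[a], action_dict[a], all_rewards[a],
                       agent_next_obs[a], done[a])
        score += all_rewards[a] / env.get_num_agents()
    return score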