Skip to content
Snippets Groups Projects
Commit ed1f9cc4 authored by Erik Nygren
Browse files

fixing learning issues

parent a7ac4c7f
No related branches found
No related tags found
No related merge requests found
...@@ -125,6 +125,7 @@ def main(argv): ...@@ -125,6 +125,7 @@ def main(argv):
# Build agent specific observations # Build agent specific observations
for a in range(env.get_num_agents()): for a in range(env.get_num_agents()):
agent_obs[a] = normalize_observation(obs[a], observation_radius=10) agent_obs[a] = normalize_observation(obs[a], observation_radius=10)
agent_obs_buffer[a] = agent_obs[a].copy()
# Reset score and done # Reset score and done
score = 0 score = 0
...@@ -136,11 +137,13 @@ def main(argv): ...@@ -136,11 +137,13 @@ def main(argv):
for a in range(env.get_num_agents()): for a in range(env.get_num_agents()):
if env.agents[a].speed_data['position_fraction'] < 0.001: if env.agents[a].speed_data['position_fraction'] < 0.001:
register_action_state[a] = True register_action_state[a] = True
action = agent.act(agent_obs[a], eps=eps)
action_prob[action] += 1
if step == 0:
agent_action_buffer[a] = action
else: else:
register_action_state[a] = False register_action_state[a] = False
action = 0
action = agent.act(agent_obs[a], eps=eps)
action_prob[action] += 1
action_dict.update({a: action}) action_dict.update({a: action})
# Environment step # Environment step
...@@ -150,6 +153,7 @@ def main(argv): ...@@ -150,6 +153,7 @@ def main(argv):
for a in range(env.get_num_agents()): for a in range(env.get_num_agents()):
agent_next_obs[a] = normalize_observation(next_obs[a], observation_radius=10) agent_next_obs[a] = normalize_observation(next_obs[a], observation_radius=10)
cummulated_reward[a] += all_rewards[a] cummulated_reward[a] += all_rewards[a]
# Update replay buffer and train agent # Update replay buffer and train agent
for a in range(env.get_num_agents()): for a in range(env.get_num_agents()):
if done[a]: if done[a]:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment