diff --git a/examples/training_navigation.py b/examples/training_navigation.py index 2704e84463279638c6e86ae8c3259c5653b9c508..7996d03c6923147e9e4d8e1ec168b2fb43a4b410 100644 --- a/examples/training_navigation.py +++ b/examples/training_navigation.py @@ -109,9 +109,10 @@ for trials in range(1, n_trials + 1): for a in range(env.number_of_agents): if demo: eps = 0 - action = 2# agent.act(np.array(obs[a]), eps=eps) + action = agent.act(np.array(obs[a]), eps=eps) action_prob[action] += 1 action_dict.update({a: action}) + env.obs_builder.util_print_obs_subtree(tree=obs[a], num_features_per_node=5) # Environment step next_obs, all_rewards, done, _ = env.step(action_dict) for a in range(env.number_of_agents): @@ -126,7 +127,7 @@ for trials in range(1, n_trials + 1): if done['__all__']: env_done = 1 break - # Epsioln decay + # Epsilon decay eps = max(eps_end, eps_decay * eps) # decrease epsilon done_window.append(env_done) diff --git a/flatland/core/env_observation_builder.py b/flatland/core/env_observation_builder.py index ac5bfd8b867fed728de79fc2f2270381fdde04af..b5f7ef564694d077fe753bf7857f0a72e3e53a88 100644 --- a/flatland/core/env_observation_builder.py +++ b/flatland/core/env_observation_builder.py @@ -326,7 +326,7 @@ class TreeObsForRailEnv(ObservationBuilder): if not last_isDeadEnd: # Keep walking through the tree along `direction' exploring = True - + # TODO: Remove the calculation below; it is already computed above and could be reused for i in range(4): if cell_transitions[i]: position = self._new_position(position, i)