Skip to content
Snippets Groups Projects
Commit af698037 authored by Erik Nygren
Browse files

minor bugfixes and added TODO for observation speed up

parent a355b142
No related branches found
No related tags found
No related merge requests found
......@@ -109,9 +109,10 @@ for trials in range(1, n_trials + 1):
for a in range(env.number_of_agents):
if demo:
eps = 0
action = 2# agent.act(np.array(obs[a]), eps=eps)
action = agent.act(np.array(obs[a]), eps=eps)
action_prob[action] += 1
action_dict.update({a: action})
env.obs_builder.util_print_obs_subtree(tree=obs[a], num_features_per_node=5)
# Environment step
next_obs, all_rewards, done, _ = env.step(action_dict)
for a in range(env.number_of_agents):
......@@ -126,7 +127,7 @@ for trials in range(1, n_trials + 1):
if done['__all__']:
env_done = 1
break
# Epsioln decay
# Epsilon decay
eps = max(eps_end, eps_decay * eps) # decrease epsilon
done_window.append(env_done)
......
......@@ -326,7 +326,7 @@ class TreeObsForRailEnv(ObservationBuilder):
if not last_isDeadEnd:
# Keep walking through the tree along `direction'
exploring = True
# TODO: Remove below calculation, this is computed already above and could be reused
for i in range(4):
if cell_transitions[i]:
position = self._new_position(position, i)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment