Skip to content
Snippets Groups Projects
Commit c61daad7 authored by Erik Nygren
Browse files

minor bugfix in checking for normalizing value

parent 84962d1d
No related branches found
No related tags found
No related merge requests found
...@@ -10,8 +10,8 @@ np.random.seed(1) ...@@ -10,8 +10,8 @@ np.random.seed(1)
# Example generate a rail given a manual specification, # Example generate a rail given a manual specification,
# a map of tuples (cell_type, rotation) # a map of tuples (cell_type, rotation)
transition_probability = [10.0, # empty cell - Case 0 transition_probability = [0.5, # empty cell - Case 0
50.0, # Case 1 - straight 1.0, # Case 1 - straight
1.0, # Case 2 - simple switch 1.0, # Case 2 - simple switch
0.3, # Case 3 - diamond crossing 0.3, # Case 3 - diamond crossing
0.5, # Case 4 - single slip 0.5, # Case 4 - single slip
...@@ -20,8 +20,8 @@ transition_probability = [10.0, # empty cell - Case 0 ...@@ -20,8 +20,8 @@ transition_probability = [10.0, # empty cell - Case 0
0.0] # Case 7 - dead end 0.0] # Case 7 - dead end
# Example generate a random rail # Example generate a random rail
env = RailEnv(width=5, env = RailEnv(width=7,
height=5, height=7,
rail_generator=random_rail_generator(cell_type_relative_proportion=transition_probability), rail_generator=random_rail_generator(cell_type_relative_proportion=transition_probability),
number_of_agents=1) number_of_agents=1)
env_renderer = RenderTool(env) env_renderer = RenderTool(env)
...@@ -49,7 +49,7 @@ def max_lt(seq, val): ...@@ -49,7 +49,7 @@ def max_lt(seq, val):
idx = len(seq)-1 idx = len(seq)-1
while idx >= 0: while idx >= 0:
if seq[idx] < val and seq[idx] > 0: if seq[idx] < val and seq[idx] >= 0:
return seq[idx] return seq[idx]
idx -= 1 idx -= 1
return None return None
...@@ -110,6 +110,7 @@ for trials in range(1, n_trials + 1): ...@@ -110,6 +110,7 @@ for trials in range(1, n_trials + 1):
eps, action_prob/np.sum(action_prob)), eps, action_prob/np.sum(action_prob)),
end=" ") end=" ")
if trials % 100 == 0: if trials % 100 == 0:
action_prob = [1]*4
print( print(
'\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format( '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format(
env.number_of_agents, env.number_of_agents,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment