Skip to content
Snippets Groups Projects
Commit 0da9e3c3 authored by Erik Nygren
Browse files

minor updates to how multi speed steps are handled

parent d83c686d
No related branches found
No related tags found
No related merge requests found
......@@ -294,7 +294,8 @@ class RailEnv(Environment):
alpha = 1.0
beta = 1.0
# Epsilon to avoid rounding errors
epsilon = 0.01
invalid_action_penalty = 0 # previously -2; GIACOMO: we decided that invalid actions will carry no penalty
step_penalty = -1 * alpha
global_reward = 1 * beta
......@@ -310,7 +311,6 @@ class RailEnv(Environment):
self.rewards_dict = {i: r + global_reward for i, r in self.rewards_dict.items()}
return self._get_observations(), self.rewards_dict, self.dones, {}
# for i in range(len(self.agents_handles)):
for i_agent in range(self.get_num_agents()):
agent = self.agents[i_agent]
agent.old_direction = agent.direction
......@@ -331,7 +331,7 @@ class RailEnv(Environment):
agent.malfunction_data['malfunction'] -= 1
# Broken agents are stopped
self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
self.rewards_dict[i_agent] += step_penalty # * agent.speed_data['speed']
self.agents[i_agent].moving = False
action_dict[i_agent] = RailEnvActions.DO_NOTHING
......@@ -350,7 +350,8 @@ class RailEnv(Environment):
# Keep moving
action = RailEnvActions.MOVE_FORWARD
if action == RailEnvActions.STOP_MOVING and agent.moving and agent.speed_data['position_fraction'] == 0.:
if action == RailEnvActions.STOP_MOVING and agent.moving and agent.speed_data[
'position_fraction'] <= epsilon:
# Only allow halting an agent on entering new cells.
agent.moving = False
self.rewards_dict[i_agent] += stop_penalty
......@@ -372,7 +373,7 @@ class RailEnv(Environment):
# If the agent can make an action
action_selected = False
if agent.speed_data['position_fraction'] == 0.:
if agent.speed_data['position_fraction'] <= epsilon:
if action != RailEnvActions.DO_NOTHING and action != RailEnvActions.STOP_MOVING:
cell_free, new_cell_valid, new_direction, new_position, transition_valid = \
self._check_action_on_agent(action, agent)
......@@ -395,14 +396,14 @@ class RailEnv(Environment):
else:
# TODO: an invalid action was chosen after entering the cell. The agent cannot move.
self.rewards_dict[i_agent] += invalid_action_penalty
self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
self.rewards_dict[i_agent] += step_penalty #* agent.speed_data['speed']
self.rewards_dict[i_agent] += stop_penalty
agent.moving = False
continue
else:
# TODO: an invalid action was chosen after entering the cell. The agent cannot move.
self.rewards_dict[i_agent] += invalid_action_penalty
self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed']
self.rewards_dict[i_agent] += step_penalty #* agent.speed_data['speed']
self.rewards_dict[i_agent] += stop_penalty
agent.moving = False
continue
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment