Skip to content
Snippets Groups Projects
Commit 0da9e3c3 authored by Erik Nygren
Browse files

Minor updates to how multi-speed steps are handled

parent d83c686d
No related branches found
No related tags found
No related merge requests found
...@@ -294,7 +294,8 @@ class RailEnv(Environment): ...@@ -294,7 +294,8 @@ class RailEnv(Environment):
alpha = 1.0 alpha = 1.0
beta = 1.0 beta = 1.0
# Epsilon to avoid rounding errors
epsilon = 0.01
invalid_action_penalty = 0 # previously -2; GIACOMO: we decided that invalid actions will carry no penalty invalid_action_penalty = 0 # previously -2; GIACOMO: we decided that invalid actions will carry no penalty
step_penalty = -1 * alpha step_penalty = -1 * alpha
global_reward = 1 * beta global_reward = 1 * beta
...@@ -310,7 +311,6 @@ class RailEnv(Environment): ...@@ -310,7 +311,6 @@ class RailEnv(Environment):
self.rewards_dict = {i: r + global_reward for i, r in self.rewards_dict.items()} self.rewards_dict = {i: r + global_reward for i, r in self.rewards_dict.items()}
return self._get_observations(), self.rewards_dict, self.dones, {} return self._get_observations(), self.rewards_dict, self.dones, {}
# for i in range(len(self.agents_handles)):
for i_agent in range(self.get_num_agents()): for i_agent in range(self.get_num_agents()):
agent = self.agents[i_agent] agent = self.agents[i_agent]
agent.old_direction = agent.direction agent.old_direction = agent.direction
...@@ -331,7 +331,7 @@ class RailEnv(Environment): ...@@ -331,7 +331,7 @@ class RailEnv(Environment):
agent.malfunction_data['malfunction'] -= 1 agent.malfunction_data['malfunction'] -= 1
# Broken agents are stopped # Broken agents are stopped
self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] self.rewards_dict[i_agent] += step_penalty # * agent.speed_data['speed']
self.agents[i_agent].moving = False self.agents[i_agent].moving = False
action_dict[i_agent] = RailEnvActions.DO_NOTHING action_dict[i_agent] = RailEnvActions.DO_NOTHING
...@@ -350,7 +350,8 @@ class RailEnv(Environment): ...@@ -350,7 +350,8 @@ class RailEnv(Environment):
# Keep moving # Keep moving
action = RailEnvActions.MOVE_FORWARD action = RailEnvActions.MOVE_FORWARD
if action == RailEnvActions.STOP_MOVING and agent.moving and agent.speed_data['position_fraction'] == 0.: if action == RailEnvActions.STOP_MOVING and agent.moving and agent.speed_data[
'position_fraction'] <= epsilon:
# Only allow halting an agent on entering new cells. # Only allow halting an agent on entering new cells.
agent.moving = False agent.moving = False
self.rewards_dict[i_agent] += stop_penalty self.rewards_dict[i_agent] += stop_penalty
...@@ -372,7 +373,7 @@ class RailEnv(Environment): ...@@ -372,7 +373,7 @@ class RailEnv(Environment):
# If the agent can make an action # If the agent can make an action
action_selected = False action_selected = False
if agent.speed_data['position_fraction'] == 0.: if agent.speed_data['position_fraction'] <= epsilon:
if action != RailEnvActions.DO_NOTHING and action != RailEnvActions.STOP_MOVING: if action != RailEnvActions.DO_NOTHING and action != RailEnvActions.STOP_MOVING:
cell_free, new_cell_valid, new_direction, new_position, transition_valid = \ cell_free, new_cell_valid, new_direction, new_position, transition_valid = \
self._check_action_on_agent(action, agent) self._check_action_on_agent(action, agent)
...@@ -395,14 +396,14 @@ class RailEnv(Environment): ...@@ -395,14 +396,14 @@ class RailEnv(Environment):
else: else:
# TODO: an invalid action was chosen after entering the cell. The agent cannot move. # TODO: an invalid action was chosen after entering the cell. The agent cannot move.
self.rewards_dict[i_agent] += invalid_action_penalty self.rewards_dict[i_agent] += invalid_action_penalty
self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] self.rewards_dict[i_agent] += step_penalty #* agent.speed_data['speed']
self.rewards_dict[i_agent] += stop_penalty self.rewards_dict[i_agent] += stop_penalty
agent.moving = False agent.moving = False
continue continue
else: else:
# TODO: an invalid action was chosen after entering the cell. The agent cannot move. # TODO: an invalid action was chosen after entering the cell. The agent cannot move.
self.rewards_dict[i_agent] += invalid_action_penalty self.rewards_dict[i_agent] += invalid_action_penalty
self.rewards_dict[i_agent] += step_penalty * agent.speed_data['speed'] self.rewards_dict[i_agent] += step_penalty #* agent.speed_data['speed']
self.rewards_dict[i_agent] += stop_penalty self.rewards_dict[i_agent] += stop_penalty
agent.moving = False agent.moving = False
continue continue
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment