From bcd064590a17c04e0549832c22d5bb80f8c2d8aa Mon Sep 17 00:00:00 2001
From: Erik Nygren <erik.nygren@sbb.ch>
Date: Tue, 9 Jul 2019 07:51:17 -0400
Subject: [PATCH] minor bugfixes. Simple distance map test (thanks Christian).
 This test will be enhanced soon.

---
 flatland/envs/observations.py |  6 ++++--
 flatland/envs/rail_env.py     | 34 +++++++++++++++++++++++-----------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/flatland/envs/observations.py b/flatland/envs/observations.py
index 18af8a0c..594c6d1f 100644
--- a/flatland/envs/observations.py
+++ b/flatland/envs/observations.py
@@ -16,8 +16,10 @@ class TreeObsForRailEnv(ObservationBuilder):
     TreeObsForRailEnv object.
 
     This object returns observation vectors for agents in the RailEnv environment.
-    The information is local to each agent and exploits the tree structure of the rail
+    The information is local to each agent and exploits the graph structure of the rail
     network to simplify the representation of the state of the environment for each agent.
+
+    For details about the features in the tree observation, see the get() function.
     """
 
     observation_dim = 9
@@ -204,7 +206,7 @@ class TreeObsForRailEnv(ObservationBuilder):
         [... from 'right] +
         [... from 'back']
 
-        Finally, each node information is composed of 8 floating point values:
+        Each node's information is composed of 9 features:
 
         #1: if own target lies on the explored branch the current distance from the agent
             in number of cells is stored.
diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index 8abfd1b3..85043119 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -1,8 +1,5 @@
 """
-Definition of the RailEnv environment and related level-generation functions.
-
-Generator functions are functions that take width, height and num_resets as arguments and return
-a GridTransitionMap object.
+Definition of the RailEnv environment.
 """
 
 # TODO: _ this is a global method --> utils or remove later
@@ -46,20 +43,35 @@ class RailEnv(Environment):
     to avoid bottlenecks.
 
     The valid actions in the environment are:
-        0: do nothing
-        1: turn left and move to the next cell; if the agent was not moving, movement is started
-        2: move to the next cell in front of the agent; if the agent was not moving, movement is started
-        3: turn right and move to the next cell; if the agent was not moving, movement is started
-        4: stop moving
+
+    - 0: do nothing (continue moving or stay still)
+    - 1: turn left at a switch and move to the next cell; if the agent was not moving, movement is started
+    - 2: move to the next cell in front of the agent; if the agent was not moving, movement is started
+    - 3: turn right at a switch and move to the next cell; if the agent was not moving, movement is started
+    - 4: stop moving
 
     Moving forward in a dead-end cell makes the agent turn 180 degrees and step
     to the cell it came from.
 
+
     The actions of the agents are executed in order of their handle to prevent
     deadlocks and to allow them to learn relative priorities.
 
-    TODO: WRITE ABOUT THE REWARD FUNCTION, and possibly allow for alpha and
-    beta to be passed as parameters to __init__().
+    Reward Function:
+
+    Each agent incurs a step_penalty for every time-step taken in the environment, independent
+    of whether it moves. Currently, all other penalties, such as those for stopping, starting, and invalid actions, are set to 0.
+
+    Reward function parameters:
+
+    - alpha = 1
+    - beta = 1
+    - invalid_action_penalty = 0
+    - step_penalty = -alpha
+    - global_reward = beta
+    - stop_penalty = 0  # penalty for stopping a moving agent
+    - start_penalty = 0  # penalty for starting a stopped agent
+
     """
 
     def __init__(self,
--
GitLab
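
Note on the tree observation: each node carries observation_dim = 9 features, and this excerpt documents only feature #1 (the distance, in cells, to the agent's own target along the explored branch); the others are described in TreeObsForRailEnv.get(). The helper below is a hypothetical sketch, not part of flatland, showing how a flattened observation vector could be split back into per-node feature chunks:

    def split_tree_obs(obs_vector, features_per_node=9):
        # Chop a flat observation vector into consecutive 9-feature nodes.
        # Feature #1 of each node is the distance (in cells) to the agent's
        # own target along the explored branch; the remaining features are
        # documented in TreeObsForRailEnv.get().
        return [obs_vector[i:i + features_per_node]
                for i in range(0, len(obs_vector), features_per_node)]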
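
Note on the action encoding: the codes 0-4 in the docstring are plain integers in the environment's action dictionary. The enum and loop below are an illustrative sketch (RailAction, step_in_handle_order, and apply_action are hypothetical names, not flatland API) mirroring the docstring's statement that agents are executed in order of their handles:

    from enum import IntEnum

    class RailAction(IntEnum):
        DO_NOTHING = 0    # continue moving, or stay still
        MOVE_LEFT = 1     # turn left at a switch, then move; starts a stopped agent
        MOVE_FORWARD = 2  # move straight ahead; turns 180 degrees in a dead end
        MOVE_RIGHT = 3    # turn right at a switch, then move; starts a stopped agent
        STOP_MOVING = 4   # stop a moving agent

    def step_in_handle_order(action_dict, apply_action):
        # Agents are processed in ascending handle order, as the docstring
        # states, so conflicts always resolve in favour of the lower handle.
        for handle in sorted(action_dict):
            apply_action(handle, RailAction(action_dict[handle]))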
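
Note on the reward function: a minimal sketch of the per-step reward described in the docstring, assuming the global_reward of beta is granted on the step when all agents have reached their targets (the excerpt does not spell out when beta is paid); compute_step_reward is a hypothetical helper, not a method of RailEnv:

    def compute_step_reward(all_agents_done, alpha=1.0, beta=1.0):
        # step_penalty = -alpha is charged every time-step, regardless of
        # whether the agent moved; the stop, start, and invalid-action
        # penalties are currently 0 and therefore omitted.
        reward = -alpha
        if all_agents_done:
            reward += beta  # global_reward once every agent is at its target
        return reward

With the default alpha = beta = 1, an agent whose episode lasts T steps accumulates a return of -T, plus 1 on the final step if all agents reach their targets.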