From bcd064590a17c04e0549832c22d5bb80f8c2d8aa Mon Sep 17 00:00:00 2001
From: Erik Nygren <erik.nygren@sbb.ch>
Date: Tue, 9 Jul 2019 07:51:17 -0400
Subject: [PATCH] minor bugfixes. Simple distance map test (thanks Christian).
 This test will be enhanced soon.

---
 flatland/envs/observations.py |  6 ++++--
 flatland/envs/rail_env.py     | 34 +++++++++++++++++++++++-----------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/flatland/envs/observations.py b/flatland/envs/observations.py
index 18af8a0c..594c6d1f 100644
--- a/flatland/envs/observations.py
+++ b/flatland/envs/observations.py
@@ -16,8 +16,10 @@ class TreeObsForRailEnv(ObservationBuilder):
     TreeObsForRailEnv object.
 
     This object returns observation vectors for agents in the RailEnv environment.
-    The information is local to each agent and exploits the tree structure of the rail
+    The information is local to each agent and exploits the graph structure of the rail
     network to simplify the representation of the state of the environment for each agent.
+
+    For details about the features in the tree observation, see the get() function.
     """
 
     observation_dim = 9
@@ -204,7 +206,7 @@ class TreeObsForRailEnv(ObservationBuilder):
             [... from 'right] +
             [... from 'back']
 
-        Finally, each node information is composed of 8 floating point values:
+        Each node's information is composed of 9 features:
 
         #1: if own target lies on the explored branch the current distance from the agent in number of cells is stored.
 
diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index 8abfd1b3..85043119 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -1,8 +1,5 @@
 """
-Definition of the RailEnv environment and related level-generation functions.
-
-Generator functions are functions that take width, height and num_resets as arguments and return
-a GridTransitionMap object.
+Definition of the RailEnv environment.
 """
 # TODO:  _ this is a global method --> utils or remove later
 
@@ -46,20 +43,35 @@ class RailEnv(Environment):
     to avoid bottlenecks.
 
     The valid actions in the environment are:
-        0: do nothing
-        1: turn left and move to the next cell; if the agent was not moving, movement is started
-        2: move to the next cell in front of the agent; if the agent was not moving, movement is started
-        3: turn right and move to the next cell; if the agent was not moving, movement is started
-        4: stop moving
+
+     -   0: do nothing (continue moving or stay still)
+     -   1: turn left at switch and move to the next cell; if the agent was not moving, movement is started
+     -   2: move to the next cell in front of the agent; if the agent was not moving, movement is started
+     -   3: turn right at switch and move to the next cell; if the agent was not moving, movement is started
+     -   4: stop moving
 
     Moving forward in a dead-end cell makes the agent turn 180 degrees and step
     to the cell it came from.
 
+
     The actions of the agents are executed in order of their handle to prevent
     deadlocks and to allow them to learn relative priorities.
 
-    TODO: WRITE ABOUT THE REWARD FUNCTION, and possibly allow for alpha and
-    beta to be passed as parameters to __init__().
+    Reward Function:
+
+    Each agent incurs a step_penalty for every time-step taken in the environment, independent of whether the
+    agent moves. Currently all other penalties (for stopping, starting and invalid actions) are set to 0.
+
+    alpha = 1
+    beta = 1
+    Reward function parameters:
+
+    - invalid_action_penalty = 0
+    - step_penalty = -alpha
+    - global_reward = beta
+    - stop_penalty = 0  # penalty for stopping a moving agent
+    - start_penalty = 0  # penalty for starting a stopped agent
+
     """
 
     def __init__(self,
-- 
GitLab