diff --git a/flatland/envs/agent_utils.py b/flatland/envs/agent_utils.py
index ef87c96e995015f98022ea9e619276fd28ba49ad..9831f6037678ec2e0f4547870f95a7fa76513314 100644
--- a/flatland/envs/agent_utils.py
+++ b/flatland/envs/agent_utils.py
@@ -9,10 +9,12 @@ from flatland.envs.schedule_utils import Schedule
 
 
 class RailAgentStatus(IntEnum):
-    READY_TO_DEPART = 0  # not in grid yet (position is None) -> prediction as if it were at initial position
-    ACTIVE = 1  # in grid (position is not None), not done -> prediction is remaining path
-    DONE = 2  # in grid (position is not None), but done -> prediction is stay at target forever
-    DONE_REMOVED = 3  # removed from grid (position is None) -> prediction is None
+    WAITING = 0  # not in grid yet (position is None), before earliest_departure -> prediction as if it were at initial position
+    READY_TO_DEPART = 1  # not in grid yet (position is None) -> prediction as if it were at initial position
+    ACTIVE = 2  # in grid (position is not None), not done -> prediction is remaining path
+    DONE = 3  # in grid (position is not None), but done -> prediction is stay at target forever
+    DONE_REMOVED = 4  # removed from grid (position is None) -> prediction is None
+    CANCELLED = 5  # removed from grid without ever departing (position is None)
 
 
 Agent = NamedTuple('Agent', [('initial_position', Tuple[int, int]),
@@ -33,13 +35,14 @@ Agent = NamedTuple('Agent', [('initial_position', Tuple[int, int]),
 
 @attrs
 class EnvAgent:
+    # INIT FROM HERE IN from_schedule()
     initial_position = attrib(type=Tuple[int, int])
     initial_direction = attrib(type=Grid4TransitionsEnum)
     direction = attrib(type=Grid4TransitionsEnum)
     target = attrib(type=Tuple[int, int])
     moving = attrib(default=False, type=bool)
 
-    # NEW - time scheduling
+    # NEW : EnvAgent - Schedule properties
    earliest_departure = attrib(default=None, type=int)  # default None during from_schedule()
    latest_arrival = attrib(default=None, type=int)  # default None during from_schedule()
 
@@ -58,8 +61,9 @@ class EnvAgent:
                                                             'moving_before_malfunction': False})))
 
     handle = attrib(default=None)
+    # INIT TILL HERE IN from_schedule()
 
-    status = attrib(default=RailAgentStatus.READY_TO_DEPART, type=RailAgentStatus)
+    status = attrib(default=RailAgentStatus.WAITING, type=RailAgentStatus)
     position = attrib(default=None, type=Optional[Tuple[int, int]])
 
     # used in rendering
@@ -68,12 +72,17 @@ class EnvAgent:
 
     def reset(self):
        """
-        Resets the agents to their initial values of the episode
+        Resets the agent to its initial values for the episode. Called after schedule time generation.
""" self.position = None # TODO: set direction to None: https://gitlab.aicrowd.com/flatland/flatland/issues/280 self.direction = self.initial_direction - self.status = RailAgentStatus.READY_TO_DEPART + + if (self.earliest_departure == 0): + self.status = RailAgentStatus.READY_TO_DEPART + else: + self.status = RailAgentStatus.WAITING + self.old_position = None self.old_direction = None self.moving = False diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py index cef55ae732eba625a92a04fbdfc315fadda57aa7..289058834446bbdb24cd26a937f30c596f5e3a7b 100644 --- a/flatland/envs/rail_env.py +++ b/flatland/envs/rail_env.py @@ -34,7 +34,7 @@ from gym.utils import seeding # from flatland.envs.rail_generators import random_rail_generator, RailGenerator # from flatland.envs.schedule_generators import random_schedule_generator, ScheduleGenerator -# NEW +# NEW : Imports from flatland.envs.schedule_time_generators import schedule_time_generator # Adrian Egli performance fix (the fast methods brings more than 50%) @@ -379,23 +379,34 @@ class RailEnv(Environment): if optionals and 'agents_hints' in optionals: agents_hints = optionals['agents_hints'] - schedule = self.schedule_generator(self.rail, self.number_of_agents, agents_hints, self.num_resets, - self.np_random) + schedule = self.schedule_generator(self.rail, self.number_of_agents, agents_hints, + self.num_resets, self.np_random) self.agents = EnvAgent.from_schedule(schedule) # Get max number of allowed time steps from schedule generator # Look at the specific schedule generator used to see where this number comes from - self._max_episode_steps = schedule.max_episode_steps + self._max_episode_steps = schedule.max_episode_steps # NEW UPDATE THIS! + # Agent Positions Map self.agent_positions = np.zeros((self.height, self.width), dtype=int) - 1 - # Reset agents to initial - self.reset_agents() + # Reset distance map - basically initializing self.distance_map.reset(self.agents, self.rail) - # NEW - time window scheduling - schedule_time_generator(self.agents, self.distance_map, schedule, self.np_random, temp_info=optionals) + # NEW : Time Schedule Generation + # find agent speeds (needed for max_ep_steps recalculation) + if (type(self.schedule_generator.speed_ratio_map) is dict): + config_speeds = list(self.schedule_generator.speed_ratio_map.keys()) + else: + config_speeds = [1.0] + + self._max_episode_steps = schedule_time_generator(self.agents, config_speeds, self.distance_map, + self._max_episode_steps, self.np_random, temp_info=optionals) + + # Reset agents to initial states + self.reset_agents() + # WHY for agent in self.agents: # Induce malfunctions if activate_agents: @@ -584,10 +595,24 @@ class RailEnv(Environment): if have_all_agents_ended: self.dones["__all__"] = True self.rewards_dict = {i: self.global_reward for i in range(self.get_num_agents())} + if (self._max_episode_steps is not None) and (self._elapsed_steps >= self._max_episode_steps): self.dones["__all__"] = True - for i_agent in range(self.get_num_agents()): + + for i_agent, agent in enumerate(self.agents): + # NEW : STEP:REW: CANCELLED check / reward (never departed) + if (agent.status == RailAgentStatus.READY_TO_DEPART): + agent.status = RailAgentStatus.CANCELLED + # NEGATIVE REWARD? + + # NEW : STEP:REW: Departed but never reached + if (agent.status == RailAgentStatus.ACTIVE): + pass + # NEGATIVE REWARD? 
+                self.dones[i_agent] = True
+
 
         if self.record_steps:
             self.record_timestep(action_dict_)
@@ -738,6 +763,13 @@ class RailEnv(Environment):
         if agent.status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED]:  # this agent has already completed...
             return
 
+        # NEW : STEP: WAITING -> WAITING or WAITING -> READY_TO_DEPART
+        if agent.status == RailAgentStatus.WAITING:
+            if self._elapsed_steps >= agent.earliest_departure:
+                agent.status = RailAgentStatus.READY_TO_DEPART
+            self.motionCheck.addAgent(i_agent, None, None)
+            return
+
         # agent gets active by a MOVE_* action and if c
         if agent.status == RailAgentStatus.READY_TO_DEPART:
@@ -848,7 +880,8 @@ class RailEnv(Environment):
     def _step_agent2_cf(self, i_agent):
         agent = self.agents[i_agent]
 
-        if agent.status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED]:
+        # NEW : REW: no reward while WAITING...
+        if agent.status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED, RailAgentStatus.WAITING]:
             return
 
         (move, rc_next) = self.motionCheck.check_motion(i_agent, agent.position)
@@ -889,18 +922,37 @@ class RailEnv(Environment):
                 agent.direction = new_direction
                 agent.speed_data['position_fraction'] = 0.0
 
+            # NEW : REW: check DONE before/after Latest Arrival & check if RUNNING before/after Latest Arrival
             # has the agent reached its target?
             if np.equal(agent.position, agent.target).all():
-                agent.status = RailAgentStatus.DONE
-                self.dones[i_agent] = True
-                self.active_agents.remove(i_agent)
-                agent.moving = False
-                self._remove_agent_from_scene(agent)
-            else:
-                self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+                # arrived before Latest Arrival
+                if self._elapsed_steps <= agent.latest_arrival:
+                    agent.status = RailAgentStatus.DONE
+                    self.dones[i_agent] = True
+                    self.active_agents.remove(i_agent)
+                    agent.moving = False
+                    self._remove_agent_from_scene(agent)
+                else:  # arrived after Latest Arrival
+                    agent.status = RailAgentStatus.DONE
+                    self.dones[i_agent] = True
+                    self.active_agents.remove(i_agent)
+                    agent.moving = False
+                    self._remove_agent_from_scene(agent)
+                    # TODO: negative reward?
+
+            else:  # has not reached its target and is moving
+                # running before Latest Arrival
+                if self._elapsed_steps <= agent.latest_arrival:
+                    self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+                else:  # running after Latest Arrival
+                    self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+                    # TODO: negative reward per step?
         else:
-            # step penalty if not moving (stopped now or before)
-            self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+            # stopped (!move) before Latest Arrival
+            if self._elapsed_steps <= agent.latest_arrival:
+                self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+            else:  # stopped (!move) after Latest Arrival
+                self.rewards_dict[i_agent] += self.step_penalty * \
+                    agent.speed_data['speed']
+                # TODO: negative reward per step?
 def _set_agent_to_initial_position(self, agent: EnvAgent, new_position: IntVector2D):
        """
diff --git a/flatland/envs/rail_env_shortest_paths.py b/flatland/envs/rail_env_shortest_paths.py
index 6bfb4bb558f135388b41ee2b830f74984e62eddc..afa0dd79be256149b0800ac2763bb6820970406f 100644
--- a/flatland/envs/rail_env_shortest_paths.py
+++ b/flatland/envs/rail_env_shortest_paths.py
@@ -227,7 +227,7 @@ def get_shortest_paths(distance_map: DistanceMap, max_depth: Optional[int] = Non
     shortest_paths = dict()
 
     def _shortest_path_for_agent(agent):
-        if agent.status == RailAgentStatus.READY_TO_DEPART:
+        if agent.status in (RailAgentStatus.WAITING, RailAgentStatus.READY_TO_DEPART):
             position = agent.initial_position
         elif agent.status == RailAgentStatus.ACTIVE:
             position = agent.position
diff --git a/flatland/envs/schedule_time_generators.py b/flatland/envs/schedule_time_generators.py
index 1587ffc378b424b21cfb9e4f3660f631521e5f32..dafa0aca361037a7758ce3e3dff740b52879e8bd 100644
--- a/flatland/envs/schedule_time_generators.py
+++ b/flatland/envs/schedule_time_generators.py
@@ -30,17 +30,44 @@ from flatland.envs.distance_map import DistanceMap
 # city_positions = []
 # #### DATA COLLECTION *************************
 
-def schedule_time_generator(agents: List[EnvAgent], distance_map: DistanceMap, schedule: Schedule,
-                            np_random: RandomState = None, temp_info=None) -> None:
+def schedule_time_generator(agents: List[EnvAgent], config_speeds: List[float], distance_map: DistanceMap,
+                            max_episode_steps: int, np_random: RandomState = None, temp_info=None) -> int:
+
+    # Multipliers
+    old_max_episode_steps_multiplier = 3.0
+    new_max_episode_steps_multiplier = 1.5
+    travel_buffer_multiplier = 1.3  # must be strictly less than new_max_episode_steps_multiplier
+    end_buffer_multiplier = 0.05
+    mean_shortest_path_multiplier = 0.2
 
     from flatland.envs.rail_env_shortest_paths import get_shortest_paths
     shortest_paths = get_shortest_paths(distance_map)
+    shortest_paths_lengths = [len(v) for v in shortest_paths.values()]
+
+    # Find mean_shortest_path_time
+    agent_shortest_path_times = []
+    for agent in agents:
+        speed = agent.speed_data['speed']
+        distance = shortest_paths_lengths[agent.handle]
+        agent_shortest_path_times.append(int(np.ceil(distance / speed)))
+
+    mean_shortest_path_time = np.mean(agent_shortest_path_times)
+
+    # Decide on a suitable max_episode_steps
+    max_sp_len = max(shortest_paths_lengths)  # longest path
+    min_speed = min(config_speeds)  # slowest possible speed in config
 
-    max_episode_steps = int(schedule.max_episode_steps * 1.0) #needs to be increased due to fractional speeds taking way longer (best - be calculated here)
-    end_buffer = max_episode_steps // 20 #schedule.end_buffer
+    longest_sp_time = max_sp_len / min_speed
+    max_episode_steps_new = int(np.ceil(longest_sp_time * new_max_episode_steps_multiplier))
+
+    max_episode_steps_old = int(max_episode_steps * old_max_episode_steps_multiplier)
+
+    max_episode_steps = min(max_episode_steps_new, max_episode_steps_old)
+
+    end_buffer = int(max_episode_steps * end_buffer_multiplier)
     latest_arrival_max = max_episode_steps-end_buffer
-    travel_buffer_multiplier = 1.7
 
+    # Only needed if these are returned later; currently unused
     earliest_departures = []
     latest_arrivals = []
 
@@ -89,11 +116,11 @@ def schedule_time_generator(agents: List[EnvAgent], distance_map: DistanceMap, s
     # #### DATA COLLECTION *************************
     for agent in agents:
-        agent_speed = agent.speed_data['speed']
-        agent_shortest_path = shortest_paths[agent.handle]
-        agent_shortest_path_len = len(agent_shortest_path)
-        agent_shortest_path_time = int(np.ceil(agent_shortest_path_len / agent_speed)) # for fractional speeds 1/3 etc
-        agent_travel_time_max = min( int(np.ceil(agent_shortest_path_time * travel_buffer_multiplier)), latest_arrival_max) # min(this, latest_arrival_max), SHOULD NOT BE lesser than shortest path time
+        agent_shortest_path_time = agent_shortest_path_times[agent.handle]
+        agent_travel_time_max = min(int(np.ceil(agent_shortest_path_time * travel_buffer_multiplier
+                                                + mean_shortest_path_time * mean_shortest_path_multiplier)),
+                                    latest_arrival_max)  # capped so the departure window below stays non-negative
+
         departure_window_max = latest_arrival_max - agent_travel_time_max
         earliest_departure = np_random.randint(0, departure_window_max)
@@ -124,6 +151,9 @@ def schedule_time_generator(agents: List[EnvAgent], distance_map: DistanceMap, s
 
     # save_sp_fig()
     # #### DATA COLLECTION *************************
 
+    # Return max_episode_steps after deciding on its new value
+    return max_episode_steps
+
 # #### DATA COLLECTION *************************
 # # Histogram 1
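
Reviewer notes (not part of the patch) follow.

The status transitions introduced above are spread across three hunks of rail_env.py. Below is a minimal sketch that condenses them in one place, assuming the patched RailAgentStatus enum is importable; expected_status_transition is a hypothetical helper written for illustration, not a function in the diff:

from flatland.envs.agent_utils import RailAgentStatus  # assumes the patched enum above

def expected_status_transition(status, elapsed_steps, earliest_departure, max_episode_steps):
    # WAITING -> READY_TO_DEPART once earliest_departure is reached (see _step_agent)
    if status == RailAgentStatus.WAITING and elapsed_steps >= earliest_departure:
        return RailAgentStatus.READY_TO_DEPART
    # READY_TO_DEPART -> CANCELLED when the episode ends before the agent departed (see step)
    if status == RailAgentStatus.READY_TO_DEPART and elapsed_steps >= max_episode_steps:
        return RailAgentStatus.CANCELLED
    # ACTIVE -> DONE / DONE_REMOVED on reaching the target is handled in _step_agent2_cf
    return status

assert expected_status_transition(RailAgentStatus.WAITING, 5, 3, 100) == RailAgentStatus.READY_TO_DEPART
assert expected_status_transition(RailAgentStatus.WAITING, 1, 3, 100) == RailAgentStatus.WAITING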
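
The episode-length arithmetic in schedule_time_generator can be checked by hand. A worked example with invented inputs (three agents, shortest paths of 40/55/62 cells, configured speeds 1.0/0.5/0.25, and a schedule that originally proposed 300 steps), using the multipliers from the diff:

import numpy as np

shortest_paths_lengths = [40, 55, 62]
config_speeds = [1.0, 0.5, 0.25]
schedule_max_episode_steps = 300                                        # invented original value

longest_sp_time = max(shortest_paths_lengths) / min(config_speeds)      # 62 / 0.25 = 248.0
max_episode_steps_new = int(np.ceil(longest_sp_time * 1.5))             # 372
max_episode_steps_old = int(schedule_max_episode_steps * 3.0)           # 900
max_episode_steps = min(max_episode_steps_new, max_episode_steps_old)   # 372

end_buffer = int(max_episode_steps * 0.05)                              # 18
latest_arrival_max = max_episode_steps - end_buffer                     # 354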
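
The per-agent departure window then follows from those bounds. Continuing the same invented numbers for the speed-0.5 agent; the final latest_arrival line is an assumption, since that assignment falls outside the hunks shown above:

import numpy as np

latest_arrival_max = 354                                      # from the worked example above
agent_shortest_path_times = [40, 110, 248]                    # int(np.ceil(length / speed)) per agent
mean_shortest_path_time = np.mean(agent_shortest_path_times)  # ~132.7

agent_shortest_path_time = 110                                # the speed-0.5 agent
agent_travel_time_max = min(int(np.ceil(agent_shortest_path_time * 1.3
                                        + mean_shortest_path_time * 0.2)),  # ceil(143 + 26.5) = 170
                            latest_arrival_max)

departure_window_max = latest_arrival_max - agent_travel_time_max            # 354 - 170 = 184
earliest_departure = np.random.RandomState(0).randint(0, departure_window_max)
# Assumption (assignment not shown in the diff): latest arrival = departure + travel allowance
latest_arrival = earliest_departure + agent_travel_time_max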