diff --git a/examples/play_model.py b/examples/play_model.py
index 04aa55c1e5a713cf7c49a9ac87bc1ba6babd4406..2c18c3e3fbf5e54320f3382ae158f542a2130080 100644
--- a/examples/play_model.py
+++ b/examples/play_model.py
@@ -149,7 +149,7 @@ def main(render=True, delay=0.0):
     for trials in range(1, n_trials + 1):
 
         # Reset environment
-        # obs = env.reset()
+        obs = env.reset()
 
         for a in range(env.number_of_agents):
             norm = max(1, max_lt(obs[a],np.inf))
diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index 6b0c64a17cd22b0d26073755e9d26509d53334cf..9327167047ec0c8af40e0d84bf598f7e069b2990 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -508,6 +508,7 @@ class RailEnv(Environment):
             for move_index in range(4):
                 if moves[move_index]:
                     valid_movements.append((direction, move_index))
+        # print("pos", rcPos, "targ", rcTarget, "valid movements", valid_movements)
 
         valid_starting_directions = []
         for m in valid_movements:
@@ -527,15 +528,15 @@ class RailEnv(Environment):
         if rcPos is None:
             rcPos = np.random.choice(len(self.valid_positions))
 
-        # iAgent = self.number_of_agents
-        self.number_of_agents += 1
-
-        self.env.agents_position.append(rcPos)
-        self.env.agents_handles.append(max(self.env.agents_handles + [-1]) + 1)  # max(handles) + 1, starting at 0
-        self.env.agents_direction.append(0)
-        self.env.agents_target.append(rcPos)  # set the target to the origin initially
+        iAgent = self.number_of_agents
+        self.agents_position.append(tuple(rcPos))  # ensure it's a tuple not a list
+        self.agents_handles.append(max(self.agents_handles + [-1]) + 1)  # max(handles) + 1, starting at 0
+        self.agents_direction.append(0)
+        self.agents_target.append(rcPos)  # set the target to the origin initially
+        self.number_of_agents += 1
 
         self.check_agent_lists()
+        return iAgent
 
     def reset(self, regen_rail=True, replace_agents=True):
         if regen_rail or self.rail is None:
@@ -572,12 +573,12 @@ class RailEnv(Environment):
 
             re_generate = False
             for i in range(self.number_of_agents):
-                direction = self.pick_agent_direction(self.agents_position[i], self.agents_target)
+                direction = self.pick_agent_direction(self.agents_position[i], self.agents_target[i])
                 if direction is None:
                     re_generate = True
                     break
                 else:
-                    self.agents_direction = direction
+                    self.agents_direction[i] = direction
 
             # Jeremy extracted this into the method pick_agent_direction
             if False:
diff --git a/flatland/utils/editor.py b/flatland/utils/editor.py
index 4c612b53c08798a7de0c4eab072cf540f8df0f20..97fda4839c4bd6c1dc4b50b4ba8a670aa7236f97 100644
--- a/flatland/utils/editor.py
+++ b/flatland/utils/editor.py
@@ -79,20 +79,15 @@ class JupEditor(object):
             rcCell = ((array([y, x]) - self.yxBase) / self.nPixCell).astype(int)
 
             if self.drawMode == "Origin":
-                self.env.add_agent(rcCell, rcCell, 0)
-                # self.iAgent = len(self.env.agents_position)
-                # self.env.agents_position.append(rcCell)
-                # self.env.agents_handles.append(max(self.env.agents_handles + [-1]) + 1)
-                # self.env.agents_direction.append(0)
-                # self.env.agents_target.append(rcCell)  # set the target to the origin initially
-                # self.env.number_of_agents = self.iAgent + 1
+                self.iAgent = self.env.add_agent(rcCell, rcCell, 0)
 
                 self.drawMode = "Destination"
                 self.player = None  # will need to start a new player
+
             elif self.drawMode == "Destination" and self.iAgent is not None:
                 self.env.agents_target[self.iAgent] = rcCell
                 self.drawMode = "Origin"
-            self.log("agent", self.drawMode, self.iAgent, rcCell)
+            #self.log("agent", self.drawMode, self.iAgent, rcCell)
 
             self.redraw()
 
@@ -259,7 +254,7 @@ class JupEditor(object):
    def step_event(self, event=None):
        if self.player is None:
            self.player = Player(self.env)
-            self.env.reset(regen_rail=False)
+            self.env.reset(regen_rail=False, replace_agents=False)
        self.player.step()
        self.redraw()
 
@@ -275,7 +270,7 @@ class JupEditor(object):
    def bg_updater(self):
        try:
            for i in range(20):
-                self.log("step ", i)
+                #self.log("step ", i)
                self.step_event()
                time.sleep(0.2)
        finally:
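
Note (not part of the patch): a minimal usage sketch of the new add_agent()
return value, mirroring what editor.py now does. It assumes env is an
already-constructed RailEnv; the cell coordinates are made up for
illustration, and the positional arguments follow the call in editor.py
(origin cell, provisional target cell, direction).

    rc_cell = (3, 4)                              # hypothetical origin cell (row, col)
    i_agent = env.add_agent(rc_cell, rc_cell, 0)  # now returns the index of the new agent
    env.agents_target[i_agent] = (7, 8)           # later, point that agent at its real target
    env.reset(regen_rail=False, replace_agents=False)  # keep the rail and the hand-placed agents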