diff --git a/examples/play_model.py b/examples/play_model.py
index 04aa55c1e5a713cf7c49a9ac87bc1ba6babd4406..2c18c3e3fbf5e54320f3382ae158f542a2130080 100644
--- a/examples/play_model.py
+++ b/examples/play_model.py
@@ -149,7 +149,7 @@ def main(render=True, delay=0.0):
     for trials in range(1, n_trials + 1):
 
         # Reset environment
-        # obs = env.reset()
+        obs = env.reset()
 
         for a in range(env.number_of_agents):
             norm = max(1, max_lt(obs[a],np.inf))
diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index 6b0c64a17cd22b0d26073755e9d26509d53334cf..9327167047ec0c8af40e0d84bf598f7e069b2990 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -508,6 +508,7 @@ class RailEnv(Environment):
             for move_index in range(4):
                 if moves[move_index]:
                     valid_movements.append((direction, move_index))
+        # print("pos", rcPos, "targ", rcTarget, "valid movements", valid_movements)
 
         valid_starting_directions = []
         for m in valid_movements:
@@ -527,15 +528,15 @@ class RailEnv(Environment):
         if rcPos is None:
             rcPos = np.random.choice(len(self.valid_positions))
 
-        # iAgent = self.number_of_agents
-        self.number_of_agents += 1
-
-        self.env.agents_position.append(rcPos)
-        self.env.agents_handles.append(max(self.env.agents_handles + [-1]) + 1)  # max(handles) + 1, starting at 0
-        self.env.agents_direction.append(0)
-        self.env.agents_target.append(rcPos)  # set the target to the origin initially
+        iAgent = self.number_of_agents
+        self.agents_position.append(tuple(rcPos))  # ensure it's a tuple not a list
+        self.agents_handles.append(max(self.agents_handles + [-1]) + 1)  # max(handles) + 1, starting at 0
+        self.agents_direction.append(0)
+        self.agents_target.append(rcPos)  # set the target to the origin initially
+        self.number_of_agents += 1
 
         self.check_agent_lists()
+        return iAgent
 
     def reset(self, regen_rail=True, replace_agents=True):
         if regen_rail or self.rail is None:
@@ -572,12 +573,12 @@ class RailEnv(Environment):
 
             re_generate = False
             for i in range(self.number_of_agents):
-                direction = self.pick_agent_direction(self.agents_position[i], self.agents_target)
+                direction = self.pick_agent_direction(self.agents_position[i], self.agents_target[i])
                 if direction is None:
                     re_generate = True
                     break
                 else:
-                    self.agents_direction = direction
+                    self.agents_direction[i] = direction
 
             # Jeremy extracted this into the method pick_agent_direction
             if False:
diff --git a/flatland/utils/editor.py b/flatland/utils/editor.py
index 4c612b53c08798a7de0c4eab072cf540f8df0f20..97fda4839c4bd6c1dc4b50b4ba8a670aa7236f97 100644
--- a/flatland/utils/editor.py
+++ b/flatland/utils/editor.py
@@ -79,20 +79,15 @@ class JupEditor(object):
             rcCell = ((array([y, x]) - self.yxBase) / self.nPixCell).astype(int)
 
             if self.drawMode == "Origin":
-                self.env.add_agent(rcCell, rcCell, 0)
-                # self.iAgent = len(self.env.agents_position)
-                # self.env.agents_position.append(rcCell)
-                # self.env.agents_handles.append(max(self.env.agents_handles + [-1]) + 1)
-                # self.env.agents_direction.append(0)
-                # self.env.agents_target.append(rcCell)  # set the target to the origin initially
-                # self.env.number_of_agents = self.iAgent + 1
+                self.iAgent = self.env.add_agent(rcCell, rcCell, 0)
 
                 self.drawMode = "Destination"
                 self.player = None  # will need to start a new player
+
             elif self.drawMode == "Destination" and self.iAgent is not None:
                 self.env.agents_target[self.iAgent] = rcCell
                 self.drawMode = "Origin"
-            self.log("agent", self.drawMode, self.iAgent, rcCell)
+            #self.log("agent", self.drawMode, self.iAgent, rcCell)
 
             self.redraw()
 
@@ -259,7 +254,7 @@ class JupEditor(object):
    def step_event(self, event=None):
        if self.player is None:
            self.player = Player(self.env)
-            self.env.reset(regen_rail=False)
+            self.env.reset(regen_rail=False, replace_agents=False)
        self.player.step()
        self.redraw()
 
@@ -275,7 +270,7 @@ class JupEditor(object):
    def bg_updater(self):
        try:
            for i in range(20):
-                self.log("step ", i)
+                #self.log("step ", i)
                self.step_event()
                time.sleep(0.2)
        finally:
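
Note (not part of the patch): a minimal usage sketch of the new add_agent()
return value, mirroring what editor.py now does. It assumes env is an
already-constructed RailEnv; the cell coordinates are made up for
illustration, and the positional arguments follow the call in editor.py
(origin cell, provisional target cell, direction).

    rc_cell = (3, 4)                              # hypothetical origin cell (row, col)
    i_agent = env.add_agent(rc_cell, rc_cell, 0)  # now returns the index of the new agent
    env.agents_target[i_agent] = (7, 8)           # later, point that agent at its real target
    env.reset(regen_rail=False, replace_agents=False)  # keep the rail and the hand-placed agents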