diff --git a/flatland/cli.py b/flatland/cli.py
index 9bb7107476a0981fda13b05502949549ce8d4d31..51db57b3bc5c7e2889ac57df84c95a15b59685f7 100644
--- a/flatland/cli.py
+++ b/flatland/cli.py
@@ -61,13 +61,24 @@ def demo(args=None):
               help="Evaluation Service ID. This has to match the service id on the client.",
               required=False
               )
+@click.option('--shuffle',
+              type=bool,
+              default=True,
+              help="Shuffle the environments before starting evaluation.",
+              required=False
+              )
+@click.option('--disable_timeouts',
+              default=False,
+              help="Disable all evaluation timeouts.",
+              required=False
+              )
 @click.option('--results_path',
               type=click.Path(exists=False),
               default=None,
               help="Path where the evaluator should write the results metadata.",
               required=False
               )
-def evaluator(tests, service_id, results_path):
+def evaluator(tests, service_id, shuffle, disable_timeouts, results_path):
     try:
         redis_connection = redis.Redis()
         redis_connection.ping()
@@ -82,7 +93,9 @@ def evaluator(tests, service_id, results_path):
         flatland_rl_service_id=service_id,
         visualize=False,
         result_output_path=results_path,
-        verbose=False
+        verbose=False,
+        shuffle=shuffle,
+        disable_timeouts=disable_timeouts
     )
 
     grader.run()
diff --git a/flatland/envs/agent_chains.py b/flatland/envs/agent_chains.py
index 02734f463fd6566767226391e48772a30543bac4..a2792479c0877aef2b41a8b62af7d55f5ab46bb0 100644
--- a/flatland/envs/agent_chains.py
+++ b/flatland/envs/agent_chains.py
@@ -32,12 +32,12 @@ class MotionCheck(object):
         if xlabel:
             self.G.nodes[rc1]["xlabel"] = xlabel
         self.G.add_edge(rc1, rc2)
-
+
     def find_stops(self):
         """ find all the stopped agents as a set of rc position nodes
             A stopped agent is a self-loop on a cell node.
         """
-
+
         # get the (sparse) adjacency matrix
         spAdj = nx.linalg.adjacency_matrix(self.G)
@@ -45,7 +45,7 @@ class MotionCheck(object):
         # the where turns this into a list of indices of the 1s
         giStops = np.where(spAdj.diagonal())[0]
 
-        # convert the cell/node indices into the node rc values
+        # convert the cell/node indices into the node rc values
         lvAll = list(self.G.nodes())
         # pick out the stops by their indices
         lvStops = [ lvAll[i] for i in giStops ]
@@ -78,7 +78,7 @@ class MotionCheck(object):
             # Find all the stops in this chain
             svCompStops = svStops.intersection(Gwcc)
             #print(svCompStops)
-
+
             if len(svCompStops) > 0:
 
                 # We need to traverse it in reverse - back up the movement edges
@@ -90,7 +90,7 @@ class MotionCheck(object):
                     iter_stops = nx.algorithms.traversal.dfs_postorder_nodes(Gwcc_rev, vStop)
                     lStops = list(iter_stops)
                     svBlocked.update(lStops)
-
+
         return svBlocked
 
     def find_swaps(self):
@@ -102,7 +102,7 @@ class MotionCheck(object):
         llvSwaps = [lvLoop for lvLoop in llvLoops if len(lvLoop) == 2 ]
         svSwaps = { v for lvSwap in llvSwaps for v in lvSwap }
         return svSwaps
-
+
     def find_same_dest(self):
         """ find groups of agents which are trying to land on the same cell.
             ie there is a gap of one cell between them and they are both landing on it.
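(Editor's illustration, not part of the patch.) The MotionCheck API touched above, and consumed by rail_env.py further down in this diff, is easiest to follow from a minimal sketch. The agent ids and cell coordinates below are invented, but addAgent, find_conflicts and check_motion are exactly the calls the close-following step logic makes:

from flatland.envs import agent_chains as ac

mc = ac.MotionCheck()
mc.addAgent(0, (0, 1), (0, 2))   # agent 0 wants to move right from (0, 1) to (0, 2)
mc.addAgent(1, (0, 2), (0, 2))   # agent 1 is stopped on (0, 2), i.e. a self-loop node
mc.find_conflicts()              # colours stopped / blocked / swapping cells in the graph

move, rc_next = mc.check_motion(0, (0, 1))
# agent 0 sits directly behind a stopped agent, so move should come back False
# and rc_next should stay at (0, 1)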
@@ -123,22 +123,22 @@ class MotionCheck(object):
             elif v in svBlocked:
                 self.G.nodes[v]["color"] = "red"
             elif len(dPred)>1:
-
+
                 if self.G.nodes[v].get("color") == "red":
                     continue
-
+
                 if self.G.nodes[v].get("agent") is None:
-                    self.G.nodes[v]["color"] = "blue"
+                    self.G.nodes[v]["color"] = "blue"
                 else:
                     self.G.nodes[v]["color"] = "magenta"
-
+
                 # predecessors of a contended cell
                 diAgCell = {self.G.nodes[vPred].get("agent"): vPred for vPred in dPred}
-
+
                 # remove the agent with the lowest index, who wins
                 iAgWinner = min(diAgCell)
                 diAgCell.pop(iAgWinner)
-
+
                 # Block all the remaining predessors, and their tree of preds
                 for iAg, v in diAgCell.items():
                     self.G.nodes[v]["color"] = "red"
@@ -163,7 +163,7 @@ class MotionCheck(object):
             sColor = dAttr["color"]
             if sColor in [ "red", "purple" ]:
                 return (False, rcPos)
-
+
         dSucc = self.G.succ[rcPos]
 
         # This should never happen - only the next cell of an agent has no successor
@@ -179,7 +179,7 @@ class MotionCheck(object):
 
         return (True, rcNext)
 
-
+
 def render(omc:MotionCheck, horizontal=True):
     try:
@@ -210,10 +210,10 @@ class ChainTestEnv(object):
 
     def addAgentToRow(self, c1, c2, xlabel=None):
         self.addAgent((self.iRowNext, c1), (self.iRowNext, c2), xlabel=xlabel)
-
-    def create_test_chain(self,
-            nAgents:int,
+
+    def create_test_chain(self,
+            nAgents:int,
             rcVel:Tuple[int] = (0,1),
             liStopped:List[int]=[],
             xlabel=None):
@@ -227,7 +227,7 @@ class ChainTestEnv(object):
             else:
                 rcVel1 = rcVel
             self.omc.addAgent(iAg+self.iAgNext, rcPos, (rcPos[0] + rcVel1[0], rcPos[1] + rcVel1[1]) )
-
+
         if xlabel:
             self.omc.G.nodes[lrcAgPos[0]]["xlabel"] = xlabel
@@ -237,7 +237,7 @@ class ChainTestEnv(object):
 
     def nextRow(self):
         self.iRowNext+=1
-
+
 
 def create_test_agents(omc:MotionCheck):
@@ -326,7 +326,7 @@ def create_test_agents2(omc:MotionCheck):
     cte.addAgentToRow(3, 2)
     cte.addAgent((cte.iRowNext+1, 2), (cte.iRowNext, 2))
     cte.nextRow()
-
+
     if False:
         cte.nextRow()
         cte.nextRow()
@@ -342,7 +342,7 @@ def create_test_agents2(omc:MotionCheck):
     cte.addAgentToRow(3, 4)
     cte.addAgent((cte.iRowNext+1, 3), (cte.iRowNext, 3))
     cte.nextRow()
-
+
     cte.nextRow()
     cte.addAgentToRow(1, 2, "Tree")
@@ -370,8 +370,8 @@ def test_agent_following():
 
     lvCells = omc.G.nodes()
 
-    lColours = [ "magenta" if v in svStops
-        else "red" if v in svBlocked
+    lColours = [ "magenta" if v in svStops
+        else "red" if v in svBlocked
         else "purple" if v in svSwaps
         else "lightblue"
         for v in lvCells ]
@@ -388,4 +388,3 @@ def main():
 
 if __name__=="__main__":
     main()
-
\ No newline at end of file
diff --git a/flatland/envs/rail_env.py b/flatland/envs/rail_env.py
index b258e1d2f5bf72d33ad7470ca5a28f58260ec79d..d9ff7120789d5ad58ba4acc15d16586f0e34b827 100644
--- a/flatland/envs/rail_env.py
+++ b/flatland/envs/rail_env.py
@@ -154,11 +154,11 @@ class RailEnv(Environment):
     def __init__(self,
                  width,
                  height,
-                 rail_generator = None,
-                 schedule_generator = None,  # : sched_gen.ScheduleGenerator = sched_gen.random_schedule_generator(),
+                 rail_generator=None,
+                 schedule_generator=None,  # : sched_gen.ScheduleGenerator = sched_gen.random_schedule_generator(),
                  number_of_agents=1,
                  obs_builder_object: ObservationBuilder = GlobalObsForRailEnv(),
-                 malfunction_generator_and_process_data=None,  #mal_gen.no_malfunction_generator(),
+                 malfunction_generator_and_process_data=None,  # mal_gen.no_malfunction_generator(),
                  malfunction_generator=None,
                  remove_agents_at_target=True,
                  random_seed=1,
@@ -208,18 +208,18 @@ class RailEnv(Environment):
         elif malfunction_generator is not None:
             self.malfunction_generator = malfunction_generator
             # malfunction_process_data is not used
-            #self.malfunction_generator, self.malfunction_process_data = malfunction_generator_and_process_data
+            # self.malfunction_generator, self.malfunction_process_data = malfunction_generator_and_process_data
             self.malfunction_process_data = self.malfunction_generator.get_process_data()
         # replace default values here because we can't use default args values because of cyclic imports
         else:
             self.malfunction_generator = mal_gen.NoMalfunctionGen()
             self.malfunction_process_data = self.malfunction_generator.get_process_data()
 
-        #self.rail_generator: RailGenerator = rail_generator
+        # self.rail_generator: RailGenerator = rail_generator
         if rail_generator is None:
             rail_generator = rail_gen.random_rail_generator()
         self.rail_generator = rail_generator
-        #self.schedule_generator: ScheduleGenerator = schedule_generator
+        # self.schedule_generator: ScheduleGenerator = schedule_generator
         if schedule_generator is None:
             schedule_generator = sched_gen.random_schedule_generator()
         self.schedule_generator = schedule_generator
@@ -266,13 +266,12 @@ class RailEnv(Environment):
         # save episode timesteps ie agent positions, orientations.  (not yet actions / observations)
         self.record_steps = record_steps  # whether to save timesteps
         # save timesteps in here: [[[row, col, dir, malfunction],...nAgents], ...nSteps]
-        self.cur_episode = []
-        self.list_actions = []  # save actions in here
+        self.cur_episode = []
+        self.list_actions = []  # save actions in here
 
         self.close_following = close_following  # use close following logic
         self.motionCheck = ac.MotionCheck()
 
-
     def _seed(self, seed=None):
         self.np_random, seed = seeding.np_random(seed)
         random.seed(seed)
@@ -304,8 +303,6 @@ class RailEnv(Environment):
             agent.reset()
 
         self.active_agents = [i for i in range(len(self.agents))]
 
-
-
     def action_required(self, agent):
         """
         Check if an agent needs to provide an action
@@ -365,7 +362,6 @@ class RailEnv(Environment):
             else:
                 raise ValueError("Could not invoke __call__ or generate on rail_generator")
 
-
             self.rail = rail
             self.height, self.width = self.rail.grid.shape
@@ -377,8 +373,6 @@ class RailEnv(Environment):
             if optionals and 'distance_map' in optionals:
                 self.distance_map.set(optionals['distance_map'])
 
-
-
         if regenerate_schedule or regenerate_rail or self.get_num_agents() == 0:
             agents_hints = None
             if optionals and 'agents_hints' in optionals:
@@ -475,7 +469,6 @@ class RailEnv(Environment):
 
         """
-        #malfunction: Malfunction = self.malfunction_generator(agent, self.np_random)
         if "generate" in dir(self.malfunction_generator):
             malfunction: mal_gen.Malfunction = self.malfunction_generator.generate(agent, self.np_random)
         else:
@@ -564,15 +557,13 @@ class RailEnv(Environment):
             # Perform step on the agent
             self._step_agent_cf(i_agent, action_dict_.get(i_agent))
 
-
         # second loop: check for collisions / conflicts
         self.motionCheck.find_conflicts()
 
-
         # third loop: update positions
         for i_agent, agent in enumerate(self.agents):
             self._step_agent2_cf(i_agent)
-
+
             # manage the boolean flag to check if all agents are indeed done (or done_removed)
             have_all_agents_ended &= (agent.status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED])
@@ -584,9 +575,6 @@ class RailEnv(Environment):
             # Fix agents that finished their malfunction such that they can perform an action in the next step
             self._fix_agent_after_malfunction(agent)
 
-
-
-
         # Check for end of episode + set global reward to all rewards!
         if have_all_agents_ended:
@@ -601,8 +589,6 @@ class RailEnv(Environment):
 
         return self._get_observations(), self.rewards_dict, self.dones, info_dict
 
-
-
     def _step_agent(self, i_agent, action: Optional[RailEnvActions] = None):
         """
         Performs a step and step, start and stop penalty on a single agent in the following sub steps:
@@ -714,7 +700,7 @@ class RailEnv(Environment):
             # Perform stored action to transition to the next cell as soon as cell is free
             # Notice that we've already checked new_cell_valid and transition valid when we stored the action,
             # so we only have to check cell_free now!
-
+
             # Traditional check that next cell is free
             # cell and transition validity was checked when we stored transition_action_on_cellexit!
             cell_free, new_cell_valid, new_direction, new_position, transition_valid = self._check_action_on_agent(
@@ -727,7 +713,6 @@ class RailEnv(Environment):
                 self._move_agent_to_new_position(agent, new_position)
                 agent.direction = new_direction
                 agent.speed_data['position_fraction'] = 0.0
-
             # has the agent reached its target?
             if np.equal(agent.position, agent.target).all():
@@ -758,7 +743,6 @@ class RailEnv(Environment):
                 self.motionCheck.addAgent(i_agent, None, None)
                 return
 
-
         agent.old_direction = agent.direction
         agent.old_position = agent.position
@@ -767,7 +751,7 @@ class RailEnv(Environment):
         if agent.malfunction_data['malfunction'] > 0:
             self.motionCheck.addAgent(i_agent, agent.position, agent.position)
             # agent will get penalty in step_agent2_cf
-            #self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
+            # self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
             return
 
         # Is the agent at the beginning of the cell? Then, it can take an action.
@@ -827,14 +811,14 @@ class RailEnv(Environment):
                     self.rewards_dict[i_agent] += self.invalid_action_penalty
                     self.rewards_dict[i_agent] += self.stop_penalty
                     agent.moving = False
-                    self.motionCheck.addAgent(i_agent, agent.position, agent.position)
+                    self.motionCheck.addAgent(i_agent, agent.position, agent.position)
                     return
 
         if new_position is None:
             self.motionCheck.addAgent(i_agent, agent.position, agent.position)
             if agent.moving:
                 print("Agent", i_agent, "new_pos none, but moving")
-
+
         # Check the pos_frac position fraction
         if agent.moving:
             agent.speed_data['position_fraction'] += agent.speed_data['speed']
@@ -853,27 +837,25 @@ class RailEnv(Environment):
         else:
             # This agent hasn't yet crossed the cell
             self.motionCheck.addAgent(i_agent, agent.position, agent.position)
 
-
-
     def _step_agent2_cf(self, i_agent):
         agent = self.agents[i_agent]
 
-        if agent.status in [ RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED ]:
+        if agent.status in [RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED]:
             return
 
         (move, rc_next) = self.motionCheck.check_motion(i_agent, agent.position)
 
         if agent.position is not None:
             sbTrans = format(self.rail.grid[agent.position], "016b")
-            trans_block = sbTrans[agent.direction*4 : agent.direction * 4 + 4]
+            trans_block = sbTrans[agent.direction * 4: agent.direction * 4 + 4]
             if (trans_block == "0000"):
                 print (i_agent, agent.position, agent.direction, sbTrans, trans_block)
 
         # if agent cannot enter env, then we should have move=False
-
+
         if move:
             if agent.position is None:  # agent is entering the env
-                #print(i_agent, "writing new pos ", rc_next, " into agent position (None)")
+                # print(i_agent, "writing new pos ", rc_next, " into agent position (None)")
                 agent.position = rc_next
                 agent.status = RailAgentStatus.ACTIVE
                 agent.speed_data['position_fraction'] = 0.0
@@ -881,7 +863,7 @@ class RailEnv(Environment):
             else:  # normal agent move
                 cell_free, new_cell_valid, new_direction, new_position, transition_valid = self._check_action_on_agent(
                     agent.speed_data['transition_action_on_cellexit'], agent)
-
+
                 if not all([transition_valid, new_cell_valid]):
                     print(f"ERRROR: step_agent2 invalid transition ag {i_agent} dir {new_direction} pos {agent.position} next {rc_next}")
 
@@ -891,14 +873,14 @@ class RailEnv(Environment):
                           f"stored action: {agent.speed_data['transition_action_on_cellexit']}")
 
                     sbTrans = format(self.rail.grid[agent.position], "016b")
-                    trans_block = sbTrans[agent.direction*4 : agent.direction * 4 + 4]
+                    trans_block = sbTrans[agent.direction * 4: agent.direction * 4 + 4]
                     if (trans_block == "0000"):
                         print ("ERROR: ", i_agent, agent.position, agent.direction, sbTrans, trans_block)
 
                 agent.position = rc_next
                 agent.direction = new_direction
-                agent.speed_data['position_fraction'] = 0.0
-
+                agent.speed_data['position_fraction'] = 0.0
+
             # has the agent reached its target?
             if np.equal(agent.position, agent.target).all():
                 agent.status = RailAgentStatus.DONE
@@ -912,10 +894,6 @@ class RailEnv(Environment):
             # step penalty if not moving (stopped now or before)
             self.rewards_dict[i_agent] += self.step_penalty * agent.speed_data['speed']
 
-
-
-
-
     def _set_agent_to_initial_position(self, agent: EnvAgent, new_position: IntVector2D):
         """
         Sets the agent to its initial position. Updates the agent object and the position
@@ -1091,7 +1069,7 @@ class RailEnv(Environment):
         ------
         Dict object
         """
-        #print(f"_get_obs - num agents: {self.get_num_agents()} {list(range(self.get_num_agents()))}")
+        # print(f"_get_obs - num agents: {self.get_num_agents()} {list(range(self.get_num_agents()))}")
         self.obs_dict = self.obs_builder.get_many(list(range(self.get_num_agents())))
         return self.obs_dict
 
@@ -1110,8 +1088,6 @@ class RailEnv(Environment):
         """
         return Grid4Transitions.get_entry_directions(self.rail.get_full_transitions(row, col))
 
-
-
     def _exp_distirbution_synced(self, rate: float) -> float:
         """
         Generates sample from exponential distribution
@@ -1140,5 +1116,3 @@ class RailEnv(Environment):
     def save(self, filename):
         print("deprecated call to env.save() - pls call RailEnvPersister.save()")
         persistence.RailEnvPersister.save(self, filename)
-
-
diff --git a/flatland/evaluators/service.py b/flatland/evaluators/service.py
index dff9fe6448e3799c79739405f57325163a8e271f..7eef7f0667335cddd233a17327ea06960e8cfc97 100644
--- a/flatland/evaluators/service.py
+++ b/flatland/evaluators/service.py
@@ -9,6 +9,7 @@ import time
 import traceback
 import json
 import itertools
+import re
 
 import crowdai_api
 import msgpack
@@ -107,6 +108,7 @@ class FlatlandRemoteEvaluationService:
                  shuffle=True,
                  missing_only=False,
                  result_output_path=None,
+                 disable_timeouts=False
                  ):
 
         # Episode recording properties
@@ -121,6 +123,17 @@ class FlatlandRemoteEvaluationService:
                 os.makedirs(self.mergeDir)
         self.use_pickle = use_pickle
         self.missing_only = missing_only
+        self.disable_timeouts = disable_timeouts
+
+        if self.disable_timeouts:
+            print("=" * 20)
+            print("Timeouts are DISABLED!")
+            print("=" * 20)
+
+        if not shuffle:
+            print("=" * 20)
+            print("Env shuffling is DISABLED!")
+            print("=" * 20)
 
         # Test Env folder Paths
         self.test_env_folder = test_env_folder
@@ -202,6 +215,7 @@ class FlatlandRemoteEvaluationService:
         self.simulation_steps = []
         self.simulation_times = []
         self.env_step_times = []
+        self.nb_malfunctioning_trains = []
         self.begin_simulation = False
         self.current_step = 0
         self.visualize = visualize
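(Editor's sketch, not part of the patch.) How the new constructor flags are expected to be passed when the service is driven from Python. Everything mirrors the call sites in flatland/cli.py above and in the __main__ block further down, except test_env_folder, whose keyword name is assumed from the self.test_env_folder assignment:

from flatland.evaluators.service import FlatlandRemoteEvaluationService

grader = FlatlandRemoteEvaluationService(
    test_env_folder="./scratch/test-envs",   # assumed keyword, see self.test_env_folder above
    flatland_rl_service_id="FLATLAND_RL_SERVICE_ID",
    visualize=False,
    verbose=False,
    result_output_path="/tmp/output.csv",
    shuffle=False,               # new: keep the deterministic env order
    disable_timeouts=True        # new: handy when stepping through a client in a debugger
)
result = grader.run()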
@@ -279,12 +293,15 @@ class FlatlandRemoteEvaluationService:
                ├── .......
                └── Level_99.pkl
         """
-        env_paths = sorted(glob.glob(
-            os.path.join(
-                self.test_env_folder,
-                "*/*.pkl"
+        env_paths = sorted(
+            glob.glob(
+                os.path.join(
+                    self.test_env_folder,
+                    "*/*.pkl"
+                )
             )
-        ))
+        )
+
         # Remove the root folder name from the individual
         # lists, so that we only have the path relative
         # to the test root folder
@@ -292,13 +309,21 @@ class FlatlandRemoteEvaluationService:
             x, self.test_env_folder
         ) for x in env_paths])
 
+        # Sort in proper order
+        def get_file_order(f):
+            numbers = re.findall(r'\d+', os.path.relpath(f))
+            value = int(numbers[0]) * 1000 + int(numbers[1])
+            return value
+
+        env_paths.sort(key=get_file_order)
+
         # if requested, only generate actions for those envs which don't already have them
         if self.mergeDir and self.missing_only:
             existing_paths = (itertools.chain.from_iterable(
                 [glob.glob(os.path.join(self.mergeDir, f"envs/*.{ext}"))
                     for ext in ["pkl", "mpk"]]))
             existing_paths = [os.path.relpath(sPath, self.mergeDir) for sPath in existing_paths]
-            env_paths = sorted(set(env_paths) - set(existing_paths))
+            env_paths = set(env_paths) - set(existing_paths)
 
         return env_paths
@@ -308,7 +333,7 @@ class FlatlandRemoteEvaluationService:
         information specific to each of the individual env evaluations.
 
         This loads the template CSV with pre-filled information from the
-        provided metadata.csv file, and fills it up with
+        provided metadata.csv file, and fills it up with
         evaluation runtime information.
         """
         self.evaluation_metadata_df = None
@@ -329,6 +354,7 @@ class FlatlandRemoteEvaluationService:
             self.evaluation_metadata_df["percentage_complete"] = np.nan
             self.evaluation_metadata_df["steps"] = np.nan
             self.evaluation_metadata_df["simulation_time"] = np.nan
+            self.evaluation_metadata_df["nb_malfunctioning_trains"] = np.nan
 
             # Add client specific columns
             # TODO: This needs refactoring
@@ -341,7 +367,7 @@ class FlatlandRemoteEvaluationService:
     def update_evaluation_metadata(self):
        """
        This function is called when we move from one simulation to another
-       and it simply tries to update the simulation specific information
+       and it simply tries to update the simulation specific information
        for the **previous** episode in the metadata_df if it exists.
        """
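(Editor's illustration, not part of the patch.) A worked example of the get_file_order sort added above, assuming the Test_<i>/Level_<j>.pkl layout shown in the docstring:

import os
import re

def get_file_order(f):
    numbers = re.findall(r'\d+', os.path.relpath(f))
    value = int(numbers[0]) * 1000 + int(numbers[1])
    return value

paths = ["Test_10/Level_2.pkl", "Test_2/Level_10.pkl", "Test_2/Level_9.pkl"]
paths.sort(key=get_file_order)
# -> ['Test_2/Level_9.pkl', 'Test_2/Level_10.pkl', 'Test_10/Level_2.pkl']
# i.e. numeric ordering by test id, then level; plain sorted() would put
# Level_10 before Level_9 and Test_10 before Test_2.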
""" @@ -357,6 +383,7 @@ class FlatlandRemoteEvaluationService: _row.percentage_complete = self.simulation_percentage_complete[-1] _row.steps = self.simulation_steps[-1] _row.simulation_time = self.simulation_times[-1] + _row.nb_malfunctioning_trains = self.nb_malfunctioning_trains[-1] # TODO: This needs refactoring # Add controller_inference_time_metrics @@ -380,7 +407,7 @@ class FlatlandRemoteEvaluationService: last_simulation_env_file_path ] = _row - # Delete this key from the stats to ensure that it + # Delete this key from the stats to ensure that it # gets computed again from scratch in the next episode self.delete_key_in_running_stats( "current_episode_controller_inference_time") @@ -462,6 +489,9 @@ class FlatlandRemoteEvaluationService: """ COMMAND_TIMEOUT = 10 ** 6 + if self.disable_timeouts: + COMMAND_TIMEOUT = None + @timeout_decorator.timeout(COMMAND_TIMEOUT, use_signals=use_signals_in_timeout) # timeout for each command def _get_next_command(command_channel, _redis): """ @@ -576,7 +606,7 @@ class FlatlandRemoteEvaluationService: There are still test envs left that are yet to be evaluated """ test_env_file_path = self.env_file_paths[self.simulation_count] - print("Evaluating : {}".format(test_env_file_path)) + print("Evaluating {} ({}/{})".format(test_env_file_path, self.simulation_count, len(self.env_file_paths))) test_env_file_path = os.path.join( self.test_env_folder, test_env_file_path @@ -589,11 +619,6 @@ class FlatlandRemoteEvaluationService: obs_builder_object=DummyObservationBuilder(), record_steps=True) - if self.begin_simulation: - # If begin simulation has already been initialized - # atleast once - # This adds the simulation time for the previous episode - self.simulation_times.append(time.time() - self.begin_simulation) self.begin_simulation = time.time() # Update evaluation metadata for the previous episode @@ -610,6 +635,7 @@ class FlatlandRemoteEvaluationService: self.simulation_rewards_normalized.append(0) self.simulation_percentage_complete.append(0) self.simulation_steps.append(0) + self.nb_malfunctioning_trains.append(0) self.current_step = 0 @@ -715,6 +741,12 @@ class FlatlandRemoteEvaluationService: self.env.get_num_agents() ) + num_malfunctioning = sum(agent.malfunction_data['malfunction'] > 0 for agent in self.env.agents) + if (num_malfunctioning > 0): + print(num_malfunctioning, "agent malfunctioning at step", self.current_step) + + self.nb_malfunctioning_trains[-1] += num_malfunctioning + # record the actions before checking for done if self.actionDir is not None: self.lActions.append(action) @@ -723,6 +755,11 @@ class FlatlandRemoteEvaluationService: if done["__all__"]: self.simulation_done = True + if self.begin_simulation: + # If begin simulation has already been initialized at least once + # This adds the simulation time for the previous episode + self.simulation_times.append(time.time() - self.begin_simulation) + # Compute percentage complete complete = 0 for i_agent in range(self.env.get_num_agents()): @@ -732,12 +769,19 @@ class FlatlandRemoteEvaluationService: percentage_complete = complete * 1.0 / self.env.get_num_agents() self.simulation_percentage_complete[-1] = percentage_complete - print("Evaluation finished in {} timesteps. Percentage agents done: {:.3f}. Normalized reward: {:.3f}.".format( + print("Evaluation finished in {} timesteps, {:.3f} seconds. Percentage agents done: {:.3f}. Normalized reward: {:.3f}. 
                     self.simulation_steps[-1],
+                    self.simulation_times[-1],
                     self.simulation_percentage_complete[-1],
-                    self.simulation_rewards_normalized[-1]
+                    self.simulation_rewards_normalized[-1],
+                    self.nb_malfunctioning_trains[-1]
                 ))
 
+                # Write intermediate results
+                if self.result_output_path:
+                    self.evaluation_metadata_df.to_csv(self.result_output_path)
+                    print("Wrote intermediate output results to : {}".format(self.result_output_path))
+
             if self.actionDir is not None:
                 self.save_actions()
 
@@ -883,7 +927,7 @@ class FlatlandRemoteEvaluationService:
             self.evaluation_metadata_df.to_csv(self.result_output_path)
             print("Wrote output results to : {}".format(self.result_output_path))
 
-            # Upload the metadata file to S3
+            # Upload the metadata file to S3
             if aicrowd_helpers.is_grading() and aicrowd_helpers.is_aws_configured():
                 metadata_s3_key = aicrowd_helpers.upload_to_s3(
                     self.result_output_path
@@ -925,7 +969,7 @@ class FlatlandRemoteEvaluationService:
         #################################################################################
         # Compute the mean rewards, mean normalized_reward and mean_percentage_complete
         # we group all the results by the test_ids
-        # so we first compute the mean in each of the test_id groups,
+        # so we first compute the mean in each of the test_id groups,
         # and then we compute the mean across each of the test_id groups
         #
         #
@@ -1004,10 +1048,11 @@ class FlatlandRemoteEvaluationService:
                 self.simulation_rewards[-1] = self.env._max_episode_steps * self.env.get_num_agents()
                 self.simulation_rewards_normalized[-1] = -1.0
 
-                print("Evaluation TIMED OUT after {} timesteps, using max penalty. Percentage agents done: {:.3f}. Normalized reward: {:.3f}.".format(
+                print("Evaluation TIMED OUT after {} timesteps, using max penalty. Percentage agents done: {:.3f}. Normalized reward: {:.3f}. Number of malfunctions: {}".format(
                     self.simulation_steps[-1],
                     self.simulation_percentage_complete[-1],
-                    self.simulation_rewards_normalized[-1]
+                    self.simulation_rewards_normalized[-1],
+                    self.nb_malfunctioning_trains[-1],
                 ))
 
                 self.timeout_counter += 1
@@ -1079,12 +1124,12 @@ class FlatlandRemoteEvaluationService:
                     return _error
                 ###########################################
                 # We keep a record of the previous command
-                # to be able to have different behaviors
+                # to be able to have different behaviors
                 # between different "command transitions"
-                #
-                # An example use case, is when we want to
-                # have a different timeout for the
-                # first step in every environment
+                #
+                # An example use case, is when we want to
+                # have a different timeout for the
+                # first step in every environment
                 # to account for some initial planning time
                 self.previous_command = command
         except Exception as e:
@@ -1141,12 +1186,24 @@ if __name__ == "__main__":
                         help="don't shuffle the envs. Default is to shuffle.",
                         required=False)
 
+    parser.add_argument('--disableTimeouts',
+                        default=False,
+                        action="store_true",
+                        help="Disable all timeouts.",
+                        required=False)
+
     parser.add_argument('--missingOnly',
                         default=False,
                         action="store_true",
                         help="only request the envs/actions which are missing",
                         required=False)
 
+    parser.add_argument('--resultsDir',
+                        default="/tmp/output.csv",
+                        help="Results CSV path",
+                        required=False)
+
+
     parser.add_argument('--verbose',
                         default=False,
                         action="store_true",
@@ -1162,13 +1219,14 @@ if __name__ == "__main__":
         verbose=args.verbose,
         visualize=True,
         video_generation_envs=["Test_0/Level_100.pkl"],
-        result_output_path="/tmp/output.csv",
+        result_output_path=args.resultsDir,
         actionDir=args.actionDir,
         episodeDir=args.episodeDir,
         mergeDir=args.mergeDir,
         use_pickle=args.pickle,
         shuffle=not args.noShuffle,
         missing_only=args.missingOnly,
+        disable_timeouts=args.disableTimeouts
     )
     result = grader.run()
     if result['type'] == messages.FLATLAND_RL.ENV_SUBMIT_RESPONSE:
diff --git a/requirements_dev.txt b/requirements_dev.txt
index aff1d72c46cc877d72683059f0e428c6e9f054c3..93414562b79e3c0d5e1a77e42b967dc0ea4028fe 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -21,6 +21,5 @@ attrs
 gym==0.14.0
 networkx
 ipycanvas
-pygraphviz
 graphviz
 imageio
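(Editor's note, not part of the patch.) Taken together, the new switches can be exercised from the service's __main__ entry point, and the intermediate results CSV can be inspected while a submission is still running. Column names come from the metadata changes above; the module invocation below is an assumption based on the file path, and any other required arguments are omitted:

# e.g.  python -m flatland.evaluators.service --disableTimeouts --resultsDir /tmp/results.csv
import pandas as pd

df = pd.read_csv("/tmp/results.csv")
finished = df.dropna(subset=["simulation_time"])   # episodes written out so far
print(finished[["steps", "simulation_time", "percentage_complete",
                "nb_malfunctioning_trains"]].describe())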