diff --git a/flatland/__init__.py b/flatland/__init__.py index d86974cacc8ec578627aff21603db4418faffdce..91abefb98e2d87630408e5fd5e2797f522f4365c 100644 --- a/flatland/__init__.py +++ b/flatland/__init__.py @@ -4,4 +4,4 @@ __author__ = """S.P. Mohanty""" __email__ = 'mohanty@aicrowd.com' -__version__ = '0.3.1' +__version__ = '0.3.2' diff --git a/flatland/evaluators/service.py b/flatland/evaluators/service.py index 0691d6328ce5fb1503f9a210970bd76fa0f3d584..e66161163dcdee48acc0ebcfb0bd6ba397ea72d0 100644 --- a/flatland/evaluators/service.py +++ b/flatland/evaluators/service.py @@ -104,6 +104,9 @@ class FlatlandRemoteEvaluationService: "score": { "score": 0.0, "score_secondary": 0.0 + }, + "meta": { + "normalized_reward": 0.0 } } @@ -113,6 +116,7 @@ class FlatlandRemoteEvaluationService: self.reward = 0 self.simulation_count = -1 self.simulation_rewards = [] + self.simulation_rewards_normalized = [] self.simulation_percentage_complete = [] self.simulation_steps = [] self.simulation_times = [] @@ -318,6 +322,7 @@ class FlatlandRemoteEvaluationService: self.begin_simulation = time.time() self.simulation_rewards.append(0) + self.simulation_rewards_normalized.append(0) self.simulation_percentage_complete.append(0) self.simulation_steps.append(0) @@ -348,12 +353,14 @@ class FlatlandRemoteEvaluationService: self.simulation_count * 1.0 / len(self.env_file_paths), 0, 1) mean_reward = np.mean(self.simulation_rewards) + mean_normalized_reward = np.mean(self.simulation_rewards_normalized) mean_percentage_complete = np.mean(self.simulation_percentage_complete) self.evaluation_state["state"] = "IN_PROGRESS" self.evaluation_state["progress"] = progress self.evaluation_state["simulation_count"] = self.simulation_count self.evaluation_state["score"]["score"] = mean_percentage_complete self.evaluation_state["score"]["score_secondary"] = mean_reward + self.evaluation_state["meta"]["normalized_reward"] = mean_normalized_reward self.handle_aicrowd_info_event(self.evaluation_state) def 
handle_env_step(self, command): @@ -379,6 +386,17 @@ class FlatlandRemoteEvaluationService: cumulative_reward = np.sum(list(all_rewards.values())) self.simulation_rewards[-1] += cumulative_reward self.simulation_steps[-1] += 1 + """ + The normalized reward scales the reward of an episode by + dividing it by the product of the max-time-steps allowed + in that episode and the number of agents present in + that episode + """ + self.simulation_rewards_normalized[-1] += \ + cumulative_reward / ( + self.env._max_episode_steps * + self.env.get_num_agents() + ) if done["__all__"]: # Compute percentage complete @@ -440,6 +458,7 @@ class FlatlandRemoteEvaluationService: ) mean_reward = np.mean(self.simulation_rewards) + mean_normalized_reward = np.mean(self.simulation_rewards_normalized) mean_percentage_complete = np.mean(self.simulation_percentage_complete) if self.visualize and len(os.listdir(self.vizualization_folder_name)) > 0: @@ -474,6 +493,7 @@ class FlatlandRemoteEvaluationService: _command_response['type'] = messages.FLATLAND_RL.ENV_SUBMIT_RESPONSE _payload = {} _payload['mean_reward'] = mean_reward + _payload['mean_normalized_reward'] = mean_normalized_reward _payload['mean_percentage_complete'] = mean_percentage_complete _command_response['payload'] = _payload self.send_response(_command_response, command) @@ -486,11 +506,13 @@ class FlatlandRemoteEvaluationService: self.evaluation_state["simulation_count"] = self.simulation_count self.evaluation_state["score"]["score"] = mean_percentage_complete self.evaluation_state["score"]["score_secondary"] = mean_reward + self.evaluation_state["meta"]["normalized_reward"] = mean_normalized_reward self.handle_aicrowd_success_event(self.evaluation_state) print("#"*100) print("EVALUATION COMPLETE !!") print("#"*100) print("# Mean Reward : {}".format(mean_reward)) + print("# Mean Normalized Reward : {}".format(mean_normalized_reward)) print("# Mean Percentage Complete : {}".format(mean_percentage_complete)) print("#"*100) 
print("#"*100) diff --git a/requirements_dev.txt b/requirements_dev.txt index 619e276988353631bc098d9b24af56d03ade3545..d8e23acef9bea1486cfd2b6055ad8c7416414eb4 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -5,7 +5,7 @@ pytest>=3.8.2 pytest-runner>=4.2 Click>=7.0 crowdai-api>=0.1.21 -boto3>=1.9.194 +boto3 numpy>=1.16.2 recordtype>=1.3 xarray>=0.11.3 diff --git a/setup.cfg b/setup.cfg index 5f92a326f2bce7c4450019a04ed37cd4c5b6cc21..e85a00ffb12e4b13063dcdd3a26e7c4643e0702b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.1 +current_version = 0.3.2 commit = True tag = True diff --git a/setup.py b/setup.py index f6c9ae52e844c8d82aac42c663d257d34cb1f6b8..c749a01e14ff23eab132aba1c0a9965c6ba89255 100644 --- a/setup.py +++ b/setup.py @@ -79,6 +79,6 @@ setup( test_suite='tests', tests_require=test_requirements, url='https://gitlab.aicrowd.com/flatland/flatland', - version='0.3.1', + version='0.3.2', zip_safe=False, )