Skip to content
Snippets Groups Projects
Commit 1a37a638 authored by mohanty's avatar mohanty
Browse files

Merge branch 'spm/code-improvements' into 'master'

Spm/code improvements

Closes #129

See merge request flatland/flatland!130
parents 53023fd5 f7c0b7f1
No related branches found
No related tags found
No related merge requests found
@@ -4,4 +4,4 @@
 __author__ = """S.P. Mohanty"""
 __email__ = 'mohanty@aicrowd.com'
-__version__ = '0.3.1'
+__version__ = '0.3.2'
@@ -104,6 +104,9 @@ class FlatlandRemoteEvaluationService:
             "score": {
                 "score": 0.0,
                 "score_secondary": 0.0
+            },
+            "meta": {
+                "normalized_reward": 0.0
             }
         }
@@ -113,6 +116,7 @@ class FlatlandRemoteEvaluationService:
         self.reward = 0
         self.simulation_count = -1
         self.simulation_rewards = []
+        self.simulation_rewards_normalized = []
         self.simulation_percentage_complete = []
         self.simulation_steps = []
         self.simulation_times = []
@@ -318,6 +322,7 @@ class FlatlandRemoteEvaluationService:
             self.begin_simulation = time.time()
             self.simulation_rewards.append(0)
+            self.simulation_rewards_normalized.append(0)
             self.simulation_percentage_complete.append(0)
             self.simulation_steps.append(0)
@@ -348,12 +353,14 @@ class FlatlandRemoteEvaluationService:
                 self.simulation_count * 1.0 / len(self.env_file_paths),
                 0, 1)
             mean_reward = np.mean(self.simulation_rewards)
+            mean_normalized_reward = np.mean(self.simulation_rewards_normalized)
             mean_percentage_complete = np.mean(self.simulation_percentage_complete)
             self.evaluation_state["state"] = "IN_PROGRESS"
             self.evaluation_state["progress"] = progress
             self.evaluation_state["simulation_count"] = self.simulation_count
             self.evaluation_state["score"]["score"] = mean_percentage_complete
             self.evaluation_state["score"]["score_secondary"] = mean_reward
+            self.evaluation_state["meta"]["normalized_reward"] = mean_normalized_reward
             self.handle_aicrowd_info_event(self.evaluation_state)

     def handle_env_step(self, command):
@@ -379,6 +386,17 @@ class FlatlandRemoteEvaluationService:
             cumulative_reward = np.sum(list(all_rewards.values()))
             self.simulation_rewards[-1] += cumulative_reward
             self.simulation_steps[-1] += 1
+            """
+            The normalized rewards normalize the reward for an
+            episode by dividing the whole reward by max-time-steps
+            allowed in that episode, and the number of agents present in
+            that episode
+            """
+            self.simulation_rewards_normalized[-1] += \
+                cumulative_reward / (
+                    self.env._max_episode_steps +
+                    self.env.get_num_agents()
+                )

             if done["__all__"]:
                 # Compute percentage complete
@@ -440,6 +458,7 @@ class FlatlandRemoteEvaluationService:
             )
             mean_reward = np.mean(self.simulation_rewards)
+            mean_normalized_reward = np.mean(self.simulation_rewards_normalized)
             mean_percentage_complete = np.mean(self.simulation_percentage_complete)

             if self.visualize and len(os.listdir(self.vizualization_folder_name)) > 0:
@@ -474,6 +493,7 @@ class FlatlandRemoteEvaluationService:
             _command_response['type'] = messages.FLATLAND_RL.ENV_SUBMIT_RESPONSE
             _payload = {}
             _payload['mean_reward'] = mean_reward
+            _payload['mean_normalized_reward'] = mean_normalized_reward
             _payload['mean_percentage_complete'] = mean_percentage_complete
             _command_response['payload'] = _payload
             self.send_response(_command_response, command)
@@ -486,11 +506,13 @@ class FlatlandRemoteEvaluationService:
             self.evaluation_state["simulation_count"] = self.simulation_count
             self.evaluation_state["score"]["score"] = mean_percentage_complete
             self.evaluation_state["score"]["score_secondary"] = mean_reward
+            self.evaluation_state["meta"]["normalized_reward"] = mean_normalized_reward
             self.handle_aicrowd_success_event(self.evaluation_state)
             print("#"*100)
             print("EVALUATION COMPLETE !!")
             print("#"*100)
             print("# Mean Reward : {}".format(mean_reward))
+            print("# Mean Normalized Reward : {}".format(mean_normalized_reward))
             print("# Mean Percentage Complete : {}".format(mean_percentage_complete))
             print("#"*100)
             print("#"*100)
......
@@ -5,7 +5,7 @@ pytest>=3.8.2
 pytest-runner>=4.2
 Click>=7.0
 crowdai-api>=0.1.21
-boto3>=1.9.194
+boto3
 numpy>=1.16.2
 recordtype>=1.3
 xarray>=0.11.3
......
 [bumpversion]
-current_version = 0.3.1
+current_version = 0.3.2
 commit = True
 tag = True
......
@@ -79,6 +79,6 @@ setup(
     test_suite='tests',
     tests_require=test_requirements,
     url='https://gitlab.aicrowd.com/flatland/flatland',
-    version='0.3.1',
+    version='0.3.2',
     zip_safe=False,
 )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment