Merge branch 'spm/code-improvements' into 'master'

Spm/code improvements. Closes #129. See merge request flatland/flatland!130.

Merge branch 'spm/code-improvements' into 'master'
Spm/code improvements. Closes #129. See merge request flatland/flatland!130.
1a37a638 · mohanty · 53023fd5 · f7c0b7f1 · 1a37a638 · 1a37a638
Commit 1a37a638 authored 5 years ago by mohanty
--- a/flatland/__init__.py
+++ b/flatland/__init__.py
@@ -4,4 +4,4 @@

 __author__ = """S.P. Mohanty"""
 __email__ = 'mohanty@aicrowd.com'
-__version__ = '0.3.1'
+__version__ = '0.3.2'
--- a/flatland/evaluators/service.py
+++ b/flatland/evaluators/service.py
@@ -104,6 +104,9 @@ class FlatlandRemoteEvaluationService:
            "score": {
                "score": 0.0,
                "score_secondary": 0.0
+            },
+            "meta": {
+                "normalized_reward": 0.0
            }
        }
        
@@ -113,6 +116,7 @@ class FlatlandRemoteEvaluationService:
        self.reward = 0
        self.simulation_count = -1
        self.simulation_rewards = []
+        self.simulation_rewards_normalized = []
        self.simulation_percentage_complete = []
        self.simulation_steps = []
        self.simulation_times = []
@@ -318,6 +322,7 @@ class FlatlandRemoteEvaluationService:
            self.begin_simulation = time.time()

            self.simulation_rewards.append(0)
+            self.simulation_rewards_normalized.append(0)
            self.simulation_percentage_complete.append(0)
            self.simulation_steps.append(0)

@@ -348,12 +353,14 @@ class FlatlandRemoteEvaluationService:
                    self.simulation_count * 1.0 / len(self.env_file_paths),
                    0, 1)
        mean_reward = np.mean(self.simulation_rewards)
+        mean_normalized_reward = np.mean(self.simulation_rewards_normalized)
        mean_percentage_complete = np.mean(self.simulation_percentage_complete)
        self.evaluation_state["state"] = "IN_PROGRESS"
        self.evaluation_state["progress"] = progress
        self.evaluation_state["simulation_count"] = self.simulation_count
        self.evaluation_state["score"]["score"] = mean_percentage_complete
        self.evaluation_state["score"]["score_secondary"] = mean_reward
+        self.evaluation_state["meta"]["normalized_reward"] = mean_normalized_reward
        self.handle_aicrowd_info_event(self.evaluation_state)

    def handle_env_step(self, command):
@@ -379,6 +386,17 @@ class FlatlandRemoteEvaluationService:
        cumulative_reward = np.sum(list(all_rewards.values()))
        self.simulation_rewards[-1] += cumulative_reward
        self.simulation_steps[-1] += 1
+        # Accumulate the normalized reward for the current episode: the
+        # step's cumulative reward divided by both the maximum number of
+        # time steps allowed in the episode and the number of agents
+        # present in it, i.e. by their product (not their sum).
+        self.simulation_rewards_normalized[-1] += \
+            cumulative_reward / (
+                self.env._max_episode_steps *
+                self.env.get_num_agents()
+            )

        if done["__all__"]:
            # Compute percentage complete
@@ -440,6 +458,7 @@ class FlatlandRemoteEvaluationService:
            )
        
        mean_reward = np.mean(self.simulation_rewards)
+        mean_normalized_reward = np.mean(self.simulation_rewards_normalized)
        mean_percentage_complete = np.mean(self.simulation_percentage_complete)

        if self.visualize and len(os.listdir(self.vizualization_folder_name)) > 0:
@@ -474,6 +493,7 @@ class FlatlandRemoteEvaluationService:
        _command_response['type'] = messages.FLATLAND_RL.ENV_SUBMIT_RESPONSE
        _payload = {}
        _payload['mean_reward'] = mean_reward
+        _payload['mean_normalized_reward'] = mean_normalized_reward
        _payload['mean_percentage_complete'] = mean_percentage_complete
        _command_response['payload'] = _payload
        self.send_response(_command_response, command)
@@ -486,11 +506,13 @@ class FlatlandRemoteEvaluationService:
        self.evaluation_state["simulation_count"] = self.simulation_count
        self.evaluation_state["score"]["score"] = mean_percentage_complete
        self.evaluation_state["score"]["score_secondary"] = mean_reward
+        self.evaluation_state["meta"]["normalized_reward"] = mean_normalized_reward
        self.handle_aicrowd_success_event(self.evaluation_state)
        print("#"*100)
        print("EVALUATION COMPLETE !!")
        print("#"*100)
        print("# Mean Reward : {}".format(mean_reward))
+        print("# Mean Normalized Reward : {}".format(mean_normalized_reward))
        print("# Mean Percentage Complete : {}".format(mean_percentage_complete))
        print("#"*100)
        print("#"*100)

--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -5,7 +5,7 @@ pytest>=3.8.2
 pytest-runner>=4.2
 Click>=7.0
 crowdai-api>=0.1.21
-boto3>=1.9.194
+boto3
 numpy>=1.16.2
 recordtype>=1.3
 xarray>=0.11.3

--- a/setup.cfg
+++ b/setup.cfg
 [bumpversion]
-current_version = 0.3.1
+current_version = 0.3.2
 commit = True
 tag = True


--- a/setup.py
+++ b/setup.py
@@ -79,6 +79,6 @@ setup(
    test_suite='tests',
    tests_require=test_requirements,
    url='https://gitlab.aicrowd.com/flatland/flatland',
-    version='0.3.1',
+    version='0.3.2',
    zip_safe=False,
 )