Skip to content
Snippets Groups Projects
Commit bf6cce10 authored by spmohanty's avatar spmohanty
Browse files

Fix typo in the variable storing simulation rewards

parent 3d054dc2
No related branches found
No related tags found
No related merge requests found
...@@ -70,7 +70,7 @@ class FlatlandRemoteEvaluationService: ...@@ -70,7 +70,7 @@ class FlatlandRemoteEvaluationService:
self.env_available = False self.env_available = False
self.reward = 0 self.reward = 0
self.simulation_count = 0 self.simulation_count = 0
self.simualation_rewards = [] self.simulation_rewards = []
self.simulation_percentage_complete = [] self.simulation_percentage_complete = []
self.simulation_steps = [] self.simulation_steps = []
self.simulation_times = [] self.simulation_times = []
...@@ -208,7 +208,7 @@ class FlatlandRemoteEvaluationService: ...@@ -208,7 +208,7 @@ class FlatlandRemoteEvaluationService:
self.simulation_times.append(time.time()-self.begin_simulation) self.simulation_times.append(time.time()-self.begin_simulation)
self.begin_simulation = time.time() self.begin_simulation = time.time()
self.simualation_rewards.append(0) self.simulation_rewards.append(0)
self.simulation_percentage_complete.append(0) self.simulation_percentage_complete.append(0)
self.simulation_steps.append(0) self.simulation_steps.append(0)
...@@ -259,7 +259,7 @@ class FlatlandRemoteEvaluationService: ...@@ -259,7 +259,7 @@ class FlatlandRemoteEvaluationService:
_observation, all_rewards, done, info = self.env.step(action) _observation, all_rewards, done, info = self.env.step(action)
cumulative_reward = np.sum(list(all_rewards.values())) cumulative_reward = np.sum(list(all_rewards.values()))
self.simualation_rewards[-1] += cumulative_reward self.simulation_rewards[-1] += cumulative_reward
self.simulation_steps[-1] += 1 self.simulation_steps[-1] += 1
if done["__all__"]: if done["__all__"]:
...@@ -299,10 +299,16 @@ class FlatlandRemoteEvaluationService: ...@@ -299,10 +299,16 @@ class FlatlandRemoteEvaluationService:
# Register simulation time of the last episode # Register simulation time of the last episode
self.simulation_times.append(time.time()-self.begin_simulation) self.simulation_times.append(time.time()-self.begin_simulation)
if len(self.simulation_rewards) != len(self.env_file_paths):
raise Exception(
"""env.submit called before the agent had the chance to operate on all the test environments.
"""
)
_response = {} _response = {}
_response['type'] = messages.FLATLAND_RL.ENV_SUBMIT_RESPONSE _response['type'] = messages.FLATLAND_RL.ENV_SUBMIT_RESPONSE
_payload = {} _payload = {}
_payload['mean_reward'] = np.mean(self.simualation_rewards) _payload['mean_reward'] = np.mean(self.simulation_rewards)
_payload['mean_percentage_complete'] = \ _payload['mean_percentage_complete'] = \
np.mean(self.simulation_percentage_complete) np.mean(self.simulation_percentage_complete)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment