From 95d2dc949cef5342cb6db732f236d102bc6ba6fb Mon Sep 17 00:00:00 2001
From: Dipam Chakraborty <dipam@aicrowd.com>
Date: Thu, 11 Nov 2021 12:46:16 +0530
Subject: [PATCH] round 2 env with action repeats every 30ms

---
 .gitignore                                    |  3 +-
 .../deepracer_gym/envs/deepracer_gym_env.py   | 32 ++++++++++++++++++-
 deepracer-gym/random_actions_example.py       |  8 +++--
 deepracer-gym/start_deepracer_docker.sh       |  2 +-
 4 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8686387..42968a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 __pycache__
 deepracer-gym/deepracer_gym.egg-info/
-playground/
\ No newline at end of file
+playground/
+deepracer-gym/dump.py
diff --git a/deepracer-gym/deepracer_gym/envs/deepracer_gym_env.py b/deepracer-gym/deepracer_gym/envs/deepracer_gym_env.py
index e497b6c..72dfb58 100644
--- a/deepracer-gym/deepracer_gym/envs/deepracer_gym_env.py
+++ b/deepracer-gym/deepracer_gym/envs/deepracer_gym_env.py
@@ -1,17 +1,47 @@
 import numpy as np
 import gym
 from deepracer_gym.zmq_client import DeepracerEnvHelper
-
+import time
+import warnings
 class DeepracerGymEnv(gym.Env):
     def __init__(self):
         self.action_space = gym.spaces.Discrete(5)
         self.deepracer_helper = DeepracerEnvHelper()
+        self.last_step_time = None
+        self.max_step_time = 0.03  # seconds
 
     def reset(self):
         observation = self.deepracer_helper.env_reset()
         return observation
 
     def step(self, action):
+
+        if self.last_step_time is not None:
+            time_delta = time.time() - self.last_step_time - self.max_step_time
+        else:
+            time_delta = -1
+        done = False
+        n_repeats = 0
+
+        # This emulates the asynchronous nature of the real-world track:
+        # if an action is not returned within the required time limit, the previous action is repeated
+        while (not done) and time_delta > 0:
+            time_delta -= self.max_step_time
+            observation, reward, done, info = self._step_sim(self.last_action)
+            n_repeats += 1
+
+        if n_repeats > 0:
+            warn_msg = f"Action was repeated {n_repeats} times, try to keep model step time below {self.max_step_time} seconds"
+            warnings.warn(warn_msg)
+
+        if not done:
+            observation, reward, done, info = self._step_sim(action)
+
+        self.last_action = action
+        self.last_step_time = time.time()
+        return observation, reward, done, info
+
+    def _step_sim(self, action):
         rl_coach_obs = self.deepracer_helper.send_act_rcv_obs(action)
         observation, reward, done, info = self.deepracer_helper.unpack_rl_coach_obs(rl_coach_obs)
         return observation, reward, done, info
diff --git a/deepracer-gym/random_actions_example.py b/deepracer-gym/random_actions_example.py
index c496ee6..61de439 100644
--- a/deepracer-gym/random_actions_example.py
+++ b/deepracer-gym/random_actions_example.py
@@ -2,6 +2,7 @@
 import gym
 import numpy as np
 import deepracer_gym
+import time
 
 env = gym.make('deepracer_gym:deepracer-v0')
 
@@ -13,8 +14,11 @@
 steps_completed = 0
 episodes_completed = 0
 total_reward = 0
-for _ in range(500):
+for i in range(500):
     observation, reward, done, info = env.step(np.random.randint(5))
+    # Example to show the effect of delayed model inference
+    if (i-5) % 5 == 0:
+        time.sleep(0.1)
     steps_completed += 1
     total_reward += reward
 
@@ -23,4 +27,4 @@ for _ in range(500):
         episodes_completed += 1
         print("Episodes Completed:", episodes_completed, "Steps:", steps_completed, "Reward", total_reward)
         steps_completed = 0
-        total_reward = 0
\ No newline at end of file
+        total_reward = 0
diff --git a/deepracer-gym/start_deepracer_docker.sh b/deepracer-gym/start_deepracer_docker.sh
index cb04278..dd2f02a 100644
--- a/deepracer-gym/start_deepracer_docker.sh
+++ b/deepracer-gym/start_deepracer_docker.sh
@@ -1 +1 @@
-docker run -it --rm --name=deepracer -p 8888:8888 -p 5000:5000 --cpus="3" --memory="6g" aicrowd/base-images:deepracer_round1_release /bin/bash
+docker run -it --rm --name=deepracer -p 8888:8888 -p 5000:5000 --cpus="3" --memory="6g" aicrowd/base-images:deepracer_round2_release /bin/bash
--
GitLab
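
With this patch the environment enforces a soft real-time budget of max_step_time (0.03 s) per step: the time between consecutive step() calls is measured, and for every 30 ms of overrun the environment replays last_action once and emits a warning, so a slow model loses control of the car for those frames. Below is a minimal client-side sketch of tracking your own inference latency against that budget. It assumes the deepracer_gym:deepracer-v0 registration from this repo and a running simulator container; my_policy is a hypothetical placeholder, not part of the repo.

# Latency-aware rollout sketch. Assumptions: the deepracer_gym env from this
# repo is installed and the simulator docker is running; my_policy is a
# hypothetical stand-in for a trained model's forward pass.
import time

import gym
import numpy as np
import deepracer_gym  # registers deepracer-v0 with gym

def my_policy(observation):
    # Placeholder inference; replace with a real model.
    return np.random.randint(5)

env = gym.make('deepracer_gym:deepracer-v0')
observation = env.reset()

for _ in range(100):
    start = time.time()
    action = my_policy(observation)
    inference_time = time.time() - start
    # The env times the gap between step() calls; inference usually dominates it.
    if inference_time > 0.03:
        print(f"Inference took {inference_time * 1000:.1f} ms (> 30 ms budget), expect repeated actions")
    observation, reward, done, info = env.step(action)
    if done:
        observation = env.reset()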