diff --git a/.gitignore b/.gitignore
index 868638762c6a4e7d96e2c91ff9be284b8002307d..42968a8bb47d3a942cc6f87a7abdea98a4d470eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 __pycache__
 deepracer-gym/deepracer_gym.egg-info/
-playground/
\ No newline at end of file
+playground/
+deepracer-gym/dump.py
diff --git a/deepracer-gym/deepracer_gym/envs/deepracer_gym_env.py b/deepracer-gym/deepracer_gym/envs/deepracer_gym_env.py
index e497b6cfc59ca2e38aa63d141c84460359e311b6..72dfb5839a31fca127ad0e5c22c36a450a8d2bd3 100644
--- a/deepracer-gym/deepracer_gym/envs/deepracer_gym_env.py
+++ b/deepracer-gym/deepracer_gym/envs/deepracer_gym_env.py
@@ -1,17 +1,47 @@
 import numpy as np
 import gym
 from deepracer_gym.zmq_client import DeepracerEnvHelper
-
+import time
+import warnings
 class DeepracerGymEnv(gym.Env):
     def __init__(self):
         self.action_space = gym.spaces.Discrete(5)
         self.deepracer_helper = DeepracerEnvHelper()
+        self.last_step_time = None
+        self.max_step_time = 0.03  # seconds
 
     def reset(self):
         observation = self.deepracer_helper.env_reset()
         return observation
 
     def step(self, action):
+
+        if self.last_step_time is not None:
+            time_delta = time.time() - self.last_step_time - self.max_step_time
+        else:
+            time_delta = -1
+        done = False
+        n_repeats = 0
+
+        # Emulate the async nature of the real-world track:
+        # if an action does not arrive within the required time limit, the previous action is repeated.
+        while (not done) and time_delta > 0:
+            time_delta -= self.max_step_time
+            observation, reward, done, info = self._step_sim(self.last_action)
+            n_repeats += 1
+
+        if n_repeats > 0:
+            warn_msg = f"Action was repeated {n_repeats} times; try to keep the model step time under {self.max_step_time} seconds"
+            warnings.warn(warn_msg)
+
+        if not done:
+            observation, reward, done, info = self._step_sim(action)
+
+        self.last_action = action
+        self.last_step_time = time.time()
+        return observation, reward, done, info
+
+    def _step_sim(self, action):
         rl_coach_obs = self.deepracer_helper.send_act_rcv_obs(action)
         observation, reward, done, info = self.deepracer_helper.unpack_rl_coach_obs(rl_coach_obs)
         return observation, reward, done, info
diff --git a/deepracer-gym/random_actions_example.py b/deepracer-gym/random_actions_example.py
index c496ee6c7a904c552ea57cc41409773ab8a51353..61de4393fa07941c881f42be187d58aa217e5748 100644
--- a/deepracer-gym/random_actions_example.py
+++ b/deepracer-gym/random_actions_example.py
@@ -2,6 +2,7 @@ import gym
 import numpy as np
 
 import deepracer_gym
+import time
 
 env = gym.make('deepracer_gym:deepracer-v0')
 
@@ -13,8 +14,11 @@ steps_completed = 0
 episodes_completed = 0
 total_reward = 0
 
-for _ in range(500):
+for i in range(500):
     observation, reward, done, info = env.step(np.random.randint(5))
+    # Simulate a periodic delay in model inference
+    if (i-5) % 5 == 0:
+        time.sleep(0.1)
     steps_completed += 1
     total_reward += reward
 
@@ -23,4 +27,4 @@ for _ in range(500):
         episodes_completed += 1
         print("Episodes Completed:", episodes_completed, "Steps:", steps_completed, "Reward", total_reward)
         steps_completed = 0
-        total_reward = 0
\ No newline at end of file
+        total_reward = 0
diff --git a/deepracer-gym/start_deepracer_docker.sh b/deepracer-gym/start_deepracer_docker.sh
index cb042782a383d259afb0d63f0fbd0799021767f6..dd2f02a632b995ceddac7f9efd800b0805d6899a 100644
--- a/deepracer-gym/start_deepracer_docker.sh
+++ b/deepracer-gym/start_deepracer_docker.sh
@@ -1 +1 @@
-docker run -it --rm --name=deepracer -p 8888:8888 -p 5000:5000 --cpus="3" --memory="6g" aicrowd/base-images:deepracer_round1_release /bin/bash
+docker run -it --rm --name=deepracer -p 8888:8888 -p 5000:5000 --cpus="3" --memory="6g" aicrowd/base-images:deepracer_round2_release /bin/bash
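
Note on the new timing behavior: DeepracerGymEnv.step() now replays the previous action whenever more than max_step_time (0.03 s) elapses between consecutive step() calls, so a slow policy is penalized the same way it would be on the real track. What follows is a minimal client-side sketch of how an agent loop can watch for this on its own end, assuming the environment is registered as in random_actions_example.py; MAX_STEP_TIME simply duplicates the env's hard-coded value, since this patch exposes no getter for it.

import time

import gym
import numpy as np
import deepracer_gym  # registers deepracer_gym:deepracer-v0

MAX_STEP_TIME = 0.03  # assumed copy of DeepracerGymEnv.max_step_time

env = gym.make('deepracer_gym:deepracer-v0')
observation = env.reset()

prev_step_end = None
for _ in range(100):
    action = np.random.randint(5)  # stand-in for real model inference
    if prev_step_end is not None:
        gap = time.time() - prev_step_end
        if gap > MAX_STEP_TIME:
            # The env will replay the last action roughly once per
            # MAX_STEP_TIME of overshoot and emit a UserWarning.
            print(f"{gap:.3f}s since last step; expect repeated actions")
    observation, reward, done, info = env.step(action)
    prev_step_end = time.time()
    if done:
        observation = env.reset()

Because the env stamps last_step_time only after the simulator call returns, the client-side measurement above is an approximation of what step() will actually see, not an exact reproduction.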