Commit 95d2dc94 authored by Dipam Chakraborty's avatar Dipam Chakraborty
Browse files

round 2 env with action repeats every 30ms

parent f1108999
__pycache__ __pycache__
deepracer-gym/deepracer_gym.egg-info/ deepracer-gym/deepracer_gym.egg-info/
playground/ playground/
\ No newline at end of file deepracer-gym/dump.py
import numpy as np import numpy as np
import gym import gym
from deepracer_gym.zmq_client import DeepracerEnvHelper from deepracer_gym.zmq_client import DeepracerEnvHelper
import time
import warnings
class DeepracerGymEnv(gym.Env):
    """Gym wrapper around the Deepracer ZMQ simulator.

    Emulates the asynchronous nature of the real-world track: if the agent
    takes longer than ``max_step_time`` seconds between calls to
    :meth:`step`, the previously chosen action is re-sent once per missed
    time slot before the new action is applied.
    """

    def __init__(self):
        self.action_space = gym.spaces.Discrete(5)
        self.deepracer_helper = DeepracerEnvHelper()
        # Timestamp of the previous step() return; None means "no step yet",
        # which disables action repetition for the first step.
        self.last_step_time = None
        # Action sent on the previous step; repeated when the agent is slow.
        # Fix: initialize explicitly instead of relying on step() having run.
        self.last_action = None
        self.max_step_time = 0.03  # seconds allotted per action slot

    def reset(self):
        """Reset the simulator and return the initial observation."""
        observation = self.deepracer_helper.env_reset()
        # Fix: clear timing state so wall-clock time spent between episodes
        # (or during a slow reset) does not trigger spurious repeats of the
        # previous episode's action at the start of the new episode.
        self.last_step_time = None
        self.last_action = None
        return observation

    def step(self, action):
        """Advance the simulation by one action.

        If more than ``max_step_time`` seconds elapsed since the previous
        call, ``last_action`` is re-sent once per elapsed slot (emitting a
        warning), mirroring a real car that keeps executing the last
        command while the model is still computing.

        Returns the usual gym 4-tuple ``(observation, reward, done, info)``.
        """
        if self.last_step_time is not None:
            time_delta = time.time() - self.last_step_time - self.max_step_time
        else:
            time_delta = -1  # first step after reset: never repeat
        done = False
        n_repeats = 0
        # This is to emulate async nature of the real world track
        # If action is not returned within required time limit, the same action would be repeated
        while (not done) and time_delta > 0:
            time_delta -= self.max_step_time
            observation, reward, done, info = self._step_sim(self.last_action)
            n_repeats += 1
        if n_repeats > 0:
            warn_msg = f"Action was repeated {n_repeats} times, try to reduce model step time to {self.max_step_time} seconds"
            warnings.warn(warn_msg)
        if not done:
            observation, reward, done, info = self._step_sim(action)
            self.last_action = action
        self.last_step_time = time.time()
        return observation, reward, done, info

    def _step_sim(self, action):
        """Send one action to the simulator and unpack its response."""
        rl_coach_obs = self.deepracer_helper.send_act_rcv_obs(action)
        observation, reward, done, info = self.deepracer_helper.unpack_rl_coach_obs(rl_coach_obs)
        return observation, reward, done, info
......
...@@ -2,6 +2,7 @@ import gym ...@@ -2,6 +2,7 @@ import gym
import numpy as np import numpy as np
import deepracer_gym import deepracer_gym
import time
env = gym.make('deepracer_gym:deepracer-v0') env = gym.make('deepracer_gym:deepracer-v0')
...@@ -13,8 +14,11 @@ steps_completed = 0 ...@@ -13,8 +14,11 @@ steps_completed = 0
episodes_completed = 0 episodes_completed = 0
total_reward = 0 total_reward = 0
for _ in range(500): for i in range(500):
observation, reward, done, info = env.step(np.random.randint(5)) observation, reward, done, info = env.step(np.random.randint(5))
# Example to show the delay in model inference
if (i-5) % 5 == 0:
time.sleep(0.1)
steps_completed += 1 steps_completed += 1
total_reward += reward total_reward += reward
...@@ -23,4 +27,4 @@ for _ in range(500): ...@@ -23,4 +27,4 @@ for _ in range(500):
episodes_completed += 1 episodes_completed += 1
print("Episodes Completed:", episodes_completed, "Steps:", steps_completed, "Reward", total_reward) print("Episodes Completed:", episodes_completed, "Steps:", steps_completed, "Reward", total_reward)
steps_completed = 0 steps_completed = 0
total_reward = 0 total_reward = 0
\ No newline at end of file
docker run -it --rm --name=deepracer -p 8888:8888 -p 5000:5000 --cpus="3" --memory="6g" aicrowd/base-images:deepracer_round1_release /bin/bash docker run -it --rm --name=deepracer -p 8888:8888 -p 5000:5000 --cpus="3" --memory="6g" aicrowd/base-images:deepracer_round2_release /bin/bash
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment