Commit 5dcf58ef authored by Dipam Chakraborty's avatar Dipam Chakraborty
Browse files

add deepracer gym

parent 3474eebf
__pycache__
\ No newline at end of file
# Instructions for using the Deepracer Gym Environment
\ No newline at end of file
Metadata-Version: 1.0
Name: deepracer-gym
Version: 0.0.1
Summary: UNKNOWN
Home-page: UNKNOWN
Author: UNKNOWN
Author-email: UNKNOWN
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
README.md
setup.py
deepracer_gym.egg-info/PKG-INFO
deepracer_gym.egg-info/SOURCES.txt
deepracer_gym.egg-info/dependency_links.txt
deepracer_gym.egg-info/requires.txt
deepracer_gym.egg-info/top_level.txt
\ No newline at end of file
# Register the Deepracer environment with gym so callers can construct it
# via gym.make("deepracer_gym:deepracer-v0"); the entry point resolves to
# deepracer_gym.envs.DeepracerGymEnv.
from gym.envs.registration import register
register(id="deepracer-v0", entry_point="deepracer_gym.envs:DeepracerGymEnv")
from deepracer_gym.envs.deepracer_gym_env import DeepracerGymEnv
\ No newline at end of file
import numpy as np
import gym
from deepracer_gym.zmq_client import DeepracerEnvHelper
class DeepracerGymEnv(gym.Env):
    """Gym-compatible facade over the Deepracer ZMQ environment helper."""

    def __init__(self):
        # All communication with the remote simulator goes through the helper.
        self.deepracer_helper = DeepracerEnvHelper()
        # The agent chooses among five discrete actions.
        self.action_space = gym.spaces.Discrete(5)

    def reset(self):
        """Reset the remote environment and return its first observation."""
        return self.deepracer_helper.env_reset()

    def step(self, action):
        """Apply `action` remotely; return the (obs, reward, done, info) tuple."""
        raw_response = self.deepracer_helper.send_act_rcv_obs(action)
        return self.deepracer_helper.unpack_rl_coach_obs(raw_response)
if __name__ == '__main__':
    # Smoke-test driver: take 500 random actions and report per-episode totals.
    env = DeepracerGymEnv()
    obs = env.reset()

    n_steps = 0
    n_episodes = 0
    ep_reward = 0
    for _ in range(500):
        observation, reward, done, info = env.step(np.random.randint(5))
        n_steps += 1
        ep_reward += reward
        if done:
            n_episodes += 1
            print("Episodes Completed:", n_episodes, "Steps:", n_steps, "Reward", ep_reward)
            n_steps = 0
            ep_reward = 0
import zmq
import msgpack
import msgpack_numpy as m
# Monkey-patch msgpack so numpy arrays round-trip transparently through
# packb/unpackb (the server exchanges observations containing arrays).
m.patch()
class DeepracerZMQClient:
    """Blocking REQ-socket client for the Deepracer environment server.

    Messages are msgpack-encoded dicts; `msgpack_numpy.patch()` (module
    level) lets numpy arrays travel inside them.
    """

    def __init__(self, host="127.0.0.1", port=8888):
        self.host = host
        self.port = port
        self.socket = zmq.Context().socket(zmq.REQ)
        # Fail after 20 s instead of blocking forever if the server is absent.
        self.socket.set(zmq.SNDTIMEO, 20000)
        self.socket.set(zmq.RCVTIMEO, 20000)
        self.socket.connect(f"tcp://{self.host}:{self.port}")

    def set_agent_ready(self):
        """Tell the server the agent is ready (the reply is read separately)."""
        packed_msg = msgpack.packb({"Agent Ready": 1})
        self.socket.send(packed_msg)

    def receive_response(self):
        """Receive one message from the server and return it unpacked."""
        packed_response = self.socket.recv()
        response = msgpack.unpackb(packed_response)
        return response

    # Backward-compatible alias: existing callers use the misspelled name.
    recieve_response = receive_response

    def send_msg(self, msg: dict):
        """Send `msg` msgpack-encoded and block for the server's reply."""
        packed_msg = msgpack.packb(msg)
        self.socket.send(packed_msg)
        response = self.receive_response()
        return response
class DeepracerEnvHelper:
    """Bridges the gym env and the ZMQ client, emulating reset semantics.

    The remote server cannot restart mid-episode, so `env_reset` either
    consumes the very first server message, returns the cached terminal
    observation, or steps the live episode to completion.
    """

    def __init__(self):
        self.zmq_client = DeepracerZMQClient()
        self.zmq_client.set_agent_ready()
        # Last raw observation dict received from the server (None until
        # the first message arrives).
        self.obs = None
        # Whether the most recent observation ended an episode.
        self.previous_done = False

    def send_act_rcv_obs(self, action):
        """Send `action` to the server; cache and return the raw obs dict."""
        action_dict = {"action": action}
        self.obs = self.zmq_client.send_msg(action_dict)
        self.previous_done = self.obs['_game_over']
        return self.obs

    def env_reset(self):
        """Return an observation suitable as the start of a new episode."""
        if self.obs is None:  # First communication to zmq server
            self.obs = self.zmq_client.recieve_response()
        elif self.previous_done:  # To prevent dummy episode on already done env
            pass
        else:
            # Can't reset env before episode completes - step with a fixed
            # action (1) until the current episode finishes on its own.
            action = 1
            done = False
            while not done:
                self.obs = self.send_act_rcv_obs(action)
                done = self.obs['_game_over']
        self.previous_done = True
        return self.obs

    def unpack_rl_coach_obs(self, rl_coach_obs):
        """Convert a raw rl_coach obs dict into a gym (obs, reward, done, info) tuple."""
        observation = rl_coach_obs['_next_state']
        reward = rl_coach_obs['_reward']
        done = rl_coach_obs['_game_over']
        info = rl_coach_obs['info']
        # The server may send a non-dict placeholder for `info`; normalize it
        # so callers can always attach keys. isinstance (not type(...) is)
        # also accepts dict subclasses.
        if not isinstance(info, dict):
            info = {}
        info['goal'] = rl_coach_obs['_goal']
        return observation, reward, done, info
if __name__ == "__main__":
    # Standalone connectivity check: handshake, then spam action 1 forever,
    # counting steps and completed episodes.
    client = DeepracerZMQClient()
    client.socket.send(msgpack.packb({"Ready": 1}))

    n_episodes = 0
    n_steps = 0
    while True:
        env_response = msgpack.unpackb(client.socket.recv())
        n_steps += 1
        if env_response['_game_over']:
            n_episodes += 1
            print("Episodes Completed:", n_episodes, "Steps:", n_steps)
            n_steps = 0
        client.socket.send(msgpack.packb({"action": 1}))
import gym
import numpy as np
import deepracer_gym  # noqa: F401 -- importing registers deepracer-v0 with gym

# Example client: connect to a running Deepracer server and take 500 random
# actions, printing per-episode totals.
env = gym.make('deepracer_gym:deepracer-v0')

obs = env.reset()
# Fixed typo in the user-facing message ("succesfully" -> "successfully").
print("Deepracer Environment Connected successfully")

steps_completed = 0
episodes_completed = 0
total_reward = 0
for _ in range(500):
    observation, reward, done, info = env.step(np.random.randint(5))
    steps_completed += 1
    total_reward += reward
    if done:
        episodes_completed += 1
        print("Episodes Completed:", episodes_completed, "Steps:", steps_completed, "Reward", total_reward)
        steps_completed = 0
        total_reward = 0
\ No newline at end of file
from setuptools import setup, find_packages

setup(
    name="deepracer_gym",
    version="0.0.1",
    # Without an explicit package list, setuptools installs no modules at all
    # (the generated SOURCES.txt contained only README.md and setup.py).
    packages=find_packages(),
    # "zmq" on PyPI is a deprecated shim; the maintained binding is "pyzmq".
    install_requires=["gym", "pyzmq", "msgpack", "msgpack_numpy"],
)
# Launch the Deepracer simulator container interactively, exposing the ZMQ
# env port (8888) and port 5000, capped at 3 CPUs / 6 GB RAM; --rm removes
# the container on exit.
docker run -it --rm --name=deepracer -p 8888:8888 -p 5000:5000 --cpus="3" --memory="6g" aicrowd/deepracer:round1_release /bin/bash
\ No newline at end of file
# Stop the running simulator container (started with --rm, so stopping
# also removes it).
docker stop deepracer
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment