Commit 038d0df2 authored by Dipam Chakraborty's avatar Dipam Chakraborty
Browse files

Added notebooks for workshop

parent f0ac22a2
# Workshop image: installs Jupyter on top of the DeepRacer round-1 workshop base image.
FROM aicrowd/base-images:deepracer_round1_workshop

# Refresh the package index before installing and pass -y so the build cannot
# stop at apt's interactive confirmation prompt. The index is removed afterwards
# to keep the layer small.
RUN apt-get update \
    && apt-get install -y python3-setuptools \
    && rm -rf /var/lib/apt/lists/*

# `source` is a bash builtin, so the following RUN steps must use bash rather
# than the default /bin/sh.
SHELL ["/bin/bash", "-c"]

# Install the Jupyter tooling into the Anaconda interpreter used by the workshop.
RUN source ~/.bashrc && /root/anaconda/bin/python3 -m pip install jupyter jupyterlab jupyterhub

# Serve JupyterLab on all interfaces at port 8888.
# NOTE(review): --allow-root suggests the container runs as root (paths live under /root) - confirm.
CMD ["/root/anaconda/bin/jupyter-lab", "--ip=0.0.0.0", "--port=8888", "--allow-root"]
......@@ -54,7 +54,7 @@ class Agent(nn.Module):
class TorchDeepracerAgent(DeepracerAgent):
def __init__(self):
self.model = torch.load('../deepracer-gym/runs/workshop_model.pth')
self.model = torch.load('../deepracer-gym/runs/workshop_model_final.pth')
self.device = 'cpu'
def get_acton(self, obs):
......@@ -69,4 +69,4 @@ class TorchDeepracerAgent(DeepracerAgent):
def compute_action(self, observations, info):
observation = observations['STEREO_CAMERAS']
self.get_action(observation)
return self.get_action(observation)
%% Cell type:markdown id: tags:
# Welcome to the AIcrowd Deepracer Workshop 🏎
![](https://i.imgur.com/59xcC0s.jpeg)
%% Cell type:markdown id: tags:
# Exploring the Deepracer simulator
## What is Deepracer
AWS DeepRacer is a 1/18th scale model of a racing car, made by AWS as a learning tool for applying machine learning to self-driving.
## Why use the simulator
- The simulator provides a faster interface to train models
- It is open source and free to use
- It is safer than training on the real car
## What are the inputs and outputs of the simulator
**Outputs** - The simulator sends the video output from the stereo cameras on board the car.
**Inputs** - The simulator receives the direction of movement for the car. In the reinforcement learning context, these are the actions.
%% Cell type:code id: tags:
``` python
# Some utilities
import matplotlib.pyplot as plt
def view_obs(obs):
    """Plot both channels of a stereo observation side by side in grayscale.

    The last axis of ``obs`` is indexed with 0 for the left camera image and
    with 1 for the right camera image.
    """
    plt.suptitle("Deepracer Stereo Camera Observation")
    # One subplot per camera: column 1 is the left image, column 2 the right.
    for channel, label in enumerate(("Left Camera", "Right Camera")):
        plt.subplot(1, 2, channel + 1)
        plt.title(label)
        plt.imshow(obs[..., channel], cmap='gray')
```
%% Cell type:markdown id: tags:
# What do the actions do
The environment takes 5 discrete actions
- 0 - Left 30 Degrees
- 1 - Left 15 Degrees
- 2 - Forward
- 3 - Right 15 Degrees
- 4 - Right 30 Degrees
**There is no stop action**
![](https://i.imgur.com/DF2vHND.png)
%% Cell type:markdown id: tags:
# Running the simulator
%% Cell type:code id: tags:
``` python
# Imports
import gym
import numpy as np
import deepracer_gym
# Make the deepracer environment in OpenAI gym.
# The id is written as 'module:env-name'.
# NOTE(review): presumably the 'deepracer_gym:' prefix makes gym import that module to register the env - confirm.
env = gym.make('deepracer_gym:deepracer-v0')
# Reset once before stepping; the value returned by reset() is kept as the first observation.
obs = env.reset()
print("Deepracer Environment Connected succesfully")
```
%% Cell type:markdown id: tags:
# Running random actions
%% Cell type:code id: tags:
``` python
# Drive the car with random actions until the environment reports `done`,
# accumulating the step count and the total reward for the episode.
# NOTE(review): indentation was lost in this export; the while/if nesting
# (and whether the plotting calls at the end run on every step) must be
# restored from the original notebook before running.
steps_completed = 0
episodes_completed = 0
total_reward = 0
done = False
while not done:
# env.step returns four values:
# observation - Stereo camera images
# reward - Higher if car is closer to the center of the lane
# done - Simulation episode completed
# info - Extra information if any
# np.random.randint(5) draws an action index from 0 to 4.
observation, reward, done, info = env.step(np.random.randint(5))
steps_completed += 1
total_reward += reward
if done:
# Episode finished: report its totals, then clear the per-episode counters.
episodes_completed += 1
print("Episodes Completed:", episodes_completed, "Steps:", steps_completed, "Reward", total_reward)
steps_completed = 0
total_reward = 0
# Show a lot of plots with plt.pause
# The short pause gives matplotlib a chance to draw the current observation.
view_obs(observation)
plt.pause(1e-3)
```
%% Cell type:markdown id: tags:
# More information on the Deepracer environment and competition
📄 Documentation links
- [What is AWS Deepracer](https://docs.aws.amazon.com/deepracer/latest/developerguide/what-is-deepracer.html)
- [Deepracer homepage](https://aws.amazon.com/deepracer/)
- [ICRA Paper](https://ieeexplore.ieee.org/document/9197465)
💪 Challenge information
- [Challenge page](https://www.aicrowd.com/challenges/neurips-2021-aws-deepracer-ai-driving-olympics-challenge)
- [Leaderboard](https://www.aicrowd.com/challenges/neurips-2021-aws-deepracer-ai-driving-olympics-challenge/leaderboards)
🗣 Community
- [Challenge discussion forum](https://www.aicrowd.com/challenges/neurips-2021-aws-deepracer-ai-driving-olympics-challenge/discussion)
- [Discord server for challenge discussions](https://discord.gg/SwAdb6R8)
%% Cell type:markdown id: tags:
# Welcome to the AIcrowd Deepracer Workshop 🏎
![](https://i.imgur.com/59xcC0s.jpeg)
%% Cell type:markdown id: tags:
# Introduction to RL Training 💪
Deep Reinforcement Learning is essentially a **two step process**
- **Collecting Experience** - Interacting with the environment using the policy in the neural network model.
- **Training based on rewards** - Updating the model and hence the policy that interacts with the environment.
![](https://gradientscience.org/images/rl/RLSetup.jpg)
%% Cell type:markdown id: tags:
# Training Proximal Policy Optimization (PPO)
PPO is a widely used Deep RL algorithm developed by OpenAI - https://openai.com/blog/openai-baselines-ppo/
%% Cell type:code id: tags:
``` python
# Imports
import gym
import numpy as np
from ppo import run_ppo
# Code for PPO is adapted to the Deepracer env from CleanRL - https://github.com/vwxyzjn/cleanrl
```
%% Cell type:markdown id: tags:
# Training PPO for a few steps
%% Cell type:code id: tags:
``` python
# Short demonstration run: a small timestep budget with a policy update every 256 steps.
hyperparams_shortrun = dict(
    exp_name='ppo',  # Experiment name - change this
    gym_id='deepracer_gym:deepracer-v0',
    cuda=False,  # Set to True if a GPU is available
    saved_model_name='workshop_model_shortrun.pth',  # Any file name works
    learning_rate=0.001,  # Learning rate for the neural network
    total_timesteps=1000,  # Total timesteps for the training
    anneal_lr=False,  # Taper the learning rate to 0 towards the end of training
    gamma=0.995,  # Rule of thumb: 1 - (1 / expected episode length)
    num_minibatches=4,  # Number of minibatches the experience is split into
    update_epochs=3,  # Epochs per policy update
    num_steps=256,  # Steps collected before each policy update
    batch_size=256,  # Should be equal to num_steps * num_envs
    minibatch_size=64,  # Should be a factor of batch_size
)

run_ppo(hyperparams_shortrun)
```
%% Cell type:markdown id: tags:
# Start tensorboard to visualize your runs
This is optional
%% Cell type:code id: tags:
``` python
%tensorboard --logdir runs/
# This will work in most Jupyter environments but may not work everywhere
# On VSCode - Click Launch Tensorboard Session on top to open in a new tab
# Alternatively start tensorboard from a terminal
# tensorboard --logdir neurips-2021-aws-deepracer-starter-kit/deepracer-gym/runs/
# Here's a useful link for those interested
# https://anthony-sarkis.medium.com/tensorboard-quick-start-in-5-minutes-e3ec69f673af
```
%% Cell type:markdown id: tags:
# Train your own model
%% Cell type:code id: tags:
``` python
# Longer training run: a larger timestep budget with a policy update every 512 steps.
hyperparams_final = dict(
    exp_name='ppo',  # Experiment name - change this
    gym_id='deepracer_gym:deepracer-v0',
    cuda=False,  # Set to True if a GPU is available
    saved_model_name='workshop_model_final.pth',  # Any file name works
    learning_rate=0.001,  # Learning rate for the neural network
    total_timesteps=15000,  # Total timesteps for the training
    anneal_lr=False,  # Taper the learning rate to 0 towards the end of training
    gamma=0.995,  # Rule of thumb: 1 - (1 / expected episode length)
    num_minibatches=4,  # Number of minibatches the experience is split into
    update_epochs=3,  # Epochs per policy update
    num_steps=512,  # Steps collected before each policy update
    batch_size=512,  # Should be equal to num_steps * num_envs
    minibatch_size=128,  # Should be a factor of batch_size
)

# If you send a blank dictionary, default hyperparameters will be used
run_ppo(hyperparams_final)
```
%% Cell type:markdown id: tags:
# More information on the Deepracer environment and competition
📄 Documentation links
- [What is AWS Deepracer](https://docs.aws.amazon.com/deepracer/latest/developerguide/what-is-deepracer.html)
- [Deepracer homepage](https://aws.amazon.com/deepracer/)
- [ICRA Paper](https://ieeexplore.ieee.org/document/9197465)
💪 Challenge information
- [Challenge page](https://www.aicrowd.com/challenges/neurips-2021-aws-deepracer-ai-driving-olympics-challenge)
- [Leaderboard](https://www.aicrowd.com/challenges/neurips-2021-aws-deepracer-ai-driving-olympics-challenge/leaderboards)
🗣 Community
- [Challenge discussion forum](https://www.aicrowd.com/challenges/neurips-2021-aws-deepracer-ai-driving-olympics-challenge/discussion)
- [Discord server for challenge discussions](https://discord.gg/SwAdb6R8)
####
# Code adapted from CleanRL - https://github.com/vwxyzjn/cleanrl
####
import argparse
import os
import random
......@@ -27,7 +31,6 @@ default_hparams = {
'gamma': 0.995, # Tunable
'num_minibatches': 4, # Tunable
'update_epochs': 3, # Tunable
'clip_coef': 0.2, # Tunable
'num_steps': 512,
'batch_size': 512, # Should be equal to num_steps * num_envs
'minibatch_size': 64, # Should be a factor of batch_size
......@@ -36,6 +39,7 @@ default_hparams = {
'torch_deterministic': False,
'num_envs': 1, # Deepracer docker can't support more than 1
'gae': True,
'clip_coef': 0.2,
'gae_lambda': 0.95,
'norm_adv': True,
'clip_vloss': True,
......
......@@ -12,3 +12,5 @@ dependencies:
- tensorboard
- easydict
- gym
- jupyter
- jupyterlab
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment