run.sh 3.5 KB
Newer Older
1
#!/bin/bash
2
set -e
3

4
5
6
#########################################
# Your experiment file for submission   #
#########################################
7

Dipam Chakraborty's avatar
Dipam Chakraborty committed
8
# export EXPERIMENT_DEFAULT="experiments/impala-baseline.yaml"
Dipam Chakraborty's avatar
Dipam Chakraborty committed
9
# export EXPERIMENT_DEFAULT="experiments/custom-torch-ppo.yaml"
Dipam Chakraborty's avatar
Dipam Chakraborty committed
10
11
# export EXPERIMENT_DEFAULT="experiments/custom-ppg.yaml"
# Experiment file used when the caller does not provide EXPERIMENT.
export EXPERIMENT_DEFAULT="experiments/ppg-experimental.yaml"
# A caller-supplied EXPERIMENT wins; otherwise fall back to the default above.
export EXPERIMENT="${EXPERIMENT:-$EXPERIMENT_DEFAULT}"
13

14
15
16
17
18
19
20
21
if [[ -z "${AICROWD_IS_GRADING:-}" ]]; then
  ##########################################################################
  # This section contains commands you would like to run, when running     #
  # the codebase on your machines. During evaluation AICROWD_IS_GRADING    #
  # variable is set, due to which this block will be skipped.              #
  ##########################################################################

  export OUTPUTS_DIR=./outputs
  # Resource settings picked up by the --train command line further down.
  export RAY_MEMORY_LIMIT=40000000000
  export RAY_CPUS=8
  export RAY_STORE_MEMORY=40000000000

  # Cleaning output directory between multiple runs.
  # ":?" aborts instead of expanding to an empty path if OUTPUTS_DIR is ever
  # blank, so the recursive delete can never target an unintended location.
  rm -rf -- "${OUTPUTS_DIR:?}"
  mkdir -p -- "${OUTPUTS_DIR}"
fi


# Set to true by the --train / --rollout handlers below; if it is still false
# at the end of the script, the usage text is printed instead.
export VALID_RUN=false
#######################################
# Print the AIcrowd ASCII-art banner.
# Arguments: none
# Outputs:   the banner (six art lines plus one blank-looking line) on stdout
#######################################
print_banner() {
# The delimiter is quoted so the backslashes and quotes in the art are emitted
# literally and can never be treated as expansions or escapes.
cat << 'BANNER'
           _____                          _ 
     /\   |_   _|                        | |
    /  \    | |  ___ _ __ _____      ____| |
   / /\ \   | | / __| '__/ _ \ \ /\ / / _  |
  / ____ \ _| || (__| | | (_) \ V  V / (_| |
 /_/    \_\_____\___|_|  \___/ \_/\_/ \__,_|
 
BANNER
}

#######################################
# Print the command-line help text.
# Globals:   EXPERIMENT_DEFAULT (read) - shown as the default experiment file
# Arguments: none
# Outputs:   help text on stdout
#######################################
print_usage() {
  printf '%s\n' \
    'Available Parameters' \
    '' \
    '--train: for training your agent' \
    '--rollout: for agent rollout' \
    '' \
    'Important Environment Variables' \
    "EXPERIMENT: path to experiment file you want to use, default: $EXPERIMENT_DEFAULT" \
    'CHECKPOINT: (for rollout) path to checkpoint directory, tries to detect automatically otherwise, assumes data present at ~/ray_results.' \
    'EPISODES: (for rollout) number of episodes, default: 5'
}

#######################################
# Print the most recently modified immediate subdirectory of a directory.
# Arguments: $1 - parent directory
# Outputs:   the subdirectory path (with trailing slash) on stdout, or
#            nothing when the parent is missing or has no subdirectories
#######################################
_latest_subdir() {
  local parent=${1%/}
  local candidate newest=
  for candidate in "$parent"/*/; do
    # An unmatched glob stays literal; skip anything that is not a directory.
    [[ -d "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest=$candidate
    fi
  done
  printf '%s' "$newest"
}

#######################################
# Locate the newest checkpoint written for the current experiment.
# The experiment name is the first top-level key of the EXPERIMENT file; the
# newest run directory under $HOME/ray_results/<name> is searched for the
# newest checkpoint directory, and the checkpoint path is the name of the
# "*.tune_metadata" file found there with that suffix removed.
# Globals:   HOME, EXPERIMENT (read)
#            DIRECTORY, EXPERIMENT_NAME, LATEST_EXECUTION_DIRECTORY,
#            LATEST_CHECKPOINT_DIRECTORY, CHECKPOINT (written and exported)
# Arguments: none
# Outputs:   an error message on stderr when nothing can be found
# Returns:   0 on success; exits the shell with status 1 on failure
#######################################
detect_latest_checkpoint() {
  local metadata_file
  local not_found="Cannot find checkpoint in ~/ray_results, have you run training phase yet?"

  export DIRECTORY="$HOME/ray_results"

  # First line that starts with neither a space nor '#'. Blank lines are
  # skipped too, so a leading empty line cannot produce an empty name.
  EXPERIMENT_NAME=$(awk -F':' '/^[^ #]/ { print $1; exit }' "$EXPERIMENT") \
    || EXPERIMENT_NAME=
  export EXPERIMENT_NAME
  if [[ -z "$EXPERIMENT_NAME" ]]; then
    echo "Cannot determine experiment name from '$EXPERIMENT'" >&2
    exit 1
  fi

  LATEST_EXECUTION_DIRECTORY=$(_latest_subdir "$DIRECTORY/$EXPERIMENT_NAME")
  export LATEST_EXECUTION_DIRECTORY
  if [[ -z "$LATEST_EXECUTION_DIRECTORY" ]]; then
    echo "$not_found" >&2
    exit 1
  fi

  LATEST_CHECKPOINT_DIRECTORY=$(_latest_subdir "$LATEST_EXECUTION_DIRECTORY")
  export LATEST_CHECKPOINT_DIRECTORY
  # Stop here rather than calling find with an empty path, which would make
  # it search the current working directory.
  if [[ -z "$LATEST_CHECKPOINT_DIRECTORY" ]]; then
    echo "$not_found" >&2
    exit 1
  fi

  metadata_file=$(find "${LATEST_CHECKPOINT_DIRECTORY%/}" -type f \
    -name '*.tune_metadata' | head -n 1)
  CHECKPOINT=${metadata_file%.tune_metadata}
  export CHECKPOINT
  if [[ -z "$CHECKPOINT" ]]; then
    echo "$not_found" >&2
    exit 1
  fi
}

# Show the banner on every invocation, before any mode flag is examined.
print_banner

# --train: run training on the selected experiment file. Each Ray resource
# flag falls back to the small default shown when its variable is unset.
if [[ " $* " == *" --train "* ]]; then
  export VALID_RUN=true
  # Build the command once, as an array, so the logged line and the executed
  # command cannot drift apart and values containing spaces stay intact.
  train_cmd=(
    python train.py
    -f "${EXPERIMENT}"
    --ray-memory "${RAY_MEMORY_LIMIT:-1500000000}"
    --ray-num-cpus "${RAY_CPUS:-2}"
    --ray-object-store-memory "${RAY_STORE_MEMORY:-1000000000}"
  )
  echo "Executing: ${train_cmd[*]}"
  "${train_cmd[@]}"
  # NOTE: while `set -e` is in effect a failing command aborts the script
  # before this line, so this records 0 whenever it is reached.
  STATUS_CODE=$?
fi


# --rollout: replay a trained agent. The algorithm (run:) and environment
# (env:) are read from the experiment file; the checkpoint comes from
# CHECKPOINT when set, otherwise it is auto-detected.
if [[ " $* " == *" --rollout "* ]]; then
  export VALID_RUN=true
  # Second whitespace-separated field of the first line containing the key.
  # Stopping at the first match keeps a repeated key from injecting extra
  # words into the rollout command line.
  ROLLOUT_RUN=$(awk '/  run:/ { print $2; exit }' "$EXPERIMENT")
  ROLLOUT_ENV=$(awk '/  env:/ { print $2; exit }' "$EXPERIMENT")
  export ROLLOUT_RUN ROLLOUT_ENV

  if [[ -z "${CHECKPOINT:-}" ]]; then
    detect_latest_checkpoint
  fi
  echo "Rollout with checkpoint: $CHECKPOINT"
  # Build the command once so the logged line matches what is executed.
  rollout_cmd=(
    python ./rollout.py "$CHECKPOINT"
    --episodes "${EPISODES:-5}"
    --run "$ROLLOUT_RUN"
    --env "$ROLLOUT_ENV"
  )
  echo "Executing: ${rollout_cmd[*]}"
  "${rollout_cmd[@]}"
  # NOTE: while `set -e` is in effect a failing command aborts the script
  # before this line, so this records 0 whenever it is reached.
  STATUS_CODE=$?
fi


# No recognised mode flag was handled: show the help text and record failure.
case "$VALID_RUN" in
  false)
    print_usage
    STATUS_CODE=1
    ;;
esac