
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (10)
Showing with 325 additions and 150 deletions
FROM nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 AS nhc-dev
ENV DEBIAN_FRONTEND=noninteractive
COPY apt.txt /tmp/apt.txt
@@ -29,21 +28,32 @@ RUN adduser --disabled-password \
--uid ${HOST_UID} \
${USER_NAME}
USER ${USER_NAME}
WORKDIR ${HOME_DIR}
ENV CONDA_DIR ${HOME_DIR}/.conda
WORKDIR /opt/
# Install anaconda
ENV CONDA_DIR /opt/conda
RUN wget -nv -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_4.9.2-Linux-x86_64.sh \
&& bash miniconda.sh -b -p ${CONDA_DIR} \
&& . ${CONDA_DIR}/etc/profile.d/conda.sh \
&& conda clean -y -a \
&& rm -rf miniconda.sh
ENV PATH ${CONDA_DIR}/bin:${PATH}
RUN conda install cmake -y && conda clean -y -a
# Install TorchBeast
RUN conda install cmake cudatoolkit=10.2 pytorch -y -c pytorch -c nvidia && conda clean -y -a
RUN git clone https://github.com/facebookresearch/torchbeast.git --recursive
WORKDIR /opt/torchbeast
RUN pip install -r requirements.txt
RUN pip install ./nest
RUN python setup.py install
# Install AICrowd items
WORKDIR ${HOME_DIR}
COPY --chown=1001:1001 requirements.txt ${HOME_DIR}/requirements.txt
RUN pip install -r requirements.txt --no-cache-dir
# SUBMISSION IMAGE - change user and copy files
FROM nhc-dev AS nhc-submit
COPY --chown=1001:1001 . ${HOME_DIR}
USER ${USER_NAME}
@@ -16,6 +16,28 @@ Quick Links:
* [The NetHack Challenge - Starter Kit](https://gitlab.aicrowd.com/nethack/neurips-2021-the-nethack-challenge)
* [IMPORTANT - Accept the rules before you submit](https://www.aicrowd.com/challenges/neurips-2021-nethack-challenge/challenge_rules)
## Quick Start
With Docker and a single GPU
```bash
# 1. CLONE THE REPO AND DOWNLOAD BASELINE MODELS
git clone http://gitlab.aicrowd.com/nethack/neurips-2021-the-nethack-challenge.git \
&& cd neurips-2021-the-nethack-challenge \
&& git lfs install \
&& git lfs pull
# 2. START THE DOCKER IMAGE
docker run -it -v `pwd`:/home/aicrowd --gpus 'all' fairnle/challenge:dev
# 3. TEST AN EXISTING SUBMISSION
python test_submission.py # Tests ./saved_models/pretrained_0.5B
# 4. TRAIN YOUR OWN
python nethack_baselines/torchbeast/polyhydra.py batch_size=16
```
To troubleshoot, see [here](#setting-up-details-docker).
# Table of Contents
1. [Intro to Nethack and the Nethack Challenge](#intro-to-nethack-and-the-nethack-challenge)
@@ -56,7 +78,7 @@ GitLab.
### How does submission work?
The submission entrypoint is a bash script `run.sh`. When this script is
The submission entrypoint is a bash script, `run.sh`, which runs in an environment defined by the `Dockerfile`. When this script is
called, aicrowd will expect you to generate all your rollouts in the
allotted time, using `aicrowd_gym` in place of regular `gym`. This means
that AIcrowd can make sure everyone is running the same environment,
@@ -170,7 +192,8 @@ The machine where the submission will run will have following specifications:
* 4 vCPUs
* 16 GB RAM
## Setting Up Details
## Setting Up Details [No Docker]
1. **Add your SSH key** to AIcrowd GitLab
@@ -224,11 +247,48 @@ The machine where the submission will run will have following specifications:
Find more details on the [original nethack repository](https://github.com/facebookresearch/nle)
## Setting Up Details [Docker]
With Docker, setting up is very simple! Simply pull a prebuilt image from the `fairnle` repository on Docker Hub.
```
docker pull fairnle/challenge:dev
```
This image is based on Ubuntu 18.04 with CUDA 10.2 and cuDNN 7, and corresponds to the `nhc-dev` target in the `Dockerfile`. You can run it as follows:
**Without GPUs**
```
docker run -it -v `pwd`:/home/aicrowd fairnle/challenge:dev
```
**With GPUs**
```
docker run -it -v `pwd`:/home/aicrowd --gpus 'all' fairnle/challenge:dev
```
*NB*: On Linux, the `--gpus` argument requires `nvidia-container-toolkit`, which on Ubuntu can be installed with `apt`, as sketched below.
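A minimal sketch of that install, assuming NVIDIA's apt repository for the container toolkit is already configured on your machine:

```bash
# Assumes NVIDIA's container-toolkit apt repository is already set up.
sudo apt install nvidia-container-toolkit
# Restart the Docker daemon so the --gpus flag can use the new runtime.
sudo systemctl restart docker
```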
Running one of the commands above will drop you into a container with your current working directory mounted as a volume. At submission time, AIcrowd instead builds the `nhc-submit` target, which copies all of your files into the image rather than mounting them.
If you wish to build your own dev environment from the `Dockerfile`, you can do so with:
```
docker build --target nhc-dev -t your-image-name .
```
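If you want to preview what AIcrowd builds at submission time, here is a sketch (the tag `my-submission` is an arbitrary name):

```bash
# Build the submission target; unlike nhc-dev, this copies your files into the image.
docker build --target nhc-submit -t my-submission .

# Run it; no volume mount is needed because the files are baked into the image.
# What runs by default depends on the Dockerfile's CMD/entrypoint.
docker run -it --gpus 'all' my-submission
```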
# Baselines
Although we plan to add more baselines throughout the first month of the competition, this repository already comes with a strong IMPALA-based baseline in `./nethack_baselines/torchbeast`.
Follow the instructions [here](/nethack_baselines/torchbeast/) to install and start training the model (there are even some suggestions for improvements).
The [README](/nethack_baselines/torchbeast/README.md) has more info about the baseline, including how to install and start training the model (there are even some suggestions for improvements).
The TorchBeast baseline comes with two sets of weights: the same model trained for 250 million steps and for 500 million steps.
To download these weights, run `git lfs pull` and look in `saved_models`.
The TorchBeast agent can then be selected by setting `AGENT = TorchBeastAgent` in `submission_config.py`, and the weights can be switched by changing `MODEL_DIR` in `agents/torchbeast_agent.py`.
More information on Git LFS can be found in [SUBMISSION.md](/docs/SUBMISSION.md).
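Putting the pieces together, a sketch of trying the pretrained baseline end to end (paths as described above):

```bash
# Fetch the LFS-tracked weights and confirm they are present.
git lfs pull
ls saved_models/

# Make sure AGENT = TorchBeastAgent in submission_config.py and that MODEL_DIR in
# agents/torchbeast_agent.py points at the weights you want to use.

# Roll out the selected agent locally as a sanity check.
python test_submission.py
```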
# How to Test and Debug Locally
......
@@ -5,7 +5,7 @@ from agents.base import BatchedAgent
from nethack_baselines.torchbeast.models import load_model
MODEL_DIR = "./models/torchbeast/example_run"
MODEL_DIR = "./saved_models/torchbeast/pretrained_0.5B"
class TorchBeastAgent(BatchedAgent):
@@ -16,8 +16,9 @@ class TorchBeastAgent(BatchedAgent):
    def __init__(self, num_envs, num_actions):
        super().__init__(num_envs, num_actions)
        self.model_dir = MODEL_DIR
        self.device = "cuda:0"
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.model = load_model(MODEL_DIR, self.device)
        print(f'Using Model In: {self.model_dir}, Device: {self.device}')
        self.core_state = [
            m.to(self.device) for m in self.model.initial_state(batch_size=num_envs)
@@ -33,12 +34,12 @@ class TorchBeastAgent(BatchedAgent):
        """
        states = list(observations[0].keys())
        obs = {k: [] for k in states}

        # Unpack List[Dicts] -> Dict[Lists]
        for o in observations:
            for k, t in o.items():
                obs[k].append(t)

        # Convert to Tensor, Add Unroll Dim (=1), Move to self.device
        for k in states:
            obs[k] = torch.Tensor(np.stack(obs[k])[None, ...]).to(self.device)
@@ -54,8 +55,8 @@ class TorchBeastAgent(BatchedAgent):
        * return outputs as a dict of "action", "policy_logits", "baseline"
        """
        observations, dones = self.batch_inputs(observations, dones)

        with torch.no_grad():
            outputs, self.core_state = self.model(observations, self.core_state)

        return outputs["action"].cpu().numpy()[0]
@@ -14,4 +14,6 @@ xvfb
ffmpeg
freeglut3-dev
gcc
g++
\ No newline at end of file
g++
ninja-build
vim
# -*- mode: dockerfile -*-
FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04
ARG PYTHON_VERSION=3.7
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -yq \
bison \
build-essential \
cmake \
curl \
flex \
git \
libbz2-dev \
ninja-build \
software-properties-common \
wget \
apt-transport-https \
ca-certificates \
gnupg
# Install the latest cmake
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main'
RUN apt-get update && apt-get --allow-unauthenticated install -yq cmake kitware-archive-keyring
# Install Conda
WORKDIR /opt/conda_setup
RUN curl -o miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
chmod +x miniconda.sh && \
./miniconda.sh -b -p /opt/conda && \
/opt/conda/bin/conda install -y python=$PYTHON_VERSION && \
/opt/conda/bin/conda clean -ya
ENV PATH /opt/conda/bin:$PATH
# Create Env, Install Torch and Keep Env active
RUN conda init bash
RUN conda create -n nle python=3.7
RUN conda install -n nle pytorch torchvision torchaudio cudatoolkit=11.1 -c pytorch -c nvidia
ENV BASH_ENV ~/.bashrc
SHELL ["conda", "run", "-n", "nle", "/bin/bash" ,"-c"]
RUN python -c 'import torch'
# Install TorchBeast
WORKDIR /opt/
RUN git clone https://github.com/facebookresearch/torchbeast.git --recursive
WORKDIR /opt/torchbeast
RUN pip install -r requirements.txt
RUN pip install ./nest
RUN python setup.py install
# Create Workspace
WORKDIR /opt/workspace
RUN pip install nle \
hydra-core \
hydra_colorlog \
wandb \
einops
RUN echo "conda activate nle" >> ~/.bashrc
CMD ["/bin/bash"]
# Docker commands:
# docker build -t nle .
# docker run -v current_dir:/opt/workspace -it nle
@@ -7,16 +7,41 @@ implementation of IMPALA for PyTorch.
It comes with all the code you need to train, run and submit a model
that is based on the results published in the original NLE paper.
This implementation runs with 2 GPUS (one for acting and one for
This implementation can run with 2 GPUs (one for acting and one for
learning), and runs many simultaneous environments with dynamic
batching.
batching. It is currently configured to run with only 1 GPU; see the sketch below for switching between one and two GPUs.
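For example, assuming the Hydra-style `key=value` overrides used elsewhere in this repository (run from the repository root), you might launch training like this:

```bash
# Single GPU (current default): learner and actor both on cuda:0.
python nethack_baselines/torchbeast/polyhydra.py batch_size=16

# Two GPUs: put the learner on a second device.
python nethack_baselines/torchbeast/polyhydra.py learner_device=cuda:1 actor_device=cuda:0
```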
## Installation
## Installation
**[Native Installation]**
To get this running you'll need to follow the TorchBeast installation instructions for PolyBeast from the [TorchBeast repo](https://github.com/facebookresearch/torchbeast#faster-version-polybeast).
A Dockerfile that installs TorchBeast is also provided.
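For reference, a condensed sketch of the native install, mirroring the steps in the provided Dockerfile (your CUDA and conda setup may differ):

```bash
# Build PolyBeast from source inside an environment with CMake available.
conda install cmake -y
git clone https://github.com/facebookresearch/torchbeast.git --recursive
cd torchbeast
pip install -r requirements.txt
pip install ./nest
python setup.py install
```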
**[Docker Installation]**
You can fast-track the installation of PolyBeast by using the competition's own Dockerfile. Prebuilt images are also hosted on Docker Hub. The following commands open a container in which you can run the baseline:
**To Run Existing Docker Image**
`docker pull fairnle/challenge:dev`
```docker run -it -v `pwd`:/home/aicrowd --gpus='all' fairnle/challenge:dev```
**To Build Your Own Image**
*Dev Image* - runs as the root user and does not copy your files into the image
`docker build -t competition --target nhc-dev .`
*Or Submission Image* - runs as the `aicrowd` user and copies all of your files into the image
`docker build -t competition --target nhc-submit .`
*Run Image*
```docker run -it -v `pwd`:/home/aicrowd --gpus='all' competition```
## Running The Baseline
......
@@ -73,7 +73,7 @@ unroll_length: 80 # The unroll length (time dimension).
num_learner_threads: 1 # Number learner threads.
num_inference_threads: 1 # Number inference threads.
disable_cuda: false # Disable CUDA.
learner_device: cuda:1 # Set learner device.
learner_device: cuda:0 # Set learner device.
actor_device: cuda:0 # Set actor device.
# OPTIMIZER settings. (RMS Prop)
......
@@ -25,7 +25,7 @@ from .util import id_pairs_table
import numpy as np
NUM_GLYPHS = nethack.MAX_GLYPH
NUM_FEATURES = nethack.BLSTATS_SHAPE[0]
NUM_FEATURES = 25
PAD_CHAR = 0
NUM_CHARS = 256
@@ -420,7 +420,7 @@ class BLStatsEncoder(nn.Module):
    def forward(self, inputs):
        T, B, *_ = inputs["blstats"].shape

        features = inputs["blstats"]
        features = inputs["blstats"][:, :, :NUM_FEATURES]
        # -- [B' x F]
        features = features.view(T * B, -1).float()
        # -- [B x K]
......
@@ -515,3 +515,6 @@ def main(flags):
        train(flags)
    elif flags.mode.startswith("test"):
        test(flags)

    if flags.wandb:
        wandb.finish()
@@ -143,6 +143,7 @@ def main(flags: DictConfig):
    for p in env_processes:
        p.kill()
        p.join()

    print('Training Done!')
if __name__ == "__main__":
......
This diff is collapsed.
File added
name: 5
wandb: true
project: nethack_challenge
entity: nethack
group: baseline
mock: false
single_ttyrec: true
num_seeds: 0
write_profiler_trace: false
fn_penalty_step: constant
penalty_time: 0.0
penalty_step: -0.01
reward_lose: 0
reward_win: 100
state_counter: none
character: '@'
mode: train
env: challenge
num_actors: 256
total_steps: 1000000000.0
batch_size: 32
unroll_length: 80
num_learner_threads: 1
num_inference_threads: 1
disable_cuda: false
learner_device: cuda:1
actor_device: cuda:0
learning_rate: 0.0002
grad_norm_clipping: 40
alpha: 0.99
momentum: 0
epsilon: 1.0e-06
entropy_cost: 0.001
baseline_cost: 0.5
discounting: 0.999
normalize_reward: true
model: baseline
use_lstm: true
hidden_dim: 256
embedding_dim: 64
layers: 5
crop_dim: 9
use_index_select: true
restrict_action_space: true
msg:
  hidden_dim: 64
  embedding_dim: 32
load_dir: null
File added
name: 5
wandb: true
project: nethack_challenge
entity: nethack
group: baseline
mock: false
single_ttyrec: true
num_seeds: 0
write_profiler_trace: false
fn_penalty_step: constant
penalty_time: 0.0
penalty_step: -0.01
reward_lose: 0
reward_win: 100
state_counter: none
character: '@'
mode: train
env: challenge
num_actors: 256
total_steps: 1000000000.0
batch_size: 32
unroll_length: 80
num_learner_threads: 1
num_inference_threads: 1
disable_cuda: false
learner_device: cuda:1
actor_device: cuda:0
learning_rate: 0.0002
grad_norm_clipping: 40
alpha: 0.99
momentum: 0
epsilon: 1.0e-06
entropy_cost: 0.001
baseline_cost: 0.5
discounting: 0.999
normalize_reward: true
model: baseline
use_lstm: true
hidden_dim: 256
embedding_dim: 64
layers: 5
crop_dim: 9
use_index_select: true
restrict_action_space: true
msg:
  hidden_dim: 64
  embedding_dim: 32
load_dir: null
@@ -14,8 +14,8 @@ from envs.wrappers import addtimelimitwrapper_fn
class SubmissionConfig:

    ## Add your own agent class
    AGENT = CustomAgent
    # AGENT = TorchBeastAgent
    # AGENT = CustomAgent
    AGENT = TorchBeastAgent

    ## Change the NUM_ENVIRONMENTS as you need
......