
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (113)
Showing with 524 additions and 380 deletions
**.gitattributes**
*.wav filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
submission filter=lfs diff=lfs merge=lfs -text
submission/* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
**.gitignore**
@@ -130,3 +130,5 @@ dmypy.json
# Pyre type checker
.pyre/
nle_data/
test_batched_env.py
**Dockerfile**
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 AS nhc-dev
ENV DEBIAN_FRONTEND=noninteractive
COPY apt.txt /tmp/apt.txt
RUN apt -qq update && apt -qq install -y --no-install-recommends `cat /tmp/apt.txt` \
&& rm -rf /var/cache/*
# Unicode support:
@@ -38,20 +28,32 @@ RUN adduser --disabled-password \
--uid ${HOST_UID} \
${USER_NAME}
USER ${USER_NAME}
WORKDIR /opt/
# Install anaconda
ENV CONDA_DIR /opt/conda
RUN wget -nv -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_4.9.2-Linux-x86_64.sh \
&& bash miniconda.sh -b -p ${CONDA_DIR} \
&& . ${CONDA_DIR}/etc/profile.d/conda.sh \
&& conda clean -y -a \
&& rm -rf miniconda.sh
ENV PATH ${CONDA_DIR}/bin:${PATH}
# Install TorchBeast
RUN conda install cmake cudatoolkit=10.2 pytorch -y -c pytorch -c nvidia && conda clean -y -a
RUN git clone https://github.com/facebookresearch/torchbeast.git --recursive
WORKDIR /opt/torchbeast
RUN pip install -r requirements.txt
RUN pip install ./nest
RUN python setup.py install
# Install AICrowd items
WORKDIR ${HOME_DIR}
COPY --chown=1001:1001 requirements.txt ${HOME_DIR}/requirements.txt
RUN pip install -r requirements.txt --no-cache-dir
# SUBMISSION IMAGE - change user and copy files
FROM nhc-dev AS nhc-submit
COPY --chown=1001:1001 . ${HOME_DIR}
USER ${USER_NAME}
**README.md**
![Nethack Banner](https://aicrowd-production.s3.eu-central-1.amazonaws.com/misc/neurips-2021-nethack-challenge-media/nethack_final_link+preview_starter_kit.jpg)
# **[NeurIPS 2021 - The NetHack Challenge](https://www.aicrowd.com/challenges/neurips-2021-the-nethack-challenge)** - Starter Kit
[![Discord](https://img.shields.io/discord/565639094860775436.svg)](https://discord.gg/fNRrSvZkry)
This repository is the NetHack Challenge **Submission template and Starter kit**! It contains:
* **Instructions** for setting up your codebase to make submissions easy.
* **Baselines** for quickly getting started training your agent.
* **Notebooks** for introducing you to NetHack and the NLE.
* **Documentation** for how to submit your model to the leaderboard.
Quick Links:
* [The NetHack Challenge - Competition Page](https://www.aicrowd.com/challenges/neurips-2021-the-nethack-challenge)
* [The NetHack Challenge - Discord Server](https://discord.gg/zkFWQmSWBA)
* [The NetHack Challenge - Starter Kit](https://gitlab.aicrowd.com/nethack/neurips-2021-the-nethack-challenge)
* [IMPORTANT - Accept the rules before you submit](https://www.aicrowd.com/challenges/neurips-2021-the-nethack-challenge/challenge_rules)
Clone the repository to compete now!
## Quick Start
With Docker and a single GPU:
```bash
# 1. CLONE THE REPO AND DOWNLOAD BASELINE MODELS
git clone http://gitlab.aicrowd.com/nethack/neurips-2021-the-nethack-challenge.git \
&& cd neurips-2021-the-nethack-challenge \
&& git lfs install \
&& git lfs pull
# 2. START THE DOCKER IMAGE
docker run -it -v `pwd`:/home/aicrowd --gpus 'all' fairnle/challenge:dev
# 3. TEST AN EXISTING SUBMISSION
python test_submission.py # Tests ./saved_models/pretrained_0.5B
# 4. TRAIN YOUR OWN
python nethack_baselines/torchbeast/polyhydra.py batch_size=16
```
For troubleshooting, see [Setting Up Details (Docker)](#setting-up-details-docker).
# Table of Contents
1. [Intro to Nethack and the Nethack Challenge](#intro-to-nethack-and-the-nethack-challenge)
2. [Setting up your codebase](#setting-up-your-codebase)
3. [Baselines](#baselines)
4. [How to test and debug locally](#how-to-test-and-debug-locally)
5. [How to submit](#how-to-submit)
# Intro to Nethack and the Nethack Challenge
Your goal is to produce the best possible agent for navigating the depths
of Nethack dungeons and emerging with the Amulet in hand!
You can approach this task however you please, but a good starting point
would be [**this notebook**](./notebooks/NetHackTutorial.ipynb) which provides
an overview of (1) the many dynamics at play in the game and (2) the
observation and action space with which your agent will interact.
The NetHack Learning Environment (NLE) is a Reinforcement Learning environment presented at NeurIPS 2020. NLE is based on NetHack 3.6.6 and designed to provide a standard RL interface to the game, and comes with tasks that function as a first step to evaluate agents on this new environment. You can read more about NLE in the NeurIPS 2020 paper.

We are excited that this competition offers machine learning students, researchers and NetHack-bot builders the opportunity to participate in a grand challenge in AI without prohibitive computational costs—and we are eagerly looking forward to the wide variety of submissions.
#### A high-level description of the Challenge Procedure:
1. **Sign up** to join the competition [on the AIcrowd website](https://www.aicrowd.com/challenges/neurips-2021-the-nethack-challenge).
2. **Clone** this repo and start developing your solution.
3. **Train** your models on NLE, and ensure `run.sh` will generate rollouts.
4. **Submit** your trained models to [AIcrowd Gitlab](https://gitlab.aicrowd.com)
for evaluation (full instructions below). The automated evaluation setup
will evaluate the submissions against the NLE environment for a fixed
number of rollouts to compute and report the metrics on the leaderboard
of the competition.
# Setting Up Your Codebase
<p style="text-align:center"><img style="text-align:center" src="https://raw.githubusercontent.com/facebookresearch/nle/master/dat/nle/example_run.gif"></p>
AIcrowd provides great flexibility in the details of your submission!
Find the answers to FAQs about submission structure below, followed by
the guide for setting up this starter kit and linking it to the AIcrowd
GitLab.
## FAQs
### How does submission work?
The submission entrypoint is a bash script `run.sh`, which runs in an environment defined by `Dockerfile`. When this script is called, AIcrowd will expect you to generate all your rollouts in the allotted time, using `aicrowd_gym` in place of regular `gym`. This means that AIcrowd can make sure everyone is running the same environment, and can keep score!
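For intuition, the rollout code that `run.sh` eventually invokes can be as small as the sketch below (a trimmed-down variant of `nethack_baselines/other_examples/random_rollouts.py` from this kit; the episode count here is illustrative):

```python
import aicrowd_gym
import nle  # noqa: F401 -- imported for its side effect of registering the NetHack envs

# A single-env random rollout, using aicrowd_gym in place of regular gym.
env = aicrowd_gym.make("NetHackChallenge-v0")
env.reset()
episodes = 0
while episodes < 5:  # illustrative; the evaluator fixes the real episode count
    _, _, done, _ = env.step(env.action_space.sample())
    if done:
        episodes += 1
        env.reset()
```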
### What languages can I use?
Since the entrypoint is a bash script `run.sh`, you can call arbitrary code from this script. However, to get you started, the environment is set up to generate rollouts in Python.
The repo gives you a template placeholder to load your model (`agents/your_agent.py`), and a config to choose which agent to load (`submission_config.py`). You can then test a submission, with all of AIcrowd's timeouts applied to the environment, by running `python test_submission.py`.
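As a rough sketch of how that wiring fits together (only the `AGENT` name is confirmed by this README; the exact layout of the real `submission_config.py` may differ, so check the file in the kit):

```python
# submission_config.py -- a minimal sketch, not the file verbatim.
from agents.random_batched_agent import CustomAgent
# from agents.torchbeast_agent import TorchBeastAgent

# test_submission.py and run.sh read this to decide which agent to roll out.
AGENT = CustomAgent  # swap in TorchBeastAgent to use the baseline
```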
### How do I specify my dependencies?
We accept submissions with custom runtimes, so you can choose your
favorite! The configuration files typically include `requirements.txt`
(pypi packages), `apt.txt` (apt packages) or even your own `Dockerfile`.
You can find more detailed information in the [RUNTIME.md](/docs/RUNTIME.md) file.
### What should my code structure look like?
Please follow the example structure of this starter kit for your code. The different files and directories have the following meaning:
```
.
├── aicrowd.json                  # Submission meta information - add tags for tracks here
├── apt.txt                       # Packages to be installed inside submission environment
├── requirements.txt              # Python packages to be installed with pip
├── rollout.py                    # This will run rollouts on a batched agent
├── test_submission.py            # Run this on your machine to get an estimated score
├── run.sh                        # Submission entrypoint
├── utilities                     # Helper scripts for setting up and submission
│   └── submit.sh                 # Script for easy submission of your code
├── envs                          # Operations on the env like batching and wrappers
│   ├── batched_env.py            # Batching for multiple envs
│   └── wrappers.py               # Add wrappers to your env here
├── agents                        # Baseline agents for submission
│   ├── batched_agent.py          # Abstract base class for batched agents
│   ├── random_batched_agent.py   # Batched agent that returns random actions
│   ├── rllib_batched_agent.py    # Batched agent that runs with the rllib baseline
│   └── torchbeast_agent.py       # Batched agent that runs with the torchbeast baseline
├── nethack_baselines             # Training code for the baseline agents
│   ├── other_examples
│   │   └── random_rollouts.py    # Barebones random agent with no batching
│   ├── rllib                     # Baseline agent trained with rllib
│   └── torchbeast                # Baseline agent trained with IMPALA on Pytorch
└── notebooks
    └── NetHackTutorial.ipynb     # Tutorial on the Nethack Learning Environment
```
Finally, **you must specify an AIcrowd submission JSON in `aicrowd.json` to be scored!** See [How do I actually make a submission?](#how-do-i-actually-make-a-submission) below for more details.
### How can I get going with an existing baseline?
The best current baseline is the torchbeast baseline. Follow the instructions
[here](/nethack_baselines/torchbeast/) to install and start training
the model (there are even some suggestions for improvements).
To then submit your saved model, simply set the `AGENT` in `submission_config.py` to be `TorchBeastAgent`, and modify `agents/torchbeast_agent.py` to point to your saved directory. You can then test your saved model with `python test_submission.py`.
### How can I get going with a completely new model?
Train your model as you like, and when you're ready to submit, just adapt `YourAgent` in `agents/your_agent.py` to load your model and take a `batched_step`. Then set `AGENT` in `submission_config.py` to this class and you are ready to test with `python test_submission.py`.
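For instance, a `YourAgent` wrapping a PyTorch policy might look like the sketch below. The `BatchedAgent` base class and the `batched_step` contract come from this kit; the model file `my_model.pt` and the `act` method are placeholders for your own code:

```python
# agents/your_agent.py -- a sketch; adapt the loading logic to your model.
import numpy as np
import torch

from agents.base import BatchedAgent


class YourAgent(BatchedAgent):
    def __init__(self, num_envs, num_actions):
        super().__init__(num_envs, num_actions)
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        # Placeholder: load your trained policy however you saved it.
        self.policy = torch.load("my_model.pt", map_location=self.device)
        self.policy.eval()

    def batched_step(self, observations, rewards, dones, infos):
        # Placeholder: `act` stands in for however your policy maps one
        # observation dict to one action index.
        with torch.no_grad():
            actions = [self.policy.act(obs) for obs in observations]
        return np.array(actions)
```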
### How do I actually make a submission?
First you need to fill in your `aicrowd.json`, to give AIcrowd some info so you can be scored.
The `aicrowd.json` of each submission should contain the following content:
```json
{
  "challenge_id": "neurips-2021-the-nethack-challenge",
  "authors": ["your-aicrowd-username"],
  "description": "(optional) description about your awesome agent",
  "gpu": true
}
```
This JSON is used to map your submission to the challenge - so please remember to use the correct `challenge_id` as specified above.
The submission is made by adding everything, including the model, to git, tagging the submission with a git tag that starts with `submission-`, and pushing to AIcrowd's GitLab. The rest is done for you!
More details are available [here](/docs/SUBMISSION.md).
### Are there any hardware or time constraints?
Your submission will need to complete 128 rollouts in 30 minutes (roughly 14 seconds per episode on average). We will run 4 of these in parallel, and a total of 512 episodes will be used for evaluation. An episode will time out and terminate if any action is left hanging for 300 seconds, or if 10,000 steps are taken without advancing the in-game clock.
The machine where the submission will run has the following specifications:
* 1 NVIDIA T4 GPU
* 4 vCPUs
* 16 GB RAM
## Setting Up Details [No Docker]
1. **Add your SSH key** to AIcrowd GitLab
You can add your SSH Keys to your GitLab account by going to your profile settings [here](https://gitlab.aicrowd.com/profile/keys). If you do not have SSH Keys, you will first need to [generate one](https://docs.gitlab.com/ee/ssh/README.html#generating-a-new-ssh-key-pair).
2. **Clone the repository**
```
git clone git@gitlab.aicrowd.com:nethack/neurips-2021-the-nethack-challenge.git
```
3. **Verify you have dependencies** for the Nethack Learning Environment
NLE requires `python>=3.5`, `cmake>=3.14` to be installed and available both when building the
package, and at runtime.
On **MacOS**, one can use `Homebrew` as follows:
``` bash
brew install cmake
```
On a plain **Ubuntu 18.04** distribution, `cmake` and other dependencies
can be installed by doing:
```bash
# Python and most build deps
sudo apt-get install -y build-essential autoconf libtool pkg-config \
python3-dev python3-pip python3-numpy git flex bison libbz2-dev
# recent cmake version
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | sudo apt-key add -
sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
sudo apt-get update && sudo apt-get --allow-unauthenticated install -y \
cmake \
kitware-archive-keyring
```
4. **Install** competition-specific dependencies!
We advise using a conda environment for this:
```bash
# Optional: Create a conda env
conda create -n nle_challenge python=3.8 'cmake>=3.15'
conda activate nle_challenge
pip install -r requirements.txt
```
If `pip install` fails with errors when installing NLE, please see installation requirements at https://github.com/facebookresearch/nle.
5. **Run rollouts** with a random agent with `python test_submission.py`.
## Setting Up Details [Docker]
With Docker, setting up is very simple! Simply pull a pre-built image from the `fairnle` repository:
```
docker pull fairnle/challenge:dev
```
This image is based on Ubuntu 18.04, with CUDA 10.2 and cuDNN 7, and corresponds to the `nhc-dev` target in the `Dockerfile`. You can run it as follows:
**Without GPUs**
```
docker run -it -v `pwd`:/home/aicrowd fairnle/challenge:dev
```
**With GPUs**
```
docker run -it -v `pwd`:/home/aicrowd --gpus 'all' fairnle/challenge:dev
```
**NB**: On Linux, the `--gpus` argument requires you to install `nvidia-container-toolkit`, which on Ubuntu is available with `apt install`.
This will take you into a container, with your current working directory mounted as a volume. At submission time, the `nhc-submit` target will be built by AIcrowd, which copies all the files into the image instead of simply mounting them.
If you wish to build your own dev environment from the `Dockerfile`, you can do so with:
```
docker build --target nhc-dev -t your-image-name .
```
# Baselines
Although we are looking to supply this repository with more baselines throughout the first month of the competition, it already comes with a strong IMPALA-based baseline in the directory `./nethack_baselines/torchbeast`.
The [README](/nethack_baselines/torchbeast/README.md) has more info about the baseline, including how to install it and start training the model (there are even some suggestions for improvements).
The TorchBeast baseline comes with two sets of weights: the same model trained to 250 million steps and to 500 million steps.
To download these weights, run `git lfs pull`, and check `saved_models`.
The TorchBeast agent can then be selected by setting `AGENT=TorchBeastAgent` in the `submission_config.py`, and the weights can be changed by changing the `MODEL_DIR` in `agents/torchbeast_agent.py`.
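For example, switching from the default 500M-step weights to the 250M-step ones should only require changing that one constant (the exact directory name is an assumption; verify what `git lfs pull` placed in `./saved_models`):

```python
# agents/torchbeast_agent.py -- the one-line change, sketched.
# "pretrained_0.25B" is an assumed name; check ./saved_models yourself.
MODEL_DIR = "./saved_models/torchbeast/pretrained_0.25B"
```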
More information on git lfs can be found in [SUBMISSION.md](/docs/SUBMISSION.md).
# How to Test and Debug Locally
The best way to test your model is to run your submission locally.
You can do this naively by simply running `python rollout.py` or you can simulate the extra timeout wrappers that AIcrowd will implement by using `python test_submission.py`.
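You can also smoke-test the batched plumbing by hand; a minimal sketch using the `BatchedEnv` and random `CustomAgent` that ship with this kit (module paths as they appear in this repository):

```python
import numpy as np
import aicrowd_gym
import nle  # noqa: F401 -- registers the NetHack envs

from envs.batched_env import BatchedEnv
from agents.random_batched_agent import CustomAgent

num_envs = 2
env = BatchedEnv(lambda: aicrowd_gym.make("NetHackChallenge-v0"), num_envs=num_envs)
agent = CustomAgent(num_envs, env.num_actions)

observations = env.batch_reset()
rewards, dones, infos = [0.0] * num_envs, [False] * num_envs, [{}] * num_envs
for _ in range(100):  # a short random rollout, just to check the wiring
    actions = agent.batched_step(observations, rewards, dones, infos)
    observations, rewards, dones, infos = env.batch_step(actions)
```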
# How to Submit
More information on submissions can be found in our [SUBMISSION.md](/docs/SUBMISSION.md).
## Contributors
- [Dipam Chakraborty](https://www.aicrowd.com/participants/dipam)
- [Shivam Khandelwal](https://www.aicrowd.com/participants/shivam)
- [Eric Hambro](https://www.aicrowd.com/participants/eric_hammy)
- [Danielle Rothermel](https://www.aicrowd.com/participants/danielle_rothermel)
- [Jyotish Poonganam](https://www.aicrowd.com/participants/jyotish)
# 📎 Important links
- 💪 Challenge Page: https://www.aicrowd.com/challenges/neurips-2021-the-nethack-challenge
- 🗣️ Discussion Forum: https://www.aicrowd.com/challenges/neurips-2021-the-nethack-challenge/discussion
- 🏆 Leaderboard: https://www.aicrowd.com/challenges/neurips-2021-the-nethack-challenge/leaderboards
**Best of Luck** 🎉 🎉
**agents/base.py**

from abc import ABC, abstractmethod


class BatchedAgent(ABC):
    """
    This is an abstract base class for you to load your models and perform rollouts on a
    batched set of environments.
    """

    def __init__(self, num_envs: int, num_actions: int):
        self.num_envs = num_envs
        self.num_actions = num_actions

    @abstractmethod
    def batched_step(self, observations, rewards, dones, infos):
        """
        Perform a batched step on lists of environment outputs.

        :param observations: a list of observations
        :param rewards: a list of rewards
        :param dones: a list of dones
        :param infos: a list of infos

        returns: an iterable of actions
        """
        pass
**agents/random_batched_agent.py**

import numpy as np

from agents.base import BatchedAgent


class CustomAgent(BatchedAgent):
    """An example agent... that simply acts randomly. Adapt to your needs!"""

    def __init__(self, num_envs, num_actions):
        """Set up and load your model here"""
        super().__init__(num_envs, num_actions)
        self.seeded_state = np.random.RandomState(42)

    def batched_step(self, observations, rewards, dones, infos):
        """
        Perform a batched step on lists of environment outputs.

        Each argument is a list of the respective gym output.
        Returns an iterable of actions.
        """
        actions = self.seeded_state.randint(self.num_actions, size=self.num_envs)
        return actions
**agents/torchbeast_agent.py**

import torch
import numpy as np

from agents.base import BatchedAgent
from nethack_baselines.torchbeast.models import load_model

MODEL_DIR = "./saved_models/torchbeast/pretrained_0.5B"


class TorchBeastAgent(BatchedAgent):
    """
    A BatchedAgent using the TorchBeast Model
    """

    def __init__(self, num_envs, num_actions):
        super().__init__(num_envs, num_actions)
        self.model_dir = MODEL_DIR
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.model = load_model(MODEL_DIR, self.device)
        print(f'Using Model In: {self.model_dir}, Device: {self.device}')

        self.core_state = [
            m.to(self.device) for m in self.model.initial_state(batch_size=num_envs)
        ]

    def batch_inputs(self, observations, dones):
        """
        Convert lists of observations, rewards, dones, infos to tensors for TorchBeast.

        TorchBeast models:
        * take tensors in the form [T, B, ...]: B := batch, T := unroll (= 1)
        * take "done" as a BOOLEAN observation
        """
        states = list(observations[0].keys())
        obs = {k: [] for k in states}

        # Unpack List[Dicts] -> Dict[Lists]
        for o in observations:
            for k, t in o.items():
                obs[k].append(t)

        # Convert to Tensor, add unroll dim (= 1), move to GPU
        for k in states:
            obs[k] = torch.Tensor(np.stack(obs[k])[None, ...]).to(self.device)
        obs["done"] = torch.Tensor(np.array(dones)[None, ...]).bool().to(self.device)
        return obs, dones

    def batched_step(self, observations, rewards, dones, infos):
        """
        Perform a batched step on lists of environment outputs.

        TorchBeast models:
        * take the core (LSTM) state as input, and return it as output
        * return outputs as a dict of "action", "policy_logits", "baseline"
        """
        observations, dones = self.batch_inputs(observations, dones)

        with torch.no_grad():
            outputs, self.core_state = self.model(observations, self.core_state)

        return outputs["action"].cpu().numpy()[0]
**aicrowd.json**

{
    "challenge_id": "neurips-2021-the-nethack-challenge",
    "authors": [
        "aicrowd-bot"
    ],
    "description": "(optional) description about your awesome agent",
    "gpu": true
}

**apt.txt**
build-essential
git
cmake
flex
bison
libbz2-dev
wget
ca-certificates
locales
libglib2.0
libsm6
libxext6
libxrender-dev
xvfb
ffmpeg
freeglut3-dev
gcc
g++
ninja-build
vim
**docs/RUNTIME.md**
@@ -14,8 +14,8 @@ Few of the most common ways are as follows:
* **Create your new conda environment**
```sh
conda create --name nle
conda activate nle
```
* **Your code specific dependencies**
......
**docs/SUBMISSION.md**
@@ -15,6 +15,12 @@ You can modify the existing `rollout.py` OR copy it (to say `your_code.py`) and
Before you submit, make sure that you have saved your models, which are needed by your inference code. In case your files are larger in size, you can use `git-lfs` to upload them. More details [here](https://discourse.aicrowd.com/t/how-to-upload-large-files-size-to-your-submission/2304).
## Testing your model locally
The best way to test your model is to run your submission locally.
You can do this naively by simply running `python rollout.py` or you can simulate the extra timeout wrappers that AIcrowd will implement by using `python test_submission.py`.
## How to submit a trained model!
To make a submission, you will have to create a **private** repository on [https://gitlab.aicrowd.com/](https://gitlab.aicrowd.com/).
......
**environment.yml**

name: nle
dependencies:
  - python=3.8
  - cmake=3.14
  - numpy
  - pip:
    - aicrowd-gym
    - -r file:requirements.txt
from gym.envs.registration import register
register('NetHackChallengeBatched-v0',
entry_point='nle_batched_env.NetHackChallengeBatchedEnv')
**envs/batched_env.py**

import aicrowd_gym
import numpy as np
from collections.abc import Iterable


class BatchedEnv:
    def __init__(self, env_make_fn, num_envs=32):
        """
        Creates multiple copies of the environment with the same env_make_fn function
        """
        self.num_envs = num_envs
        self.envs = [env_make_fn() for _ in range(self.num_envs)]
        self.num_actions = self.envs[0].action_space.n

    def batch_step(self, actions):
        """
        Applies each action to each env in the same order as self.envs
        Actions should be iterable and have the same length as self.envs
        Returns lists of observations, rewards, dones, infos
        """
        assert isinstance(
            actions, Iterable), f"actions with type {type(actions)} is not iterable"
        assert len(
            actions) == self.num_envs, f"actions has length {len(actions)} which is different from num_envs"

        observations, rewards, dones, infos = [], [], [], []
        for env, a in zip(self.envs, actions):
            observation, reward, done, info = env.step(a)
            if done:
                observation = env.reset()
            observations.append(observation)
            rewards.append(reward)
            dones.append(done)
            infos.append(info)

        return observations, rewards, dones, infos

    def batch_reset(self):
        """
        Resets all the environments in self.envs
        """
        observation = [env.reset() for env in self.envs]
        return observation

    def single_env_reset(self, index):
        """
        Resets the env at the index location (referenced by the demo below)
        """
        observation = self.envs[index].reset()
        return observation


if __name__ == '__main__':
    num_envs = 4
    batched_env = BatchedEnv(
        env_make_fn=lambda: aicrowd_gym.make('NetHackChallenge-v0'),
        num_envs=num_envs
    )

    observations = batched_env.batch_reset()
    num_actions = batched_env.envs[0].action_space.n
    for _ in range(50):
        actions = np.random.randint(num_actions, size=num_envs)
        observations, rewards, dones, infos = batched_env.batch_step(actions)
        for done_idx in np.where(dones)[0]:
            observations[done_idx] = batched_env.single_env_reset(done_idx)
**envs/wrappers.py**

import aicrowd_gym
import nle
from gym.wrappers import TimeLimit


def create_env():
    """This is the environment that will be assessed by AIcrowd."""
    return aicrowd_gym.make("NetHackChallenge-v0")


def addtimelimitwrapper_fn():
    """
    An example of how to add wrappers to the nethack_make_fn
    Should return a gym env which wraps the nethack gym env
    """
    env = create_env()
    env = TimeLimit(env, max_episode_steps=10_000_000)
    return env
**evaluator/aicrowd_helpers.py**

#!/usr/bin/env python
import aicrowd_api
import os

########################################################################
# Instantiate Event Notifier
########################################################################
aicrowd_events = aicrowd_api.events.AIcrowdEvents()


def execution_start():
    ########################################################################
    # Register Evaluation Start event
    ########################################################################
    aicrowd_events.register_event(
        event_type=aicrowd_events.AICROWD_EVENT_INFO,
        message="execution_started",
        payload={
            "event_type": "airborne_detection:execution_started"
        }
    )


def execution_running():
    ########################################################################
    # Register Evaluation Running event
    ########################################################################
    aicrowd_events.register_event(
        event_type=aicrowd_events.AICROWD_EVENT_INFO,
        message="execution_progress",
        payload={
            "event_type": "airborne_detection:execution_progress",
            "progress": 0.0
        }
    )


def execution_progress(progress):
    ########################################################################
    # Register Evaluation Progress event
    ########################################################################
    aicrowd_events.register_event(
        event_type=aicrowd_events.AICROWD_EVENT_INFO,
        message="execution_progress",
        payload={
            "event_type": "airborne_detection:execution_progress",
            "progress": progress
        }
    )


def execution_success():
    ########################################################################
    # Register Evaluation Complete event
    ########################################################################
    predictions_output_path = os.getenv("PREDICTIONS_OUTPUT_PATH", False)
    aicrowd_events.register_event(
        event_type=aicrowd_events.AICROWD_EVENT_SUCCESS,
        message="execution_success",
        payload={
            "event_type": "airborne_detection:execution_success",
            "predictions_output_path": predictions_output_path
        },
        blocking=True
    )


def execution_error(error):
    ########################################################################
    # Register Evaluation Error event
    ########################################################################
    aicrowd_events.register_event(
        event_type=aicrowd_events.AICROWD_EVENT_ERROR,
        message="execution_error",
        payload={  # Arbitrary Payload
            "event_type": "airborne_detection:execution_error",
            "error": error
        },
        blocking=True
    )


def is_grading():
    return os.getenv("AICROWD_IS_GRADING", False)
**evaluator/music_demixing.py**

######################################################################################
### This is a read-only file to allow participants to run their code locally.      ###
### It will be over-written during the evaluation; please do not make any changes  ###
### to this file.                                                                  ###
######################################################################################
import traceback
import os
import signal
from contextlib import contextmanager
from os import listdir
from os.path import isfile, join

import soundfile as sf
import numpy as np

from evaluator import aicrowd_helpers


class TimeoutException(Exception):
    pass


@contextmanager
def time_limit(seconds):
    def signal_handler(signum, frame):
        raise TimeoutException("Prediction timed out!")
    signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)


class MusicDemixingPredictor:
    def __init__(self):
        self.test_data_path = os.getenv("TEST_DATASET_PATH", os.getcwd() + "/data/test/")
        self.results_data_path = os.getenv("RESULTS_DATASET_PATH", os.getcwd() + "/data/results/")
        self.inference_setup_timeout = int(os.getenv("INFERENCE_SETUP_TIMEOUT_SECONDS", "900"))
        self.inference_per_music_timeout = int(os.getenv("INFERENCE_PER_MUSIC_TIMEOUT_SECONDS", "240"))
        self.partial_run = os.getenv("PARTIAL_RUN_MUSIC_NAMES", None)
        self.results = []
        self.current_music_name = None

    def get_all_music_names(self):
        valid_music_names = None
        if self.partial_run:
            valid_music_names = self.partial_run.split(',')
        music_names = []
        for folder in listdir(self.test_data_path):
            if not isfile(join(self.test_data_path, folder)):
                if valid_music_names is None or folder in valid_music_names:
                    music_names.append(folder)
        return music_names

    def get_music_folder_location(self, music_name):
        return join(self.test_data_path, music_name)

    def get_music_file_location(self, music_name, instrument=None):
        # The mixture is read from the test data; per-instrument estimates
        # are written to (and read from) the results directory.
        if instrument is None:
            instrument = "mixture"
            return join(self.test_data_path, music_name, instrument + ".wav")

        if not os.path.exists(self.results_data_path):
            os.makedirs(self.results_data_path)
        if not os.path.exists(join(self.results_data_path, music_name)):
            os.makedirs(join(self.results_data_path, music_name))

        return join(self.results_data_path, music_name, instrument + ".wav")

    def scoring(self):
        """
        Add scoring function in the starter kit for participant's reference
        """
        def sdr(references, estimates):
            # compute SDR for one song
            delta = 1e-7  # avoid numerical errors
            num = np.sum(np.square(references), axis=(1, 2))
            den = np.sum(np.square(references - estimates), axis=(1, 2))
            num += delta
            den += delta
            return 10 * np.log10(num / den)

        music_names = self.get_all_music_names()
        instruments = ["bass", "drums", "other", "vocals"]
        scores = {}
        for music_name in music_names:
            print("Evaluating for: %s" % music_name)
            scores[music_name] = {}
            references = []
            estimates = []
            for instrument in instruments:
                reference_file = join(self.test_data_path, music_name, instrument + ".wav")
                estimate_file = self.get_music_file_location(music_name, instrument)
                reference, _ = sf.read(reference_file)
                estimate, _ = sf.read(estimate_file)
                references.append(reference)
                estimates.append(estimate)
            references = np.stack(references).astype(np.float32)
            estimates = np.stack(estimates).astype(np.float32)
            song_score = sdr(references, estimates).tolist()
            scores[music_name]["sdr_bass"] = song_score[0]
            scores[music_name]["sdr_drums"] = song_score[1]
            scores[music_name]["sdr_other"] = song_score[2]
            scores[music_name]["sdr_vocals"] = song_score[3]
            scores[music_name]["sdr"] = np.mean(song_score)
        return scores

    def evaluation(self):
        """
        Admin function: Runs the whole evaluation
        """
        aicrowd_helpers.execution_start()
        try:
            with time_limit(self.inference_setup_timeout):
                self.prediction_setup()
        except NotImplementedError:
            print("prediction_setup doesn't exist for this run, skipping...")

        aicrowd_helpers.execution_running()

        music_names = self.get_all_music_names()
        for music_name in music_names:
            with time_limit(self.inference_per_music_timeout):
                self.prediction(
                    mixture_file_path=self.get_music_file_location(music_name),
                    bass_file_path=self.get_music_file_location(music_name, "bass"),
                    drums_file_path=self.get_music_file_location(music_name, "drums"),
                    other_file_path=self.get_music_file_location(music_name, "other"),
                    vocals_file_path=self.get_music_file_location(music_name, "vocals"),
                )
            if not self.verify_results(music_name):
                raise Exception("verification failed, demixed files not found.")
        aicrowd_helpers.execution_success()

    def run(self):
        try:
            self.evaluation()
        except Exception as e:
            error = traceback.format_exc()
            print(error)
            aicrowd_helpers.execution_error(error)
            if not aicrowd_helpers.is_grading():
                raise e

    def prediction_setup(self):
        """
        You can do any preprocessing required for your codebase here:
        like loading your models into memory, etc.
        """
        raise NotImplementedError

    def prediction(self, mixture_file_path, bass_file_path, drums_file_path,
                   other_file_path, vocals_file_path):
        """
        This function will be called for each song during the evaluation.
        NOTE: In case you want to load your model, please do so in the
        `prediction_setup` function.
        """
        raise NotImplementedError

    def verify_results(self, music_name):
        """
        This function will be called to check that all the files exist,
        plus any other verification needed (like the length of the wav files).
        """
        valid = True
        valid = valid and os.path.isfile(self.get_music_file_location(music_name, "vocals"))
        valid = valid and os.path.isfile(self.get_music_file_location(music_name, "bass"))
        valid = valid and os.path.isfile(self.get_music_file_location(music_name, "drums"))
        valid = valid and os.path.isfile(self.get_music_file_location(music_name, "other"))
        return valid
**nethack_baselines/other_examples/random_rollouts.py**

# This is intended as an example of a barebones submission.
# Do note that not using BatchedEnv may not meet the timeout requirement.

import aicrowd_gym
import nle


def main():
    """
    A barebones random rollout loop over 200 episodes.
    """
    # This allows us to limit the features of the environment
    # that we don't want participants to use during the submission
    env = aicrowd_gym.make("NetHackChallenge-v0")

    env.reset()
    done = False
    episode_count = 0
    while episode_count < 200:
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            episode_count += 1
            print(episode_count)
            env.reset()


if __name__ == "__main__":
    main()
Placeholder