Compare revisions

23fa11ee · dd8cb9a5 · c9068b25 · fa25b4d4 · 401efc66 · 7b433074
--- a/.gitattributes
+++ b/.gitattributes
 *.wav filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
+submission filter=lfs diff=lfs merge=lfs -text
+submission/* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
--- a/.gitignore
+++ b/.gitignore
@@ -130,3 +130,5 @@ dmypy.json
 # Pyre type checker
 .pyre/

+nle_data/
+test_batched_env.py
--- a/Dockerfile
+++ b/Dockerfile
+FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 AS nhc-dev
+ENV DEBIAN_FRONTEND=noninteractive
+
+COPY apt.txt /tmp/apt.txt
+RUN apt -qq update && apt -qq install -y --no-install-recommends `cat /tmp/apt.txt` \
+ && rm -rf /var/cache/*
+
+# Unicode support:
+RUN locale-gen en_US.UTF-8
+ENV LANG en_US.UTF-8
+ENV LANGUAGE en_US:en
+ENV LC_ALL en_US.UTF-8
+
+# Create user home directory
+ENV USER_NAME aicrowd
+ENV HOME_DIR /home/$USER_NAME
+
+# Replace HOST_UID/HOST_GID with your user / group id
+ENV HOST_UID 1001
+ENV HOST_GID 1001
+
+# Use bash as default shell, rather than sh
+ENV SHELL /bin/bash
+
+# Set up user
+RUN adduser --disabled-password \
+    --gecos "Default user" \
+    --uid ${HOST_UID} \
+    ${USER_NAME}
+
+WORKDIR /opt/
+
+# Install Miniconda
+ENV CONDA_DIR /opt/conda
+RUN wget -nv -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_4.9.2-Linux-x86_64.sh \
+ && bash miniconda.sh -b -p ${CONDA_DIR} \
+ && . ${CONDA_DIR}/etc/profile.d/conda.sh \
+ && conda clean -y -a \
+ && rm -rf miniconda.sh
+ENV PATH ${CONDA_DIR}/bin:${PATH}
+
+# Install TorchBeast
+RUN conda install cmake cudatoolkit=10.2 pytorch -y -c pytorch -c nvidia && conda clean -y -a
+RUN git clone https://github.com/facebookresearch/torchbeast.git --recursive
+WORKDIR /opt/torchbeast
+RUN pip install -r requirements.txt
+RUN pip install ./nest
+RUN python setup.py install
+
+# Install AICrowd items
+WORKDIR ${HOME_DIR}
+COPY --chown=1001:1001 requirements.txt ${HOME_DIR}/requirements.txt
+RUN pip install -r requirements.txt --no-cache-dir
+
+# SUBMISSION IMAGE - change user and copy files
+FROM nhc-dev AS nhc-submit
+COPY --chown=1001:1001 . ${HOME_DIR}
+USER ${USER_NAME}
+
--- a/README.md
+++ b/README.md
--- a/agents/base.py
+++ b/agents/base.py
+from abc import ABC, abstractmethod
+
+class BatchedAgent(ABC):
+    """
+    Abstract base class for agents that act on a batched set of environments.
+
+    Subclasses load their models in ``__init__`` and implement
+    ``batched_step`` to choose one action per environment.
+    """
+
+    def __init__(self, num_envs: int, num_actions: int):
+        """
+        Store the batch dimensions.
+
+        :param num_envs: number of environments that are stepped together
+        :param num_actions: number of discrete actions the agent can choose from
+        """
+        self.num_envs = num_envs
+        self.num_actions = num_actions
+
+    @abstractmethod
+    def batched_step(self, observations, rewards, dones, infos):
+        """
+        Perform a batched step on lists of environment outputs.
+
+        :param observations: a list of observations
+        :param rewards: a list of rewards
+        :param dones: a list of dones
+        :param infos: a list of infos
+
+        :returns: an iterable of actions
+        """
+        pass
+
--- a/agents/custom_agent.py
+++ b/agents/custom_agent.py
+import numpy as np
+
+from agents.base import BatchedAgent
+
+
+class CustomAgent(BatchedAgent):
+    """An example agent that simply acts randomly. Adapt it to your needs!"""
+
+    def __init__(self, num_envs, num_actions):
+        """Set up the agent and load your model here."""
+        super().__init__(num_envs, num_actions)
+        # Fixed seed so the sequence of random actions is reproducible.
+        self.seeded_state = np.random.RandomState(42)
+
+    def batched_step(self, observations, rewards, dones, infos):
+        """
+        Pick one random action index per environment.
+
+        Each argument is a list of the respective gym output; this random
+        policy does not consult any of them.
+
+        :returns: an array of ``num_envs`` integers in ``[0, num_actions)``
+        """
+        return self.seeded_state.randint(0, self.num_actions, size=self.num_envs)
--- a/agents/torchbeast_agent.py
+++ b/agents/torchbeast_agent.py
+import torch
+import numpy as np
+
+from agents.base import BatchedAgent
+
+from nethack_baselines.torchbeast.models import load_model
+
+MODEL_DIR = "./saved_models/torchbeast/pretrained_0.5B"
+
+
+class TorchBeastAgent(BatchedAgent):
+    """
+    A BatchedAgent driven by the TorchBeast model stored in MODEL_DIR.
+    """
+
+    def __init__(self, num_envs, num_actions):
+        """Load the model and create its initial core state on the chosen device."""
+        super().__init__(num_envs, num_actions)
+        self.model_dir = MODEL_DIR
+        # Prefer the first GPU when CUDA is available, otherwise run on CPU.
+        if torch.cuda.is_available():
+            self.device = "cuda:0"
+        else:
+            self.device = "cpu"
+        self.model = load_model(MODEL_DIR, self.device)
+        print(f'Using Model In: {self.model_dir}, Device: {self.device}')
+
+        initial_state = self.model.initial_state(batch_size=num_envs)
+        self.core_state = [part.to(self.device) for part in initial_state]
+
+    def batch_inputs(self, observations, dones):
+        """
+        Convert a list of observation dicts and a list of dones to tensors for TorchBeast.
+
+        TorchBeast models:
+            * take tensors in the form: [T, B, ...]: B:= batch, T:= unroll (=1)
+            * take "done" as a BOOLEAN observation
+
+        :param observations: a list of observation dicts, one per environment
+        :param dones: a list of dones, one per environment
+        :returns: ``(obs, dones)`` where ``obs`` maps every observation key, plus
+            ``"done"``, to a tensor on ``self.device`` and ``dones`` is the
+            input passed through unchanged
+        """
+        keys = list(observations[0].keys())
+        batched = {key: [] for key in keys}
+
+        # Unpack List[Dicts] -> Dict[Lists]
+        for single_obs in observations:
+            for key, value in single_obs.items():
+                batched[key].append(value)
+
+        # Convert to Tensor, Add Unroll Dim (=1), Move to the target device
+        for key in keys:
+            stacked = np.stack(batched[key])
+            batched[key] = torch.Tensor(stacked[None, ...]).to(self.device)
+        done_flags = np.array(dones)[None, ...]
+        batched["done"] = torch.Tensor(done_flags).bool().to(self.device)
+        return batched, dones
+
+    def batched_step(self, observations, rewards, dones, infos):
+        """
+        Perform a batched step on lists of environment outputs.
+
+        Torchbeast models:
+            * take the core (LSTM) state as input, and return as output
+            * return outputs as a dict of "action", "policy_logits", "baseline"
+
+        :returns: the "action" output for the single unroll step, as a numpy array
+        """
+        model_inputs, _ = self.batch_inputs(observations, dones)
+
+        # Inference only: no gradients are needed.
+        with torch.no_grad():
+            model_outputs, next_state = self.model(model_inputs, self.core_state)
+        # Carry the core state over to the next call.
+        self.core_state = next_state
+
+        return model_outputs["action"].cpu().numpy()[0]
--- a/aicrowd.json
+++ b/aicrowd.json
 {
-  "challenge_id": "evaluations-api-neurips-nethack",,
+  "challenge_id": "neurips-2021-the-nethack-challenge",
  "authors": [
    "aicrowd-bot"
  ],
-  "external_dataset_used": false
+  "description": "(optional) description about your awesome agent",
+  "gpu": true
 }
+
--- a/apt.txt
+++ b/apt.txt
 build-essential
 git
-cmake
+flex
+bison
+libbz2-dev
+wget
+ca-certificates
+locales
+libglib2.0
+libsm6
+libxext6
+libxrender-dev
+xvfb
+ffmpeg
+freeglut3-dev
+gcc
+g++
+ninja-build
+vim
--- a/docs/RUNTIME.md
+++ b/docs/RUNTIME.md
@@ -14,8 +14,8 @@ Few of the most common ways are as follows:
    * **Create your new conda environment**

        ```sh
-        conda create --name music_demixing_challenge
-        conda activate music_demixing_challenge
+        conda create --name nle
+        conda activate nle
        ```

  * **Your code specific dependencies**

--- a/docs/SUBMISSION.md
+++ b/docs/SUBMISSION.md
@@ -15,6 +15,12 @@ You can modify the existing `rollout.py` OR copy it (to say `your_code.py`) and
 Before you submit make sure that you have saved your models, which are needed by your inference code.
 In case your files are larger in size you can use `git-lfs` to upload them. More details [here](https://discourse.aicrowd.com/t/how-to-upload-large-files-size-to-your-submission/2304).

+## Testing your model locally
+
+The best way to test your model is to run your submission locally.
+
+You can do this naively by simply running `python rollout.py` or you can simulate the extra timeout wrappers that AIcrowd will implement by using `python test_submission.py`.
+
 ## How to submit a trained model!

 To make a submission, you will have to create a **private** repository on [https://gitlab.aicrowd.com/](https://gitlab.aicrowd.com/).

--- a/environment.yml
+++ b/environment.yml
-name: nle
-
-dependencies:
-  - python=3.8
-  - cmake=3.14
-  - numpy
-  - pip:
-    - aicrowd_api
-    - aicrowd-gym
-    - nle
-    - gym
--- a/envs/__init__.py
+++ b/envs/__init__.py
+from gym.envs.registration import register
+
+# Register the batched NetHack challenge environment id with gym.
+# NOTE(review): the entry point names module `nle_batched_env`, which is not
+# visible in this change set -- confirm that it is importable.
+register('NetHackChallengeBatched-v0', 
+            entry_point='nle_batched_env.NetHackChallengeBatchedEnv')
--- a/envs/batched_env.py
+++ b/envs/batched_env.py
+import aicrowd_gym
+import numpy as np
+
+from collections.abc import Iterable
+
+class BatchedEnv:
+    """
+    Steps several copies of one environment together.
+
+    Every copy is created by the same ``env_make_fn``. An environment whose
+    episode ends during ``batch_step`` is reset immediately.
+    """
+
+    def __init__(self, env_make_fn, num_envs=32):
+        """
+        Creates multiple copies of the environment with the same env_make_fn function
+
+        :param env_make_fn: zero-argument callable returning a new environment
+        :param num_envs: number of environment copies to create
+        """
+        self.num_envs = num_envs
+        self.envs = [env_make_fn() for _ in range(self.num_envs)]
+        # Read from the first copy; all copies come from the same factory.
+        self.num_actions = self.envs[0].action_space.n
+
+    def batch_step(self, actions):
+        """
+        Applies each action to each env in the same order as self.envs
+        Actions should be iterable and have the same length as self.envs
+
+        When an environment reports done it is reset, and the observation
+        returned for it is the one produced by that reset; its reward, done
+        flag and info still come from the finishing step.
+
+        :returns: lists of observations, rewards, dones, infos
+        """
+        assert isinstance(
+            actions, Iterable), f"actions with type {type(actions)} is not iterable"
+        assert len(actions) == self.num_envs, (
+            f"actions has length {len(actions)} which differs from num_envs ({self.num_envs})"
+        )
+
+        observations, rewards, dones, infos = [], [], [], []
+        for env, a in zip(self.envs, actions):
+            observation, reward, done, info = env.step(a)
+            if done:
+                # Start the next episode right away so the batch never stalls.
+                observation = env.reset()
+            observations.append(observation)
+            rewards.append(reward)
+            dones.append(done)
+            infos.append(info)
+
+        return observations, rewards, dones, infos
+
+    def batch_reset(self):
+        """
+        Resets all the environments in self.envs
+
+        :returns: a list with the initial observation of every environment
+        """
+        return [env.reset() for env in self.envs]
+
+
+if __name__ == '__main__':
+    # Smoke test: drive a few NetHack environments with random actions.
+    num_envs = 4
+    batched_env = BatchedEnv(
+        env_make_fn=lambda: aicrowd_gym.make('NetHackChallenge-v0'),
+        num_envs=num_envs,
+    )
+
+    observations = batched_env.batch_reset()
+    num_actions = batched_env.num_actions
+    for _ in range(50):
+        actions = np.random.randint(num_actions, size=num_envs)
+        # batch_step already resets every environment whose episode ended, so
+        # no per-environment reset is needed (BatchedEnv defines no such method).
+        observations, rewards, dones, infos = batched_env.batch_step(actions)
--- a/envs/wrappers.py
+++ b/envs/wrappers.py
+import aicrowd_gym
+import nle
+from gym.wrappers import TimeLimit
+
+
+def create_env():
+    """Build the environment that AIcrowd will assess."""
+    env = aicrowd_gym.make("NetHackChallenge-v0")
+    return env
+
+
+def addtimelimitwrapper_fn():
+    """
+    Example of how to layer wrappers on top of the nethack make function.
+
+    :returns: the env from ``create_env`` wrapped in a gym ``TimeLimit`` with
+        ``max_episode_steps`` of 10,000,000
+    """
+    return TimeLimit(create_env(), max_episode_steps=10_000_000)
\ No newline at end of file
--- a/evaluator/__init__.py
+++ b/evaluator/__init__.py
--- a/evaluator/aicrowd_helpers.py
+++ b/evaluator/aicrowd_helpers.py
-#!/usr/bin/env python
-import aicrowd_api
-import os
-
-########################################################################
-# Instatiate Event Notifier
-########################################################################
-aicrowd_events = aicrowd_api.events.AIcrowdEvents()
-
-
-def execution_start():
-    ########################################################################
-    # Register Evaluation Start event
-    ########################################################################
-    aicrowd_events.register_event(
-                event_type=aicrowd_events.AICROWD_EVENT_INFO,
-                message="execution_started",
-                payload={
-                    "event_type": "airborne_detection:execution_started"
-                    }
-                )
-
-def execution_running():
-    ########################################################################
-    # Register Evaluation Start event
-    ########################################################################
-    aicrowd_events.register_event(
-                event_type=aicrowd_events.AICROWD_EVENT_INFO,
-                message="execution_progress",
-                payload={
-                    "event_type": "airborne_detection:execution_progress",
-                    "progress": 0.0
-                    }
-                )
-
-
-def execution_progress(progress):
-    ########################################################################
-    # Register Evaluation Progress event
-    ########################################################################
-    aicrowd_events.register_event(
-                event_type=aicrowd_events.AICROWD_EVENT_INFO,
-                message="execution_progress",
-                payload={
-                    "event_type": "airborne_detection:execution_progress",
-                    "progress" : progress
-                    }
-                )
-
-def execution_success():
-    ########################################################################
-    # Register Evaluation Complete event
-    ########################################################################
-    predictions_output_path = os.getenv("PREDICTIONS_OUTPUT_PATH", False)
-
-    aicrowd_events.register_event(
-                event_type=aicrowd_events.AICROWD_EVENT_SUCCESS,
-                message="execution_success",
-                payload={
-                    "event_type": "airborne_detection:execution_success",
-                    "predictions_output_path" : predictions_output_path
-                    },
-                blocking=True
-                )
-
-def execution_error(error):
-    ########################################################################
-    # Register Evaluation Complete event
-    ########################################################################
-    aicrowd_events.register_event(
-                event_type=aicrowd_events.AICROWD_EVENT_ERROR,
-                message="execution_error",
-                payload={ #Arbitrary Payload
-                    "event_type": "airborne_detection:execution_error",
-                    "error" : error
-                    },
-                blocking=True
-                )
-
-def is_grading():
-    return os.getenv("AICROWD_IS_GRADING", False)
--- a/evaluator/music_demixing.py
+++ b/evaluator/music_demixing.py
-######################################################################################
-### This is a read-only file to allow participants to run their code locally.      ###
-### It will be over-writter during the evaluation, Please do not make any changes  ###
-### to this file.                                                                  ###
-######################################################################################
-
-import traceback
-import os
-import signal
-from contextlib import contextmanager
-from os import listdir
-from os.path import isfile, join
-
-import soundfile as sf
-import numpy as np
-from evaluator import aicrowd_helpers
-
-
-class TimeoutException(Exception): pass
-
-
-@contextmanager
-def time_limit(seconds):
-    def signal_handler(signum, frame):
-        raise TimeoutException("Prediction timed out!")
-
-    signal.signal(signal.SIGALRM, signal_handler)
-    signal.alarm(seconds)
-    try:
-        yield
-    finally:
-        signal.alarm(0)
-
-
-class MusicDemixingPredictor:
-    def __init__(self):
-        self.test_data_path = os.getenv("TEST_DATASET_PATH", os.getcwd() + "/data/test/")
-        self.results_data_path = os.getenv("RESULTS_DATASET_PATH", os.getcwd() + "/data/results/")
-        self.inference_setup_timeout = int(os.getenv("INFERENCE_SETUP_TIMEOUT_SECONDS", "900"))
-        self.inference_per_music_timeout = int(os.getenv("INFERENCE_PER_MUSIC_TIMEOUT_SECONDS", "240"))
-        self.partial_run = os.getenv("PARTIAL_RUN_MUSIC_NAMES", None)
-        self.results = []
-        self.current_music_name = None
-
-    def get_all_music_names(self):
-        valid_music_names = None
-        if self.partial_run:
-            valid_music_names = self.partial_run.split(',')
-        music_names = []
-        for folder in listdir(self.test_data_path):
-            if not isfile(join(self.test_data_path, folder)):
-                if valid_music_names is None or folder in valid_music_names:
-                    music_names.append(folder)
-        return music_names
-
-    def get_music_folder_location(self, music_name):
-        return join(self.test_data_path, music_name)
-
-    def get_music_file_location(self, music_name, instrument=None):
-        if instrument is None:
-            instrument = "mixture"
-            return join(self.test_data_path, music_name, instrument + ".wav")
-
-        if not os.path.exists(self.results_data_path):
-            os.makedirs(self.results_data_path)
-        if not os.path.exists(join(self.results_data_path, music_name)):
-            os.makedirs(join(self.results_data_path, music_name))
-
-        return join(self.results_data_path, music_name, instrument + ".wav")
-
-    def scoring(self):
-        """
-        Add scoring function in the starter kit for participant's reference
-        """
-        def sdr(references, estimates):
-            # compute SDR for one song
-            delta = 1e-7  # avoid numerical errors
-            num = np.sum(np.square(references), axis=(1, 2))
-            den = np.sum(np.square(references - estimates), axis=(1, 2))
-            num += delta
-            den += delta
-            return 10 * np.log10(num  / den)
-
-        music_names = self.get_all_music_names()
-        instruments = ["bass", "drums", "other", "vocals"]
-        scores = {}
-        for music_name in music_names:
-            print("Evaluating for: %s" % music_name)
-            scores[music_name] = {}
-            references = []
-            estimates = []
-            for instrument in instruments:
-                reference_file = join(self.test_data_path, music_name, instrument + ".wav")
-                estimate_file = self.get_music_file_location(music_name, instrument)
-                reference, _ = sf.read(reference_file)
-                estimate, _ = sf.read(estimate_file)
-                references.append(reference)
-                estimates.append(estimate)
-            references = np.stack(references)
-            estimates = np.stack(estimates)
-            references = references.astype(np.float32)
-            estimates = estimates.astype(np.float32)
-            song_score = sdr(references, estimates).tolist()
-            scores[music_name]["sdr_bass"] = song_score[0]
-            scores[music_name]["sdr_drums"] = song_score[1]
-            scores[music_name]["sdr_other"] = song_score[2]
-            scores[music_name]["sdr_vocals"] = song_score[3]
-            scores[music_name]["sdr"] = np.mean(song_score)
-        return scores
-
-
-    def evaluation(self):
-        """
-        Admin function: Runs the whole evaluation
-        """
-        aicrowd_helpers.execution_start()
-        try:
-            with time_limit(self.inference_setup_timeout):
-                self.prediction_setup()
-        except NotImplementedError:
-            print("prediction_setup doesn't exist for this run, skipping...")
-
-        aicrowd_helpers.execution_running()
-
-        music_names = self.get_all_music_names()
-
-        for music_name in music_names:
-            with time_limit(self.inference_per_music_timeout):
-                self.prediction(mixture_file_path=self.get_music_file_location(music_name),
-                                bass_file_path=self.get_music_file_location(music_name, "bass"),
-                                drums_file_path=self.get_music_file_location(music_name, "drums"),
-                                other_file_path=self.get_music_file_location(music_name, "other"),
-                                vocals_file_path=self.get_music_file_location(music_name, "vocals"),
-                )
-                
-            if not self.verify_results(music_name):
-                raise Exception("verification failed, demixed files not found.")
-        aicrowd_helpers.execution_success()
-
-    def run(self):
-        try:
-            self.evaluation()
-        except Exception as e:
-            error = traceback.format_exc()
-            print(error)
-            aicrowd_helpers.execution_error(error)
-            if not aicrowd_helpers.is_grading():
-                raise e
-
-    def prediction_setup(self):
-        """
-        You can do any preprocessing required for your codebase here : 
-            like loading your models into memory, etc.
-        """
-        raise NotImplementedError
-
-    def prediction(self, music_name, mixture_file_path, bass_file_path, drums_file_path, other_file_path,
-                   vocals_file_path):
-        """
-        This function will be called for all the flight during the evaluation.
-        NOTE: In case you want to load your model, please do so in `inference_setup` function.
-        """
-        raise NotImplementedError
-
-    def verify_results(self, music_name):
-        """
-        This function will be called to check all the files exist and other verification needed.
-        (like length of the wav files)
-        """
-        valid = True
-        valid = valid and os.path.isfile(self.get_music_file_location(music_name, "vocals"))
-        valid = valid and os.path.isfile(self.get_music_file_location(music_name, "bass"))
-        valid = valid and os.path.isfile(self.get_music_file_location(music_name, "drums"))
-        valid = valid and os.path.isfile(self.get_music_file_location(music_name, "other"))
-        return valid
--- a/nethack_baselines/other_examples/random_rollouts.py
+++ b/nethack_baselines/other_examples/random_rollouts.py
+# This is intended as an example of a barebones submission
+# Do note that not using BatchedEnv may not meet the timeout requirement.
+
+import aicrowd_gym
+import nle
+
+def main():
+    """
+    Play 200 episodes with randomly sampled actions, printing the episode
+    count after each one finishes.
+    """
+
+    # Creating the env through aicrowd_gym allows us to limit the features of
+    # the environment that we don't want participants to use during the submission
+    env = aicrowd_gym.make("NetHackChallenge-v0")
+
+    env.reset()
+    for episode_count in range(1, 201):
+        done = False
+        # Keep sampling actions until the current episode ends.
+        while not done:
+            _, _, done, _ = env.step(env.action_space.sample())
+        print(episode_count)
+        env.reset()
+
+if __name__ == "__main__":
+    main()
--- a/nethack_baselines/rllib/README.md
+++ b/nethack_baselines/rllib/README.md
+Placeholder
No results found