
Compare revisions

Commits on Source (107)
Showing with 757 additions and 355 deletions
*.wav filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
submission filter=lfs diff=lfs merge=lfs -text
submission/* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
@@ -130,3 +130,5 @@ dmypy.json
# Pyre type checker
.pyre/
nle_data/
test_batched_env.py
FROM nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 AS nhc-dev
ENV DEBIAN_FRONTEND=noninteractive
COPY apt.txt /tmp/apt.txt
@@ -29,21 +28,32 @@ RUN adduser --disabled-password \
--uid ${HOST_UID} \
${USER_NAME}
USER ${USER_NAME}
WORKDIR ${HOME_DIR}
ENV CONDA_DIR ${HOME_DIR}/.conda
WORKDIR /opt/
# Install anaconda
ENV CONDA_DIR /opt/conda
RUN wget -nv -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_4.9.2-Linux-x86_64.sh \
&& bash miniconda.sh -b -p ${CONDA_DIR} \
&& . ${CONDA_DIR}/etc/profile.d/conda.sh \
&& conda clean -y -a \
&& rm -rf miniconda.sh
ENV PATH ${CONDA_DIR}/bin:${PATH}
RUN conda install cmake -y && conda clean -y -a
# Install TorchBeast
RUN conda install cmake cudatoolkit=10.2 pytorch -y -c pytorch -c nvidia && conda clean -y -a
RUN git clone https://github.com/facebookresearch/torchbeast.git --recursive
WORKDIR /opt/torchbeast
RUN pip install -r requirements.txt
RUN pip install ./nest
RUN python setup.py install
# Install AICrowd items
WORKDIR ${HOME_DIR}
COPY --chown=1001:1001 requirements.txt ${HOME_DIR}/requirements.txt
RUN pip install -r requirements.txt --no-cache-dir
COPY --chown=1001:1001 . ${HOME_DIR}
\ No newline at end of file
# SUBMISSION IMAGE - change user and copy files
FROM nhc-dev AS nhc-submit
COPY --chown=1001:1001 . ${HOME_DIR}
USER ${USER_NAME}
from abc import ABC, abstractmethod
class BatchedAgent(ABC):
"""
    This is an abstract base class for you to load your models and perform rollouts on a
batched set of environments.
"""
    def __init__(self, num_envs: int, num_actions: int):
self.num_envs = num_envs
self.num_actions = num_actions
@abstractmethod
def batched_step(self, observations, rewards, dones, infos):
"""
Perform a batched step on lists of environment outputs.
:param observations: a list of observations
:param rewards: a list of rewards
:param dones: a list of dones
        :param infos: a list of infos
        :returns: an iterable of actions
"""
pass
import numpy as np
from agents.base import BatchedAgent
class CustomAgent(BatchedAgent):
"""A example agent... that simple acts randomly. Adapt to your needs!"""
def __init__(self, num_envs, num_actions):
"""Set up and load you model here"""
super().__init__(num_envs, num_actions)
self.seeded_state = np.random.RandomState(42)
def batched_step(self, observations, rewards, dones, infos):
"""
Perform a batched step on lists of environment outputs.
Each argument is a list of the respective gym output.
Returns an iterable of actions.
"""
actions = self.seeded_state.randint(self.num_actions, size=self.num_envs)
return actions
import torch
import numpy as np
from agents.base import BatchedAgent
from nethack_baselines.torchbeast.models import load_model
MODEL_DIR = "./saved_models/torchbeast/pretrained_0.5B"
class TorchBeastAgent(BatchedAgent):
"""
A BatchedAgent using the TorchBeast Model
"""
def __init__(self, num_envs, num_actions):
super().__init__(num_envs, num_actions)
self.model_dir = MODEL_DIR
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
self.model = load_model(MODEL_DIR, self.device)
print(f'Using Model In: {self.model_dir}, Device: {self.device}')
self.core_state = [
m.to(self.device) for m in self.model.initial_state(batch_size=num_envs)
]
def batch_inputs(self, observations, dones):
"""
        Convert lists of observations and dones to batched tensors for TorchBeast.
TorchBeast models:
* take tensors in the form: [T, B, ...]: B:= batch, T:= unroll (=1)
* take "done" as a BOOLEAN observation
"""
states = list(observations[0].keys())
obs = {k: [] for k in states}
# Unpack List[Dicts] -> Dict[Lists]
for o in observations:
for k, t in o.items():
obs[k].append(t)
# Convert to Tensor, Add Unroll Dim (=1), Move to GPU
for k in states:
obs[k] = torch.Tensor(np.stack(obs[k])[None, ...]).to(self.device)
obs["done"] = torch.Tensor(np.array(dones)[None, ...]).bool().to(self.device)
return obs, dones
def batched_step(self, observations, rewards, dones, infos):
"""
Perform a batched step on lists of environment outputs.
        TorchBeast models:
* take the core (LSTM) state as input, and return as output
* return outputs as a dict of "action", "policy_logits", "baseline"
"""
observations, dones = self.batch_inputs(observations, dones)
with torch.no_grad():
outputs, self.core_state = self.model(observations, self.core_state)
return outputs["action"].cpu().numpy()[0]
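# A minimal rollout sketch (an illustration, not the official evaluator loop;
# it assumes envs/batched_env.py is importable as below and that the pretrained
# model referenced by MODEL_DIR is present):
if __name__ == "__main__":
    import aicrowd_gym
    from envs.batched_env import BatchedEnv

    num_envs = 4
    batched_env = BatchedEnv(lambda: aicrowd_gym.make("NetHackChallenge-v0"), num_envs=num_envs)
    agent = TorchBeastAgent(num_envs, batched_env.num_actions)

    observations = batched_env.batch_reset()
    rewards, dones, infos = [0.0] * num_envs, [False] * num_envs, [{}] * num_envs
    for _ in range(10):
        actions = agent.batched_step(observations, rewards, dones, infos)
        observations, rewards, dones, infos = batched_env.batch_step(actions)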
{
"challenge_id": "neurips-2021-nethack-challenge",
"challenge_id": "neurips-2021-the-nethack-challenge",
"authors": [
"aicrowd-bot"
],
"external_dataset_used": false
"description": "(optional) description about your awesome agent",
"gpu": true
}
@@ -14,4 +14,6 @@ xvfb
ffmpeg
freeglut3-dev
gcc
g++
\ No newline at end of file
g++
ninja-build
vim
@@ -15,6 +15,12 @@ You can modify the existing `rollout.py` OR copy it (to say `your_code.py`) and
Before you submit, make sure that you have saved your models, which are needed by your inference code.
In case your files are large, you can use `git-lfs` to upload them. More details [here](https://discourse.aicrowd.com/t/how-to-upload-large-files-size-to-your-submission/2304).
## Testing your model locally
The best way to test your model is to run your submission locally.
You can do this naively by simply running `python rollout.py`, or you can simulate the extra timeout wrappers that AIcrowd will implement by using `python test_submission.py`.
## How to submit a trained model!
To make a submission, you will have to create a **private** repository on [https://gitlab.aicrowd.com/](https://gitlab.aicrowd.com/).
from gym.envs.registration import register
register('NetHackChallengeBatched-v0',
         entry_point='nle_batched_env:NetHackChallengeBatchedEnv')
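# Usage sketch (an assumption, for illustration; requires this package on the path):
# import gym
# batched_env = gym.make('NetHackChallengeBatched-v0')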
import aicrowd_gym
import numpy as np
from collections.abc import Iterable
class BatchedEnv:
def __init__(self, env_make_fn, num_envs=32):
"""
Creates multiple copies of the environment with the same env_make_fn function
"""
self.num_envs = num_envs
self.envs = [env_make_fn() for _ in range(self.num_envs)]
self.num_actions = self.envs[0].action_space.n
def batch_step(self, actions):
"""
Applies each action to each env in the same order as self.envs
Actions should be iterable and have the same length as self.envs
        Returns lists of observations, rewards, dones, infos
"""
        assert isinstance(actions, Iterable), \
            f"actions with type {type(actions)} is not iterable"
        assert len(actions) == self.num_envs, \
            f"actions has length {len(actions)}, which is different from num_envs ({self.num_envs})"
observations, rewards, dones, infos = [], [], [], []
for env, a in zip(self.envs, actions):
observation, reward, done, info = env.step(a)
if done:
observation = env.reset()
observations.append(observation)
rewards.append(reward)
dones.append(done)
infos.append(info)
return observations, rewards, dones, infos
def batch_reset(self):
"""
Resets all the environments in self.envs
"""
        observations = [env.reset() for env in self.envs]
        return observations

    def single_env_reset(self, index):
        """
        Resets the single environment at the given index and returns its observation.
        """
        return self.envs[index].reset()
if __name__ == '__main__':
num_envs = 4
batched_env = BatchedEnv(
        env_make_fn=lambda: aicrowd_gym.make('NetHackChallenge-v0'),
num_envs=4
)
observations = batched_env.batch_reset()
num_actions = batched_env.envs[0].action_space.n
for _ in range(50):
actions = np.random.randint(num_actions, size=num_envs)
observations, rewards, dones, infos = batched_env.batch_step(actions)
for done_idx in np.where(dones)[0]:
observations[done_idx] = batched_env.single_env_reset(done_idx)
import aicrowd_gym
import nle
from gym.wrappers import TimeLimit
def create_env():
"""This is the environment that will be assessed by AIcrowd."""
return aicrowd_gym.make("NetHackChallenge-v0")
def addtimelimitwrapper_fn():
"""
    An example of how to add wrappers to the env created by create_env.
    Should return a gym env that wraps the NetHack gym env.
"""
env = create_env()
env = TimeLimit(env, max_episode_steps=10_000_000)
return env
\ No newline at end of file
#!/usr/bin/env python
import aicrowd_api
import os
########################################################################
# Instantiate Event Notifier
########################################################################
aicrowd_events = aicrowd_api.events.AIcrowdEvents()
def execution_start():
########################################################################
# Register Evaluation Start event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_started",
payload={
"event_type": "airborne_detection:execution_started"
}
)
def execution_running():
########################################################################
    # Register Evaluation Progress event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_progress",
payload={
"event_type": "airborne_detection:execution_progress",
"progress": 0.0
}
)
def execution_progress(progress):
########################################################################
# Register Evaluation Progress event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_progress",
payload={
"event_type": "airborne_detection:execution_progress",
"progress" : progress
}
)
def execution_success():
########################################################################
# Register Evaluation Complete event
########################################################################
predictions_output_path = os.getenv("PREDICTIONS_OUTPUT_PATH", False)
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_SUCCESS,
message="execution_success",
payload={
"event_type": "airborne_detection:execution_success",
"predictions_output_path" : predictions_output_path
},
blocking=True
)
def execution_error(error):
########################################################################
# Register Evaluation Complete event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_ERROR,
message="execution_error",
payload={ #Arbitrary Payload
"event_type": "airborne_detection:execution_error",
"error" : error
},
blocking=True
)
def is_grading():
return os.getenv("AICROWD_IS_GRADING", False)
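# Call-order sketch (an assumption based on the helpers above, not a documented protocol):
#   execution_start()            # once, when evaluation begins
#   execution_progress(0.5)      # periodically, with progress in [0, 1]
#   execution_success()          # on successful completion (blocking)
#   execution_error(traceback)   # on failure (blocking)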
######################################################################################
### This is a read-only file to allow participants to run their code locally. ###
### It will be overwritten during the evaluation. Please do not make any changes ###
### to this file. ###
######################################################################################
import traceback
import os
import signal
from contextlib import contextmanager
from os import listdir
from os.path import isfile, join
import soundfile as sf
import numpy as np
from evaluator import aicrowd_helpers
class TimeoutException(Exception): pass
@contextmanager
def time_limit(seconds):
def signal_handler(signum, frame):
raise TimeoutException("Prediction timed out!")
signal.signal(signal.SIGALRM, signal_handler)
signal.alarm(seconds)
try:
yield
finally:
signal.alarm(0)
class MusicDemixingPredictor:
def __init__(self):
self.test_data_path = os.getenv("TEST_DATASET_PATH", os.getcwd() + "/data/test/")
self.results_data_path = os.getenv("RESULTS_DATASET_PATH", os.getcwd() + "/data/results/")
self.inference_setup_timeout = int(os.getenv("INFERENCE_SETUP_TIMEOUT_SECONDS", "900"))
self.inference_per_music_timeout = int(os.getenv("INFERENCE_PER_MUSIC_TIMEOUT_SECONDS", "240"))
self.partial_run = os.getenv("PARTIAL_RUN_MUSIC_NAMES", None)
self.results = []
self.current_music_name = None
def get_all_music_names(self):
valid_music_names = None
if self.partial_run:
valid_music_names = self.partial_run.split(',')
music_names = []
for folder in listdir(self.test_data_path):
if not isfile(join(self.test_data_path, folder)):
if valid_music_names is None or folder in valid_music_names:
music_names.append(folder)
return music_names
def get_music_folder_location(self, music_name):
return join(self.test_data_path, music_name)
    def get_music_file_location(self, music_name, instrument=None):
        if instrument is None:
            instrument = "mixture"
            return join(self.test_data_path, music_name, instrument + ".wav")
        if not os.path.exists(self.results_data_path):
            os.makedirs(self.results_data_path)
        if not os.path.exists(join(self.results_data_path, music_name)):
            os.makedirs(join(self.results_data_path, music_name))
        return join(self.results_data_path, music_name, instrument + ".wav")
def scoring(self):
"""
        Scoring function, included in the starter kit for participants' reference
"""
def sdr(references, estimates):
# compute SDR for one song
delta = 1e-7 # avoid numerical errors
num = np.sum(np.square(references), axis=(1, 2))
den = np.sum(np.square(references - estimates), axis=(1, 2))
num += delta
den += delta
return 10 * np.log10(num / den)
music_names = self.get_all_music_names()
instruments = ["bass", "drums", "other", "vocals"]
scores = {}
for music_name in music_names:
print("Evaluating for: %s" % music_name)
scores[music_name] = {}
references = []
estimates = []
for instrument in instruments:
reference_file = join(self.test_data_path, music_name, instrument + ".wav")
estimate_file = self.get_music_file_location(music_name, instrument)
reference, _ = sf.read(reference_file)
estimate, _ = sf.read(estimate_file)
references.append(reference)
estimates.append(estimate)
references = np.stack(references)
estimates = np.stack(estimates)
references = references.astype(np.float32)
estimates = estimates.astype(np.float32)
song_score = sdr(references, estimates).tolist()
scores[music_name]["sdr_bass"] = song_score[0]
scores[music_name]["sdr_drums"] = song_score[1]
scores[music_name]["sdr_other"] = song_score[2]
scores[music_name]["sdr_vocals"] = song_score[3]
scores[music_name]["sdr"] = np.mean(song_score)
return scores
def evaluation(self):
"""
Admin function: Runs the whole evaluation
"""
aicrowd_helpers.execution_start()
try:
with time_limit(self.inference_setup_timeout):
self.prediction_setup()
except NotImplementedError:
print("prediction_setup doesn't exist for this run, skipping...")
aicrowd_helpers.execution_running()
music_names = self.get_all_music_names()
for music_name in music_names:
with time_limit(self.inference_per_music_timeout):
self.prediction(mixture_file_path=self.get_music_file_location(music_name),
bass_file_path=self.get_music_file_location(music_name, "bass"),
drums_file_path=self.get_music_file_location(music_name, "drums"),
other_file_path=self.get_music_file_location(music_name, "other"),
vocals_file_path=self.get_music_file_location(music_name, "vocals"),
)
if not self.verify_results(music_name):
raise Exception("verification failed, demixed files not found.")
aicrowd_helpers.execution_success()
def run(self):
try:
self.evaluation()
except Exception as e:
error = traceback.format_exc()
print(error)
aicrowd_helpers.execution_error(error)
if not aicrowd_helpers.is_grading():
raise e
def prediction_setup(self):
"""
You can do any preprocessing required for your codebase here :
like loading your models into memory, etc.
"""
raise NotImplementedError
    def prediction(self, mixture_file_path, bass_file_path, drums_file_path, other_file_path,
                   vocals_file_path):
        """
        This function will be called for every song during the evaluation.
        NOTE: In case you want to load your model, please do so in the `prediction_setup` function.
"""
raise NotImplementedError
def verify_results(self, music_name):
"""
        This function will be called to check that all the output files exist and to run any
        other verification needed (like checking the lengths of the wav files).
"""
valid = True
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "vocals"))
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "bass"))
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "drums"))
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "other"))
return valid
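# A minimal sketch of a participant subclass (hypothetical, for illustration;
# the class name and its trivial "model" are not part of the starter kit):
class CopyMixturePredictor(MusicDemixingPredictor):
    def prediction_setup(self):
        pass  # load your real separation model here

    def prediction(self, mixture_file_path, bass_file_path, drums_file_path,
                   other_file_path, vocals_file_path):
        # Placeholder "separation": write the mixture out as every stem.
        mixture, sample_rate = sf.read(mixture_file_path)
        for path in (bass_file_path, drums_file_path, other_file_path, vocals_file_path):
            sf.write(path, mixture, sample_rate)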
# This is intended as an example of a barebones submission.
# Note that not using BatchedEnv may not meet the timeout requirement.
import aicrowd_gym
import nle
def main():
"""
    This function will be called for the training phase.
"""
    # Using aicrowd_gym.make here allows the evaluator to limit environment
    # features that participants should not use during the submission
env = aicrowd_gym.make("NetHackChallenge-v0")
env.reset()
done = False
episode_count = 0
while episode_count < 200:
_, _, done, _ = env.step(env.action_space.sample())
if done:
episode_count += 1
print(episode_count)
env.reset()
if __name__ == "__main__":
main()
Placeholder
# TorchBeast NetHackChallenge Benchmark
This is a baseline model for the NetHack Challenge based on
[TorchBeast](https://github.com/facebookresearch/torchbeast) - FAIR's
implementation of IMPALA for PyTorch.
It comes with all the code you need to train, run and submit a model
that is based on the results published in the original NLE paper.
This implementation can run with 2 GPUs (one for acting and one for
learning) and runs many simultaneous environments with dynamic
batching. Currently it is configured to run with only 1 GPU.
## Installation
**[Native Installation]**
To get this running you'll need to follow the TorchBeast installation instructions for PolyBeast from the [TorchBeast repo](https://github.com/facebookresearch/torchbeast#faster-version-polybeast).
**[Docker Installation]**
You can fast-track the installation of PolyBeast by using the competition's own Dockerfile. Prebuilt images are also hosted on Docker Hub. The following commands open an image that allows you to run the baseline.
**To Run Existing Docker Image**
`docker pull fairnle/challenge:dev`
```docker run -it -v `pwd`:/home/aicrowd --gpus='all' fairnle/challenge:dev```
**To Build Your Own Image**
*Dev Image* - runs with the root user and doesn't copy your files into the image
`docker build -t competition --target nhc-dev .`
*or Submission Image* - runs with the aicrowd user and copies all your files into the image
`docker build -t competition --target nhc-submit .`
*Run Image*
```docker run -it -v `pwd`:/home/aicrowd --gpus='all' competition```
## Running The Baseline
Once installed, in this directory run:
`python polyhydra.py`
To change parameters, edit `config.yaml`, or to override parameters
from the command-line run:
`python polyhydra.py embedding_dim=16`
The training will save checkpoints to a new directory (`outputs`) and
should the environments create any outputs, they will be saved to
`nle_data` (by default, recordings of episodes are switched off to
save space).
The default polybeast runs on 2 GPUs, one for the learner and one for
the actors. However, with only one GPU you can still run
polybeast - just override the `actor_device` argument:
`python polyhydra.py actor_device=cpu`
NOTE: if you get a "Too many open files" error, try: `ulimit -Sn 10000`.
## Making a submission
In the output directory of your trained model, you should find two files, `checkpoint.tar` and `config.yaml`. Add both of them to your submission repo. Then change the `MODEL_DIR` variable in `agents/torchbeast_agent.py` to point to the directory where these files are located. Finally, set the `AGENT` in `submission_config.py` to `'TorchBeastAgent'` so that your TorchBeast agent is used for the submission.
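For example (the directory name is illustrative, and the exact form of the `AGENT` assignment should match what `submission_config.py` expects):

```python
# agents/torchbeast_agent.py
MODEL_DIR = "./saved_models/torchbeast/my_trained_run"  # illustrative path

# submission_config.py
AGENT = TorchBeastAgent
```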
After that, follow [these instructions](/docs/SUBMISSION.md) to submit your model to AIcrowd!
## Repo Structure
```
baselines/torchbeast
├── core/
├── models/ # <- Models HERE
├── util/
├── config.yaml # <- Flags HERE
├── polybeast_env.py # <- Training Env HERE
├── polybeast_learner.py # <- Training Loop HERE
└── polyhydra.py # <- main() HERE
```
The structure is simple, compartmentalising the environment setup,
training loop and models into different files. You can tweak any of
these separately, and add parameters to the flags (which are passed
around).
## About the Model
The model we provide (`BaselineNet`) is simple and lives entirely in
`models/baseline.py`.
* It encodes the dungeon into a fixed-size representation
(`GlyphEncoder`)
* It encodes the topline message into a fixed-size representation
(`MessageEncoder`)
* It encodes the bottom line statistics (e.g. armour class, health) into
a fixed-size representation (`BLStatsEncoder`)
* It concatenates all these outputs into a fixed-size vector, runs it
through a fully connected layer, and feeds the result into an LSTM.
* The outputs of the LSTM go through policy and baseline heads (since
this is an actor-critic algorithm).
As you can see there is a lot of data to play with in this game, and
plenty to try, both in modelling and in the learning algorithms used.
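To make the dataflow concrete, here is a heavily simplified sketch of that encoder/LSTM/heads structure. This is not the actual `BaselineNet` code - the encoder internals and dimensions below are stand-ins; see `models/baseline.py` for the real encoders:

```python
import torch
import torch.nn as nn

class SketchNet(nn.Module):
    """Toy stand-in for BaselineNet: three encoders -> FC -> LSTM -> two heads."""
    def __init__(self, num_actions, hidden_dim=256):
        super().__init__()
        # Each encoder maps its observation to a fixed-size vector.
        self.glyph_encoder = nn.Sequential(nn.Flatten(), nn.LazyLinear(hidden_dim))
        self.message_encoder = nn.Sequential(nn.Flatten(), nn.LazyLinear(hidden_dim))
        self.blstats_encoder = nn.Sequential(nn.Flatten(), nn.LazyLinear(hidden_dim))
        self.fc = nn.Linear(3 * hidden_dim, hidden_dim)
        self.core = nn.LSTM(hidden_dim, hidden_dim)       # recurrent core
        self.policy = nn.Linear(hidden_dim, num_actions)  # actor head
        self.baseline = nn.Linear(hidden_dim, 1)          # critic (baseline) head

    def forward(self, glyphs, message, blstats, core_state):
        x = torch.cat([
            self.glyph_encoder(glyphs.float()),
            self.message_encoder(message.float()),
            self.blstats_encoder(blstats.float()),
        ], dim=-1)
        core_input = torch.relu(self.fc(x)).unsqueeze(0)  # add T(=1) dimension
        core_output, core_state = self.core(core_input, core_state)
        core_output = core_output.squeeze(0)
        return self.policy(core_output), self.baseline(core_output), core_state
```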
## Improvement Ideas
*Here are some ideas we haven't tried yet, but might be easy places to start. Happy tinkering!*
### Model Improvements (`baseline.py`)
* The model is currently not using the terminal observations
(`tty_chars`, `tty_colors`, `tty_cursor`), so it has no idea about
menus - could we make use of these somehow?
* The bottom-line stats are very informative, but very simply encoded
in `BLStatsEncoder` - is there a better way to do this?
* The `GlyphEncoder` builds an embedding for the glyphs, and then takes
a crop of these centered around the player icon coordinates
(`@`). Should the crop reuse the same embedding matrix?
* The current model constrains the vast action space to a smaller
subset of actions. Is it too constrained? Or not constrained enough?
### Environment Improvements (`polybeast_env.py`)
* Opening menus (such as when spellcasting) does not advance the in-game
timer. However, models can also get stuck in menus, since they have to
learn which buttons to press to close the menu. Can changing the
penalty for not advancing the in-game timer improve the result? (See
the sketch after this list.)
* The NetHackChallenge assesses the score on random character
assignments. Might it be easier to learn on just a few of these at
the beginning of training?
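As a concrete starting point for the penalty idea above, here is a sketch of a reward-shaping wrapper. It is hypothetical: the wrapper name is ours, and the index of the time field in `blstats` is an assumption you should verify against NLE:

```python
import gym

class StuckInMenuPenalty(gym.Wrapper):
    """Subtract a small penalty whenever the in-game turn counter does not advance."""

    def __init__(self, env, penalty=0.001, time_index=20):  # time_index: assumed blstats slot
        super().__init__(env)
        self.penalty = penalty
        self.time_index = time_index
        self._last_time = None

    def reset(self, **kwargs):
        obs = self.env.reset(**kwargs)
        self._last_time = obs["blstats"][self.time_index]
        return obs

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        current_time = obs["blstats"][self.time_index]
        if current_time == self._last_time:
            reward -= self.penalty
        self._last_time = current_time
        return obs, reward, done, info
```

A wrapper like this could be applied inside `addtimelimitwrapper_fn`, alongside `TimeLimit`.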
### Algorithm/Optimisation Improvements (`polybeast_learner.py`)
* Can we add some intrinsic rewards to help our agents learn?
* Should we add penalties to disincentivise pathological behaviour we
observe?
* Can we improve the model by using a different optimizer?
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
defaults:
- hydra/job_logging: colorlog
- hydra/hydra_logging: colorlog
# - hydra/launcher: submitit_slurm
# # To Be Used With hydra submitit_slurm if you have SLURM cluster
# # pip install hydra-core hydra_colorlog
# # can set these on the commandline too, e.g. `hydra.launcher.partition=dev`
# hydra:
# launcher:
# timeout_min: 4300
# cpus_per_task: 20
# gpus_per_node: 2
# tasks_per_node: 1
# mem_gb: 20
# nodes: 1
# partition: dev
# comment: null
# max_num_timeout: 5 # will requeue on timeout or preemption
name: null # can use this to have multiple runs with same params, eg name=1,2,3,4,5
## WANDB settings
wandb: false # Enable wandb logging.
project: nethack_challenge # The wandb project name.
entity: user1 # The wandb user to log to.
group: group1 # The wandb group for the run.
# POLYBEAST ENV settings
mock: false # Use mock environment instead of NetHack.
single_ttyrec: true # Record ttyrec only for actor 0.
num_seeds: 0 # If larger than 0, samples a fixed number of environment seeds to be used.
write_profiler_trace: false # Collect and write a profiler trace for chrome://tracing/.
fn_penalty_step: constant # Function to accumulate penalty.
penalty_time: 0.0 # Penalty per time step in the episode.
penalty_step: -0.01 # Penalty per step in the episode.
reward_lose: 0 # Reward for losing (dying before finding the staircase).
reward_win: 100 # Reward for winning (finding the staircase).
state_counter: none # Method for counting state visits. Default none.
character: 'mon-hum-neu-mal' # Specification of the NetHack character.
## typical characters we use
# 'mon-hum-neu-mal'
# 'val-dwa-law-fem'
# 'wiz-elf-cha-mal'
# 'tou-hum-neu-fem'
# '@' # random (used in Challenge assessment)
# RUN settings.
mode: train # Training or test mode.
env: challenge # Name of Gym environment to create.
# # env (task) names: challenge, staircase, pet,
# eat, gold, score, scout, oracle
# TRAINING settings.
num_actors: 256 # Number of actors.
total_steps: 1e9 # Total environment steps to train for. Will be cast to int.
batch_size: 32 # Learner batch size.
unroll_length: 80 # The unroll length (time dimension).
num_learner_threads: 1 # Number of learner threads.
num_inference_threads: 1 # Number of inference threads.
disable_cuda: false # Disable CUDA.
learner_device: cuda:0 # Set learner device.
actor_device: cuda:0 # Set actor device.
# OPTIMIZER settings. (RMS Prop)
learning_rate: 0.0002 # Learning rate.
grad_norm_clipping: 40 # Global gradient norm clip.
alpha: 0.99 # RMSProp smoothing constant.
momentum: 0 # RMSProp momentum.
epsilon: 0.000001 # RMSProp epsilon.
# LOSS settings.
entropy_cost: 0.001 # Entropy cost/multiplier.
baseline_cost: 0.5 # Baseline cost/multiplier.
discounting: 0.999 # Discounting factor.
normalize_reward: true # Normalizes reward by dividing by running stdev from mean.
# MODEL settings.
model: baseline # Name of model to build (see models/__init__.py).
use_lstm: true # Use LSTM in agent model.
hidden_dim: 256 # Size of hidden representations.
embedding_dim: 64 # Size of glyph embeddings.
layers: 5 # Number of ConvNet Layers for Glyph Model
crop_dim: 9 # Size of crop (c x c)
use_index_select: true # Whether to use index_select instead of embedding lookup (for speed reasons).
restrict_action_space: True # Use a restricted ACTION SPACE (only nethack.USEFUL_ACTIONS)
msg:
hidden_dim: 64 # Hidden dimension for message encoder.
embedding_dim: 32 # Embedding dimension for characters in message encoder.
# TEST settings.
load_dir: null # Path to load a model from for testing