Comparing 120 commits on source: 711 additions and 350 deletions.
*.wav filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
submission filter=lfs diff=lfs merge=lfs -text
submission/* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
@@ -130,3 +130,5 @@ dmypy.json
# Pyre type checker
.pyre/
nle_data/
test_batched_env.py
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 AS nhc-dev
ENV DEBIAN_FRONTEND=noninteractive
COPY apt.txt /tmp/apt.txt
RUN apt -qq update && apt -qq install -y --no-install-recommends `cat /tmp/apt.txt` \
&& rm -rf /var/cache/*
# Unicode support:
RUN locale-gen en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
# Create user home directory
ENV USER_NAME aicrowd
ENV HOME_DIR /home/$USER_NAME
# Replace HOST_UID/HOST_GID with your user / group id
ENV HOST_UID 1001
ENV HOST_GID 1001
# Use bash as default shell, rather than sh
ENV SHELL /bin/bash
# Set up user
RUN adduser --disabled-password \
--gecos "Default user" \
--uid ${HOST_UID} \
${USER_NAME}
WORKDIR /opt/
# Install anaconda
ENV CONDA_DIR /opt/conda
RUN wget -nv -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_4.9.2-Linux-x86_64.sh \
&& bash miniconda.sh -b -p ${CONDA_DIR} \
&& . ${CONDA_DIR}/etc/profile.d/conda.sh \
&& conda clean -y -a \
&& rm -rf miniconda.sh
ENV PATH ${CONDA_DIR}/bin:${PATH}
# Install TorchBeast
RUN conda install cmake cudatoolkit=10.2 pytorch -y -c pytorch -c nvidia && conda clean -y -a
RUN git clone https://github.com/facebookresearch/torchbeast.git --recursive
WORKDIR /opt/torchbeast
RUN pip install -r requirements.txt
RUN pip install ./nest
RUN python setup.py install
# Install AICrowd items
WORKDIR ${HOME_DIR}
COPY --chown=1001:1001 requirements.txt ${HOME_DIR}/requirements.txt
RUN pip install -r requirements.txt --no-cache-dir
# SUBMISSION IMAGE - change user and copy files
FROM nhc-dev AS nhc-submit
COPY --chown=1001:1001 . ${HOME_DIR}
USER ${USER_NAME}
from abc import ABC, abstractmethod
class BatchedAgent(ABC):
"""
This is an abstract base class for you to load your models and perform rollouts on a
batched set of environments.
"""
def __init__(self, num_envs: int, num_actions: int):
self.num_envs = num_envs
self.num_actions = num_actions
@abstractmethod
def batched_step(self, observations, rewards, dones, infos):
"""
Perform a batched step on lists of environment outputs.
:param observations: a list of observations
:param rewards: a list of rewards
:param dones: a list of dones
:param infos: a list of infos
:returns: an iterable of actions
"""
pass
import numpy as np
from agents.base import BatchedAgent
class CustomAgent(BatchedAgent):
"""A example agent... that simple acts randomly. Adapt to your needs!"""
def __init__(self, num_envs, num_actions):
"""Set up and load you model here"""
super().__init__(num_envs, num_actions)
self.seeded_state = np.random.RandomState(42)
def batched_step(self, observations, rewards, dones, infos):
"""
Perform a batched step on lists of environment outputs.
Each argument is a list of the respective gym output.
Returns an iterable of actions.
"""
actions = self.seeded_state.randint(self.num_actions, size=self.num_envs)
return actions
import torch
import numpy as np
from agents.base import BatchedAgent
from nethack_baselines.torchbeast.models import load_model
MODEL_DIR = "./saved_models/torchbeast/pretrained_0.5B"
class TorchBeastAgent(BatchedAgent):
"""
A BatchedAgent using the TorchBeast Model
"""
def __init__(self, num_envs, num_actions):
super().__init__(num_envs, num_actions)
self.model_dir = MODEL_DIR
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
self.model = load_model(MODEL_DIR, self.device)
print(f'Using Model In: {self.model_dir}, Device: {self.device}')
self.core_state = [
m.to(self.device) for m in self.model.initial_state(batch_size=num_envs)
]
def batch_inputs(self, observations, dones):
"""
Convert lists of observations and dones to batched tensors for TorchBeast.
TorchBeast models:
* take tensors in the form: [T, B, ...]: B:= batch, T:= unroll (=1)
* take "done" as a BOOLEAN observation
"""
states = list(observations[0].keys())
obs = {k: [] for k in states}
# Unpack List[Dicts] -> Dict[Lists]
for o in observations:
for k, t in o.items():
obs[k].append(t)
# Convert to Tensor, Add Unroll Dim (=1), Move to GPU
for k in states:
obs[k] = torch.Tensor(np.stack(obs[k])[None, ...]).to(self.device)
obs["done"] = torch.Tensor(np.array(dones)[None, ...]).bool().to(self.device)
return obs, dones
def batched_step(self, observations, rewards, dones, infos):
"""
Perform a batched step on lists of environment outputs.
TorchBeast models:
* take the core (LSTM) state as input, and return as output
* return outputs as a dict of "action", "policy_logits", "baseline"
"""
observations, dones = self.batch_inputs(observations, dones)
with torch.no_grad():
outputs, self.core_state = self.model(observations, self.core_state)
return outputs["action"].cpu().numpy()[0]
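# A minimal rollout sketch (hypothetical, not part of the starter kit) showing how an
# agent like this can be driven by the BatchedEnv defined later in this diff:
#
#     env = BatchedEnv(env_make_fn=lambda: aicrowd_gym.make("NetHackChallenge-v0"), num_envs=4)
#     agent = TorchBeastAgent(env.num_envs, env.num_actions)
#     observations = env.batch_reset()
#     rewards = [0.0] * env.num_envs
#     dones = [False] * env.num_envs
#     infos = [{}] * env.num_envs
#     for _ in range(100):
#         actions = agent.batched_step(observations, rewards, dones, infos)
#         observations, rewards, dones, infos = env.batch_step(actions)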
{
  "challenge_id": "neurips-2021-the-nethack-challenge",
  "authors": [
    "aicrowd-bot"
  ],
  "external_dataset_used": false,
  "description": "(optional) description about your awesome agent",
  "gpu": true
}
build-essential
git
flex
bison
libbz2-dev
wget
ca-certificates
locales
libglib2.0
libsm6
libxext6
libxrender-dev
xvfb
ffmpeg
freeglut3-dev
gcc
g++
ninja-build
vim
@@ -14,8 +14,8 @@ Few of the most common ways are as follows:
* **Create your new conda environment**
```sh
-conda create --name music_demixing_challenge
-conda activate music_demixing_challenge
+conda create --name nle
+conda activate nle
```
* **Your code specific dependencies**
......
@@ -7,9 +7,7 @@ This file will help you in making your first submission.
The evaluator will execute `run.sh` for generating predictions, so please remember to include it in your submission!
The inline documentation of `test.py` will guide you with interfacing with the codebase properly. You can check TODOs inside it to learn about the functions you need to implement.
-You can modify the existing `test.py` OR copy it (to say `your_code.py`) and change it.
+You can modify the existing `rollout.py` OR copy it (to say `your_code.py`) and change it.
## IMPORTANT: Saving Models before submission!
@@ -17,6 +15,12 @@ You can modify the existing `test.py` OR copy it (to say `your_code.py`) and cha
Before you submit make sure that you have saved your models, which are needed by your inference code.
In case your files are large, you can use `git-lfs` to upload them. More details [here](https://discourse.aicrowd.com/t/how-to-upload-large-files-size-to-your-submission/2304).
## Testing your model locally
The best way to test your model is to run your submission locally.
You can do this naively by simply running `python rollout.py` or you can simulate the extra timeout wrappers that AIcrowd will implement by using `python test_submission.py`.
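For intuition, here is a minimal sketch of the kind of signal-based time limit such a wrapper can apply around a step. It mirrors the `time_limit` helper that appears later in this diff; the timeout value and usage are hypothetical, and `signal.SIGALRM` is Unix-only:

```python
import signal
from contextlib import contextmanager

@contextmanager
def time_limit(seconds):
    # Raise if the wrapped block runs longer than `seconds`.
    def handler(signum, frame):
        raise TimeoutError("step timed out")
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)

# Hypothetical usage: fail fast if a single env.step hangs.
# with time_limit(5):
#     observation, reward, done, info = env.step(action)
```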
## How to submit a trained model!
To make a submission, you will have to create a **private** repository on [https://gitlab.aicrowd.com/](https://gitlab.aicrowd.com/).
......
from gym.envs.registration import register
register('NetHackChallengeBatched-v0',
entry_point='nle_batched_env.NetHackChallengeBatchedEnv')
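# Hypothetical usage once this module is imported and registration has run:
#     import gym
#     env = gym.make('NetHackChallengeBatched-v0')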
import aicrowd_gym
import numpy as np
from collections.abc import Iterable
class BatchedEnv:
def __init__(self, env_make_fn, num_envs=32):
"""
Creates multiple copies of the environment with the same env_make_fn function
"""
self.num_envs = num_envs
self.envs = [env_make_fn() for _ in range(self.num_envs)]
self.num_actions = self.envs[0].action_space.n
def batch_step(self, actions):
"""
Applies each action to each env in the same order as self.envs
Actions should be iterable and have the same length as self.envs
Returns lists of observations, rewards, dones, infos
"""
assert isinstance(actions, Iterable), f"actions with type {type(actions)} is not iterable"
assert len(actions) == self.num_envs, f"actions has length {len(actions)}, which differs from num_envs ({self.num_envs})"
observations, rewards, dones, infos = [], [], [], []
for env, a in zip(self.envs, actions):
observation, reward, done, info = env.step(a)
if done:
observation = env.reset()
observations.append(observation)
rewards.append(reward)
dones.append(done)
infos.append(info)
return observations, rewards, dones, infos
def batch_reset(self):
"""
Resets all the environments in self.envs
"""
observation = [env.reset() for env in self.envs]
return observation
if __name__ == '__main__':
num_envs = 4
batched_env = BatchedEnv(
env_make_fn=lambda: aicrowd_gym.make('NetHackChallenge-v0'),
num_envs=4
)
observations = batched_env.batch_reset()
num_actions = batched_env.envs[0].action_space.n
for _ in range(50):
actions = np.random.randint(num_actions, size=num_envs)
observations, rewards, dones, infos = batched_env.batch_step(actions)
# Note: batch_step already resets any environment that reports done and
# returns the fresh observation, so no extra per-env reset is needed here.
import aicrowd_gym
import nle
from gym.wrappers import TimeLimit
def create_env():
"""This is the environment that will be assessed by AIcrowd."""
return aicrowd_gym.make("NetHackChallenge-v0")
def addtimelimitwrapper_fn():
"""
An example of how to add wrappers to the nethack_make_fn
Should return a gym env which wraps the nethack gym env
"""
env = create_env()
env = TimeLimit(env, max_episode_steps=10_000_000)
return env
\ No newline at end of file
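# Hypothetical usage (a sketch, not part of this file): pass the wrapped
# constructor to the BatchedEnv defined earlier in this diff, e.g.
#     batched_env = BatchedEnv(env_make_fn=addtimelimitwrapper_fn, num_envs=4)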
#!/usr/bin/env python
import aicrowd_api
import os
########################################################################
# Instantiate Event Notifier
########################################################################
aicrowd_events = aicrowd_api.events.AIcrowdEvents()
def execution_start():
########################################################################
# Register Evaluation Start event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_started",
payload={
"event_type": "airborne_detection:execution_started"
}
)
def execution_running():
########################################################################
# Register Evaluation Progress event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_progress",
payload={
"event_type": "airborne_detection:execution_progress",
"progress": 0.0
}
)
def execution_progress(progress):
########################################################################
# Register Evaluation Progress event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_progress",
payload={
"event_type": "airborne_detection:execution_progress",
"progress" : progress
}
)
def execution_success():
########################################################################
# Register Evaluation Complete event
########################################################################
predictions_output_path = os.getenv("PREDICTIONS_OUTPUT_PATH", False)
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_SUCCESS,
message="execution_success",
payload={
"event_type": "airborne_detection:execution_success",
"predictions_output_path" : predictions_output_path
},
blocking=True
)
def execution_error(error):
########################################################################
# Register Evaluation Complete event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_ERROR,
message="execution_error",
payload={ #Arbitrary Payload
"event_type": "airborne_detection:execution_error",
"error" : error
},
blocking=True
)
def is_grading():
return os.getenv("AICROWD_IS_GRADING", False)
######################################################################################
### This is a read-only file to allow participants to run their code locally. ###
### It will be overwritten during the evaluation. Please do not make any changes ###
### to this file. ###
######################################################################################
import traceback
import os
import signal
from contextlib import contextmanager
from os import listdir
from os.path import isfile, join
import soundfile as sf
import numpy as np
from evaluator import aicrowd_helpers
class TimeoutException(Exception): pass
@contextmanager
def time_limit(seconds):
def signal_handler(signum, frame):
raise TimeoutException("Prediction timed out!")
signal.signal(signal.SIGALRM, signal_handler)
signal.alarm(seconds)
try:
yield
finally:
signal.alarm(0)
class MusicDemixingPredictor:
def __init__(self):
self.test_data_path = os.getenv("TEST_DATASET_PATH", os.getcwd() + "/data/test/")
self.results_data_path = os.getenv("RESULTS_DATASET_PATH", os.getcwd() + "/data/results/")
self.inference_setup_timeout = int(os.getenv("INFERENCE_SETUP_TIMEOUT_SECONDS", "900"))
self.inference_per_music_timeout = int(os.getenv("INFERENCE_PER_MUSIC_TIMEOUT_SECONDS", "240"))
self.partial_run = os.getenv("PARTIAL_RUN_MUSIC_NAMES", None)
self.results = []
self.current_music_name = None
def get_all_music_names(self):
valid_music_names = None
if self.partial_run:
valid_music_names = self.partial_run.split(',')
music_names = []
for folder in listdir(self.test_data_path):
if not isfile(join(self.test_data_path, folder)):
if valid_music_names is None or folder in valid_music_names:
music_names.append(folder)
return music_names
def get_music_folder_location(self, music_name):
return join(self.test_data_path, music_name)
def get_music_file_location(self, music_name, instrument=None):
    if instrument is None:
        instrument = "mixture"
        return join(self.test_data_path, music_name, instrument + ".wav")

    if not os.path.exists(self.results_data_path):
        os.makedirs(self.results_data_path)
    if not os.path.exists(join(self.results_data_path, music_name)):
        os.makedirs(join(self.results_data_path, music_name))
    return join(self.results_data_path, music_name, instrument + ".wav")
def scoring(self):
"""
Scoring function included in the starter kit for participants' reference
"""
def sdr(references, estimates):
# compute SDR for one song
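# SDR = 10 * log10((sum(ref**2) + delta) / (sum((ref - est)**2) + delta)),
# with sums over the time and channel axes, computed per source.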
delta = 1e-7 # avoid numerical errors
num = np.sum(np.square(references), axis=(1, 2))
den = np.sum(np.square(references - estimates), axis=(1, 2))
num += delta
den += delta
return 10 * np.log10(num / den)
music_names = self.get_all_music_names()
instruments = ["bass", "drums", "other", "vocals"]
scores = {}
for music_name in music_names:
print("Evaluating for: %s" % music_name)
scores[music_name] = {}
references = []
estimates = []
for instrument in instruments:
reference_file = join(self.test_data_path, music_name, instrument + ".wav")
estimate_file = self.get_music_file_location(music_name, instrument)
reference, _ = sf.read(reference_file)
estimate, _ = sf.read(estimate_file)
references.append(reference)
estimates.append(estimate)
references = np.stack(references)
estimates = np.stack(estimates)
references = references.astype(np.float32)
estimates = estimates.astype(np.float32)
song_score = sdr(references, estimates).tolist()
scores[music_name]["sdr_bass"] = song_score[0]
scores[music_name]["sdr_drums"] = song_score[1]
scores[music_name]["sdr_other"] = song_score[2]
scores[music_name]["sdr_vocals"] = song_score[3]
scores[music_name]["sdr"] = np.mean(song_score)
return scores
def evaluation(self):
"""
Admin function: Runs the whole evaluation
"""
aicrowd_helpers.execution_start()
try:
with time_limit(self.inference_setup_timeout):
self.prediction_setup()
except NotImplementedError:
print("prediction_setup doesn't exist for this run, skipping...")
aicrowd_helpers.execution_running()
music_names = self.get_all_music_names()
for music_name in music_names:
with time_limit(self.inference_per_music_timeout):
self.prediction(mixture_file_path=self.get_music_file_location(music_name),
bass_file_path=self.get_music_file_location(music_name, "bass"),
drums_file_path=self.get_music_file_location(music_name, "drums"),
other_file_path=self.get_music_file_location(music_name, "other"),
vocals_file_path=self.get_music_file_location(music_name, "vocals"),
)
if not self.verify_results(music_name):
raise Exception("verification failed, demixed files not found.")
aicrowd_helpers.execution_success()
def run(self):
try:
self.evaluation()
except Exception as e:
error = traceback.format_exc()
print(error)
aicrowd_helpers.execution_error(error)
if not aicrowd_helpers.is_grading():
raise e
def prediction_setup(self):
"""
You can do any preprocessing required for your codebase here :
like loading your models into memory, etc.
"""
raise NotImplementedError
def prediction(self, mixture_file_path, bass_file_path, drums_file_path, other_file_path, vocals_file_path):
"""
This function will be called for each music file during the evaluation.
NOTE: In case you want to load your model, please do so in the `prediction_setup` function.
"""
raise NotImplementedError
def verify_results(self, music_name):
"""
This function will be called to check that all the files exist and to run any other verification needed
(like checking the length of the wav files).
"""
valid = True
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "vocals"))
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "bass"))
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "drums"))
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "other"))
return valid
# This is intended as an example of a barebones submission.
# Note that not using BatchedEnv may not meet the timeout requirements.
import aicrowd_gym
import nle
def main():
"""
Runs a barebones local rollout with random actions.
"""
# This allows us to restrict the environment features that
# participants are not allowed to use during the submission.
env = aicrowd_gym.make("NetHackChallenge-v0")
env.reset()
done = False
episode_count = 0
while episode_count < 200:
_, _, done, _ = env.step(env.action_space.sample())
if done:
episode_count += 1
print(episode_count)
env.reset()
if __name__ == "__main__":
main()
Placeholder
# TorchBeast NetHackChallenge Benchmark
This is a baseline model for the NetHack Challenge based on
[TorchBeast](https://github.com/facebookresearch/torchbeast) - FAIR's
implementation of IMPALA for PyTorch.
It comes with all the code you need to train, run and submit a model
that is based on the results published in the original NLE paper.
This implementation can run with 2 GPUs (one for acting and one for
learning), and runs many simultaneous environments with dynamic
batching. Currently it has been configured to run with only 1 GPU.
## Installation
**[Native Installation]**
To get this running you'll need to follow the TorchBeast installation instructions for PolyBeast from the [TorchBeast repo](https://github.com/facebookresearch/torchbeast#faster-version-polybeast).
**[Docker Installation]**
You can fast-track the installation of PolyBeast by running the competition's own Dockerfile. Prebuilt images are also hosted on Docker Hub. These commands open an image that allows you to run the baseline.
**To Run Existing Docker Image**
`docker pull fairnle/challenge:dev`
```docker run -it -v `pwd`:/home/aicrowd --gpus='all' fairnle/challenge:dev```
**To Build Your Own Image**
*Dev Image* - runs as the root user, and doesn't copy your files into the image
`docker build -t competition --target nhc-dev .`
*or Submission Image* - runs as the aicrowd user, and copies all your files into the image
`docker build -t competition --target nhc-submit .`
*Run Image*
```docker run -it -v `pwd`:/home/aicrowd --gpus='all' competition```
## Running The Baseline
Once installed, in this directory run:
`python polyhydra.py`
To change parameters, edit `config.yaml`, or to override parameters
from the command-line run:
`python polyhydra.py embedding_dim=16`
The training will save checkpoints to a new directory (`outputs`) and
should the environments create any outputs, they will be saved to
`nle_data` (by default, recordings of episodes are switched off to
save space).
The default polybeast runs on 2 GPUs, one for the learner and one for
the actors. However, with only one GPU you can still run
polybeast - just override the `actor_device` argument:
`python polyhydra.py actor_device=cpu`
NOTE: if you get a "Too many open files" error, try: `ulimit -Sn 10000`.
## Making a submission
In the output directory of your trained model, you should find two files: `checkpoint.tar` and `config.yaml`. Add both of them to your submission repo. Then change the `MODEL_DIR` variable in `agents/torchbeast_agent.py` to point to the directory where these files are located. Finally, set `AGENT` in `submission_config.py` to `'TorchBeastAgent'` so that your TorchBeast agent variation is used for the submission.
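As a rough sketch of those last two edits (the path and the exact `submission_config.py` conventions here are assumptions, so check them against your copy of the starter kit):

```python
# agents/torchbeast_agent.py
MODEL_DIR = "./saved_models/torchbeast/my_run"  # hypothetical path holding checkpoint.tar + config.yaml

# submission_config.py
from agents.torchbeast_agent import TorchBeastAgent

AGENT = TorchBeastAgent  # or the string 'TorchBeastAgent', per the starter kit's convention
```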
After that, follow [these instructions](/docs/SUBMISSION.md) to submit your model to AIcrowd!
## Repo Structure
```
baselines/torchbeast
├── core/
├── models/ # <- Models HERE
├── util/
├── config.yaml # <- Flags HERE
├── polybeast_env.py # <- Training Env HERE
├── polybeast_learner.py # <- Training Loop HERE
└── polyhydra.py # <- main() HERE
```
The structure is simple, compartmentalising the environment setup,
training loop and models into different files. You can tweak any of
these separately, and add parameters to the flags (which are passed
around).
## About the Model
The model we provide (`BaselineNet`) is simple, and lives entirely in
`models/baseline.py`.
* It encodes the dungeon into a fixed-size representation
(`GlyphEncoder`)
* It encodes the topline message into a fixed-size representation
(`MessageEncoder`)
* It encodes the bottom-line statistics (e.g. armour class, health) into
a fixed-size representation (`BLStatsEncoder`)
* It concatenates all these outputs into a fixed size, runs this
through a fully connected layer, and into an LSTM.
* The outputs of the LSTM go through policy and baseline heads (since
this is an actor-critic algorithm).
As you can see there is a lot of data to play with in this game, and
plenty to try, both in modelling and in the learning algorithms used.
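To make that flow concrete, here is a schematic sketch of the pipeline. The encoders below are illustrative stand-ins (plain linear layers with made-up input sizes), not the real `GlyphEncoder`/`MessageEncoder`/`BLStatsEncoder` from `models/baseline.py`:

```python
import torch
import torch.nn as nn

class BaselineNetSketch(nn.Module):
    """Schematic of the encoders -> FC -> LSTM -> heads flow described above."""

    def __init__(self, num_actions, hidden_dim=256):
        super().__init__()
        # Stand-ins for GlyphEncoder / MessageEncoder / BLStatsEncoder:
        self.glyph_encoder = nn.Linear(512, hidden_dim)
        self.message_encoder = nn.Linear(256, hidden_dim)
        self.blstats_encoder = nn.Linear(26, hidden_dim)
        self.fc = nn.Linear(3 * hidden_dim, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim)
        self.policy = nn.Linear(hidden_dim, num_actions)  # actor head
        self.baseline = nn.Linear(hidden_dim, 1)          # critic head

    def forward(self, glyphs, message, blstats, core_state):
        # Inputs are [T, B, features]; concatenate the fixed-size encodings.
        x = torch.cat([
            self.glyph_encoder(glyphs),
            self.message_encoder(message),
            self.blstats_encoder(blstats),
        ], dim=-1)
        x = torch.relu(self.fc(x))
        x, core_state = self.lstm(x, core_state)
        return self.policy(x), self.baseline(x), core_state
```

The real model differs in its encoders and shapes, but the concatenate, project, recur, then split into policy/baseline heads structure is the part to tinker with.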
## Improvement Ideas
*Here are some ideas we haven't tried yet, but might be easy places to start. Happy tinkering!*
### Model Improvements (`baseline.py`)
* The model is currently not using the terminal observations
(`tty_chars`, `tty_colors`, `tty_cursor`), so it has no idea about
menus - could we make use of these somehow?
* The bottom-line stats are very informative, but very simply encoded
in `BLStatsEncoder` - is there a better way to do this?
* The `GlyphEncoder` builds an embedding for the glyphs, and then takes
a crop of these centered on the player icon's coordinates
(`@`). Should the crop reuse the same embedding matrix?
* The current model constrains the vast action space to a smaller
subset of actions. Is it too constrained? Or not constrained enough?
### Environment Improvements (`polybeast_env.py`)
* Opening menus (such as when spellcasting) does not advance the in-game
timer. However, models can get stuck in menus, since the agent has to
learn which buttons to press to close them. Can changing the
penalty for not advancing the in-game timer improve the result?
* The NetHackChallenge assesses the score on random character
assignments. Might it be easier to learn on just a few of these at
the beginning of training?
### Algorithm/Optimisation Improvements (`polybeast_learner.py`)
* Can we add some intrinsic rewards to help our agents learn?
* Should we add penalties to disincentivise pathological behaviour we
observe?
* Can we improve the model by using a different optimizer?