
Compare revisions

Commits on Source (107)
Showing with 757 additions and 355 deletions
*.wav filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
submission filter=lfs diff=lfs merge=lfs -text
submission/* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
@@ -130,3 +130,5 @@ dmypy.json
# Pyre type checker
.pyre/
nle_data/
test_batched_env.py
FROM nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 AS nhc-dev
ENV DEBIAN_FRONTEND=noninteractive
COPY apt.txt /tmp/apt.txt
@@ -29,21 +28,32 @@ RUN adduser --disabled-password \
--uid ${HOST_UID} \
${USER_NAME}
USER ${USER_NAME}
WORKDIR ${HOME_DIR}
ENV CONDA_DIR ${HOME_DIR}/.conda
WORKDIR /opt/
# Install anaconda
ENV CONDA_DIR /opt/conda
RUN wget -nv -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_4.9.2-Linux-x86_64.sh \
&& bash miniconda.sh -b -p ${CONDA_DIR} \
&& . ${CONDA_DIR}/etc/profile.d/conda.sh \
&& conda clean -y -a \
&& rm -rf miniconda.sh
ENV PATH ${CONDA_DIR}/bin:${PATH}
RUN conda install cmake -y && conda clean -y -a
# Install TorchBeast
RUN conda install cmake cudatoolkit=10.2 pytorch -y -c pytorch -c nvidia && conda clean -y -a
RUN git clone https://github.com/facebookresearch/torchbeast.git --recursive
WORKDIR /opt/torchbeast
RUN pip install -r requirements.txt
RUN pip install ./nest
RUN python setup.py install
# Install AICrowd items
WORKDIR ${HOME_DIR}
COPY --chown=1001:1001 requirements.txt ${HOME_DIR}/requirements.txt
RUN pip install -r requirements.txt --no-cache-dir
COPY --chown=1001:1001 . ${HOME_DIR}
\ No newline at end of file
# SUBMISSION IMAGE - change user and copy files
FROM nhc-dev AS nhc-submit
COPY --chown=1001:1001 . ${HOME_DIR}
USER ${USER_NAME}
from abc import ABC, abstractmethod
class BatchedAgent(ABC):
"""
    This is an abstract base class for you to load your models and perform rollouts on a
batched set of environments.
"""
    def __init__(self, num_envs: int, num_actions: int):
self.num_envs = num_envs
self.num_actions = num_actions
@abstractmethod
def batched_step(self, observations, rewards, dones, infos):
"""
Perform a batched step on lists of environment outputs.
:param observations: a list of observations
:param rewards: a list of rewards
:param dones: a list of dones
        :param infos: a list of infos
        :returns: an iterable of actions
"""
pass
import numpy as np
from agents.base import BatchedAgent
class CustomAgent(BatchedAgent):
"""A example agent... that simple acts randomly. Adapt to your needs!"""
def __init__(self, num_envs, num_actions):
"""Set up and load you model here"""
super().__init__(num_envs, num_actions)
self.seeded_state = np.random.RandomState(42)
def batched_step(self, observations, rewards, dones, infos):
"""
Perform a batched step on lists of environment outputs.
Each argument is a list of the respective gym output.
Returns an iterable of actions.
"""
actions = self.seeded_state.randint(self.num_actions, size=self.num_envs)
return actions
import torch
import numpy as np
from agents.base import BatchedAgent
from nethack_baselines.torchbeast.models import load_model
MODEL_DIR = "./saved_models/torchbeast/pretrained_0.5B"
class TorchBeastAgent(BatchedAgent):
"""
A BatchedAgent using the TorchBeast Model
"""
def __init__(self, num_envs, num_actions):
super().__init__(num_envs, num_actions)
self.model_dir = MODEL_DIR
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
self.model = load_model(MODEL_DIR, self.device)
print(f'Using Model In: {self.model_dir}, Device: {self.device}')
self.core_state = [
m.to(self.device) for m in self.model.initial_state(batch_size=num_envs)
]
def batch_inputs(self, observations, dones):
"""
        Convert lists of observations and dones to batched tensors for TorchBeast.
TorchBeast models:
* take tensors in the form: [T, B, ...]: B:= batch, T:= unroll (=1)
* take "done" as a BOOLEAN observation
"""
states = list(observations[0].keys())
obs = {k: [] for k in states}
# Unpack List[Dicts] -> Dict[Lists]
for o in observations:
for k, t in o.items():
obs[k].append(t)
# Convert to Tensor, Add Unroll Dim (=1), Move to GPU
for k in states:
obs[k] = torch.Tensor(np.stack(obs[k])[None, ...]).to(self.device)
obs["done"] = torch.Tensor(np.array(dones)[None, ...]).bool().to(self.device)
return obs, dones
def batched_step(self, observations, rewards, dones, infos):
"""
Perform a batched step on lists of environment outputs.
        TorchBeast models:
* take the core (LSTM) state as input, and return as output
* return outputs as a dict of "action", "policy_logits", "baseline"
"""
observations, dones = self.batch_inputs(observations, dones)
with torch.no_grad():
outputs, self.core_state = self.model(observations, self.core_state)
return outputs["action"].cpu().numpy()[0]
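# A minimal rollout sketch (an illustration, not the official evaluator loop;
# it assumes envs/batched_env.py is importable as below and that the pretrained
# model referenced by MODEL_DIR is present):
if __name__ == "__main__":
    import aicrowd_gym
    from envs.batched_env import BatchedEnv

    num_envs = 4
    batched_env = BatchedEnv(lambda: aicrowd_gym.make("NetHackChallenge-v0"), num_envs=num_envs)
    agent = TorchBeastAgent(num_envs, batched_env.num_actions)

    observations = batched_env.batch_reset()
    rewards, dones, infos = [0.0] * num_envs, [False] * num_envs, [{}] * num_envs
    for _ in range(10):
        actions = agent.batched_step(observations, rewards, dones, infos)
        observations, rewards, dones, infos = batched_env.batch_step(actions)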
{
"challenge_id": "neurips-2021-nethack-challenge",
"challenge_id": "neurips-2021-the-nethack-challenge",
"authors": [
"aicrowd-bot"
],
"external_dataset_used": false
"description": "(optional) description about your awesome agent",
"gpu": true
}
@@ -14,4 +14,6 @@ xvfb
ffmpeg
freeglut3-dev
gcc
g++
\ No newline at end of file
g++
ninja-build
vim
@@ -15,6 +15,12 @@ You can modify the existing `rollout.py` OR copy it (to say `your_code.py`) and
Before you submit, make sure that you have saved your models, which are needed by your inference code.
In case your files are large, you can use `git-lfs` to upload them. More details [here](https://discourse.aicrowd.com/t/how-to-upload-large-files-size-to-your-submission/2304).
## Testing your model locally
The best way to test your model is to run your submission locally.
You can do this naively by simply running `python rollout.py`, or you can simulate the extra timeout wrappers that AIcrowd will implement by using `python test_submission.py`.
## How to submit a trained model!
To make a submission, you will have to create a **private** repository on [https://gitlab.aicrowd.com/](https://gitlab.aicrowd.com/).
from gym.envs.registration import register
register('NetHackChallengeBatched-v0',
         entry_point='nle_batched_env:NetHackChallengeBatchedEnv')
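# Usage sketch (an assumption, for illustration; requires this package on the path):
# import gym
# batched_env = gym.make('NetHackChallengeBatched-v0')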
import aicrowd_gym
import numpy as np
from collections.abc import Iterable
class BatchedEnv:
def __init__(self, env_make_fn, num_envs=32):
"""
Creates multiple copies of the environment with the same env_make_fn function
"""
self.num_envs = num_envs
self.envs = [env_make_fn() for _ in range(self.num_envs)]
self.num_actions = self.envs[0].action_space.n
def batch_step(self, actions):
"""
Applies each action to each env in the same order as self.envs
Actions should be iterable and have the same length as self.envs
        Returns lists of observations, rewards, dones, infos
"""
        assert isinstance(actions, Iterable), \
            f"actions with type {type(actions)} is not iterable"
        assert len(actions) == self.num_envs, \
            f"actions has length {len(actions)}, which is different from num_envs ({self.num_envs})"
observations, rewards, dones, infos = [], [], [], []
for env, a in zip(self.envs, actions):
observation, reward, done, info = env.step(a)
if done:
observation = env.reset()
observations.append(observation)
rewards.append(reward)
dones.append(done)
infos.append(info)
return observations, rewards, dones, infos
def batch_reset(self):
"""
Resets all the environments in self.envs
"""
        observations = [env.reset() for env in self.envs]
        return observations

    def single_env_reset(self, index):
        """
        Resets the single environment at the given index and returns its observation.
        """
        return self.envs[index].reset()
if __name__ == '__main__':
num_envs = 4
batched_env = BatchedEnv(
        env_make_fn=lambda: aicrowd_gym.make('NetHackChallenge-v0'),
num_envs=4
)
observations = batched_env.batch_reset()
num_actions = batched_env.envs[0].action_space.n
for _ in range(50):
actions = np.random.randint(num_actions, size=num_envs)
observations, rewards, dones, infos = batched_env.batch_step(actions)
for done_idx in np.where(dones)[0]:
observations[done_idx] = batched_env.single_env_reset(done_idx)
import aicrowd_gym
import nle
from gym.wrappers import TimeLimit
def create_env():
"""This is the environment that will be assessed by AIcrowd."""
return aicrowd_gym.make("NetHackChallenge-v0")
def addtimelimitwrapper_fn():
"""
    An example of how to add wrappers to the env created by create_env.
    Should return a gym env that wraps the NetHack gym env.
"""
env = create_env()
env = TimeLimit(env, max_episode_steps=10_000_000)
return env
\ No newline at end of file
#!/usr/bin/env python
import aicrowd_api
import os
########################################################################
# Instantiate Event Notifier
########################################################################
aicrowd_events = aicrowd_api.events.AIcrowdEvents()
def execution_start():
########################################################################
# Register Evaluation Start event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_started",
payload={
"event_type": "airborne_detection:execution_started"
}
)
def execution_running():
########################################################################
    # Register Evaluation Progress event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_progress",
payload={
"event_type": "airborne_detection:execution_progress",
"progress": 0.0
}
)
def execution_progress(progress):
########################################################################
# Register Evaluation Progress event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_progress",
payload={
"event_type": "airborne_detection:execution_progress",
"progress" : progress
}
)
def execution_success():
########################################################################
# Register Evaluation Complete event
########################################################################
predictions_output_path = os.getenv("PREDICTIONS_OUTPUT_PATH", False)
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_SUCCESS,
message="execution_success",
payload={
"event_type": "airborne_detection:execution_success",
"predictions_output_path" : predictions_output_path
},
blocking=True
)
def execution_error(error):
########################################################################
# Register Evaluation Complete event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_ERROR,
message="execution_error",
payload={ #Arbitrary Payload
"event_type": "airborne_detection:execution_error",
"error" : error
},
blocking=True
)
def is_grading():
return os.getenv("AICROWD_IS_GRADING", False)
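# Call-order sketch (an assumption based on the helpers above, not a documented protocol):
#   execution_start()            # once, when evaluation begins
#   execution_progress(0.5)      # periodically, with progress in [0, 1]
#   execution_success()          # on successful completion (blocking)
#   execution_error(traceback)   # on failure (blocking)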
######################################################################################
### This is a read-only file to allow participants to run their code locally. ###
### It will be overwritten during the evaluation. Please do not make any changes ###
### to this file. ###
######################################################################################
import traceback
import os
import signal
from contextlib import contextmanager
from os import listdir
from os.path import isfile, join
import soundfile as sf
import numpy as np
from evaluator import aicrowd_helpers
class TimeoutException(Exception): pass
@contextmanager
def time_limit(seconds):
def signal_handler(signum, frame):
raise TimeoutException("Prediction timed out!")
signal.signal(signal.SIGALRM, signal_handler)
signal.alarm(seconds)
try:
yield
finally:
signal.alarm(0)
class MusicDemixingPredictor:
def __init__(self):
self.test_data_path = os.getenv("TEST_DATASET_PATH", os.getcwd() + "/data/test/")
self.results_data_path = os.getenv("RESULTS_DATASET_PATH", os.getcwd() + "/data/results/")
self.inference_setup_timeout = int(os.getenv("INFERENCE_SETUP_TIMEOUT_SECONDS", "900"))
self.inference_per_music_timeout = int(os.getenv("INFERENCE_PER_MUSIC_TIMEOUT_SECONDS", "240"))
self.partial_run = os.getenv("PARTIAL_RUN_MUSIC_NAMES", None)
self.results = []
self.current_music_name = None
def get_all_music_names(self):
valid_music_names = None
if self.partial_run:
valid_music_names = self.partial_run.split(',')
music_names = []
for folder in listdir(self.test_data_path):
if not isfile(join(self.test_data_path, folder)):
if valid_music_names is None or folder in valid_music_names:
music_names.append(folder)
return music_names
def get_music_folder_location(self, music_name):
return join(self.test_data_path, music_name)
    def get_music_file_location(self, music_name, instrument=None):
        if instrument is None:
            instrument = "mixture"
            return join(self.test_data_path, music_name, instrument + ".wav")
        if not os.path.exists(self.results_data_path):
            os.makedirs(self.results_data_path)
        if not os.path.exists(join(self.results_data_path, music_name)):
            os.makedirs(join(self.results_data_path, music_name))
        return join(self.results_data_path, music_name, instrument + ".wav")
def scoring(self):
"""
        Scoring function, included in the starter kit for participants' reference
"""
def sdr(references, estimates):
# compute SDR for one song
delta = 1e-7 # avoid numerical errors
num = np.sum(np.square(references), axis=(1, 2))
den = np.sum(np.square(references - estimates), axis=(1, 2))
num += delta
den += delta
return 10 * np.log10(num / den)
music_names = self.get_all_music_names()
instruments = ["bass", "drums", "other", "vocals"]
scores = {}
for music_name in music_names:
print("Evaluating for: %s" % music_name)
scores[music_name] = {}
references = []
estimates = []
for instrument in instruments:
reference_file = join(self.test_data_path, music_name, instrument + ".wav")
estimate_file = self.get_music_file_location(music_name, instrument)
reference, _ = sf.read(reference_file)
estimate, _ = sf.read(estimate_file)
references.append(reference)
estimates.append(estimate)
references = np.stack(references)
estimates = np.stack(estimates)
references = references.astype(np.float32)
estimates = estimates.astype(np.float32)
song_score = sdr(references, estimates).tolist()
scores[music_name]["sdr_bass"] = song_score[0]
scores[music_name]["sdr_drums"] = song_score[1]
scores[music_name]["sdr_other"] = song_score[2]
scores[music_name]["sdr_vocals"] = song_score[3]
scores[music_name]["sdr"] = np.mean(song_score)
return scores
def evaluation(self):
"""
Admin function: Runs the whole evaluation
"""
aicrowd_helpers.execution_start()
try:
with time_limit(self.inference_setup_timeout):
self.prediction_setup()
except NotImplementedError:
print("prediction_setup doesn't exist for this run, skipping...")
aicrowd_helpers.execution_running()
music_names = self.get_all_music_names()
for music_name in music_names:
with time_limit(self.inference_per_music_timeout):
self.prediction(mixture_file_path=self.get_music_file_location(music_name),
bass_file_path=self.get_music_file_location(music_name, "bass"),
drums_file_path=self.get_music_file_location(music_name, "drums"),
other_file_path=self.get_music_file_location(music_name, "other"),
vocals_file_path=self.get_music_file_location(music_name, "vocals"),
)
if not self.verify_results(music_name):
raise Exception("verification failed, demixed files not found.")
aicrowd_helpers.execution_success()
def run(self):
try:
self.evaluation()
except Exception as e:
error = traceback.format_exc()
print(error)
aicrowd_helpers.execution_error(error)
if not aicrowd_helpers.is_grading():
raise e
def prediction_setup(self):
"""
You can do any preprocessing required for your codebase here :
like loading your models into memory, etc.
"""
raise NotImplementedError
    def prediction(self, mixture_file_path, bass_file_path, drums_file_path, other_file_path,
                   vocals_file_path):
        """
        This function will be called for every song during the evaluation.
        NOTE: In case you want to load your model, please do so in the `prediction_setup` function.
"""
raise NotImplementedError
def verify_results(self, music_name):
"""
        This function will be called to check that all the output files exist and to run any
        other verification needed (like checking the lengths of the wav files).
"""
valid = True
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "vocals"))
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "bass"))
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "drums"))
valid = valid and os.path.isfile(self.get_music_file_location(music_name, "other"))
return valid
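# A minimal sketch of a participant subclass (hypothetical, for illustration;
# the class name and its trivial "model" are not part of the starter kit):
class CopyMixturePredictor(MusicDemixingPredictor):
    def prediction_setup(self):
        pass  # load your real separation model here

    def prediction(self, mixture_file_path, bass_file_path, drums_file_path,
                   other_file_path, vocals_file_path):
        # Placeholder "separation": write the mixture out as every stem.
        mixture, sample_rate = sf.read(mixture_file_path)
        for path in (bass_file_path, drums_file_path, other_file_path, vocals_file_path):
            sf.write(path, mixture, sample_rate)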
# This is intended as an example of a barebones submission.
# Note that not using BatchedEnv may not meet the timeout requirement.
import aicrowd_gym
import nle
def main():
"""
    This function will be called for the training phase.
"""
    # Using aicrowd_gym.make here allows the evaluator to limit environment
    # features that participants should not use during the submission
env = aicrowd_gym.make("NetHackChallenge-v0")
env.reset()
done = False
episode_count = 0
while episode_count < 200:
_, _, done, _ = env.step(env.action_space.sample())
if done:
episode_count += 1
print(episode_count)
env.reset()
if __name__ == "__main__":
main()
Placeholder
# TorchBeast NetHackChallenge Benchmark
This is a baseline model for the NetHack Challenge based on
[TorchBeast](https://github.com/facebookresearch/torchbeast) - FAIR's
implementation of IMPALA for PyTorch.
It comes with all the code you need to train, run and submit a model
that is based on the results published in the original NLE paper.
This implementation can run with 2 GPUs (one for acting and one for
learning) and runs many simultaneous environments with dynamic
batching. Currently it is configured to run with only 1 GPU.
## Installation
**[Native Installation]**
To get this running you'll need to follow the TorchBeast installation instructions for PolyBeast from the [TorchBeast repo](https://github.com/facebookresearch/torchbeast#faster-version-polybeast).
**[Docker Installation]**
You can fast-track the installation of PolyBeast by using the competition's own Dockerfile. Prebuilt images are also hosted on Docker Hub. The following commands open an image that allows you to run the baseline.
**To Run Existing Docker Image**
`docker pull fairnle/challenge:dev`
```docker run -it -v `pwd`:/home/aicrowd --gpus='all' fairnle/challenge:dev```
**To Build Your Own Image**
*Dev Image* - runs with the root user and doesn't copy your files into the image
`docker build -t competition --target nhc-dev .`
*or Submission Image* - runs with the aicrowd user and copies all your files into the image
`docker build -t competition --target nhc-submit .`
*Run Image*
```docker run -it -v `pwd`:/home/aicrowd --gpus='all' competition```
## Running The Baseline
Once installed, in this directory run:
`python polyhydra.py`
To change parameters, edit `config.yaml`, or to override parameters
from the command-line run:
`python polyhydra.py embedding_dim=16`
The training will save checkpoints to a new directory (`outputs`) and
should the environments create any outputs, they will be saved to
`nle_data` (by default, recordings of episodes are switched off to
save space).
The default polybeast runs on 2 GPUs, one for the learner and one for
the actors. However, with only one GPU you can still run
polybeast - just override the `actor_device` argument:
`python polyhydra.py actor_device=cpu`
NOTE: if you get a "Too many open files" error, try: `ulimit -Sn 10000`.
## Making a submission
In the output directory of your trained model, you should find two files, `checkpoint.tar` and `config.yaml`. Add both of them to your submission repo. Then change the `MODEL_DIR` variable in `agents/torchbeast_agent.py` to point to the directory where these files are located. Finally, set the `AGENT` in `submission_config.py` to `'TorchBeastAgent'` so that your TorchBeast agent is used for the submission.
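For example (the directory name is illustrative, and the exact form of the `AGENT` assignment should match what `submission_config.py` expects):

```python
# agents/torchbeast_agent.py
MODEL_DIR = "./saved_models/torchbeast/my_trained_run"  # illustrative path

# submission_config.py
AGENT = TorchBeastAgent
```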
After that, follow [these instructions](/docs/SUBMISSION.md) to submit your model to AIcrowd!
## Repo Structure
```
baselines/torchbeast
├── core/
├── models/ # <- Models HERE
├── util/
├── config.yaml # <- Flags HERE
├── polybeast_env.py # <- Training Env HERE
├── polybeast_learner.py # <- Training Loop HERE
└── polyhydra.py # <- main() HERE
```
The structure is simple, compartmentalising the environment setup,
training loop and models into different files. You can tweak any of
these separately, and add parameters to the flags (which are passed
around).
## About the Model
The model we provide (`BaselineNet`) is simple and lives entirely in
`models/baseline.py`.
* It encodes the dungeon into a fixed-size representation
(`GlyphEncoder`)
* It encodes the topline message into a fixed-size representation
(`MessageEncoder`)
* It encodes the bottom line statistics (e.g. armour class, health) into
a fixed-size representation (`BLStatsEncoder`)
* It concatenates all these outputs into a fixed-size vector, runs it
through a fully connected layer, and feeds the result into an LSTM.
* The outputs of the LSTM go through policy and baseline heads (since
this is an actor-critic algorithm).
As you can see there is a lot of data to play with in this game, and
plenty to try, both in modelling and in the learning algorithms used.
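To make the dataflow concrete, here is a heavily simplified sketch of that encoder/LSTM/heads structure. This is not the actual `BaselineNet` code - the encoder internals and dimensions below are stand-ins; see `models/baseline.py` for the real encoders:

```python
import torch
import torch.nn as nn

class SketchNet(nn.Module):
    """Toy stand-in for BaselineNet: three encoders -> FC -> LSTM -> two heads."""
    def __init__(self, num_actions, hidden_dim=256):
        super().__init__()
        # Each encoder maps its observation to a fixed-size vector.
        self.glyph_encoder = nn.Sequential(nn.Flatten(), nn.LazyLinear(hidden_dim))
        self.message_encoder = nn.Sequential(nn.Flatten(), nn.LazyLinear(hidden_dim))
        self.blstats_encoder = nn.Sequential(nn.Flatten(), nn.LazyLinear(hidden_dim))
        self.fc = nn.Linear(3 * hidden_dim, hidden_dim)
        self.core = nn.LSTM(hidden_dim, hidden_dim)       # recurrent core
        self.policy = nn.Linear(hidden_dim, num_actions)  # actor head
        self.baseline = nn.Linear(hidden_dim, 1)          # critic (baseline) head

    def forward(self, glyphs, message, blstats, core_state):
        x = torch.cat([
            self.glyph_encoder(glyphs.float()),
            self.message_encoder(message.float()),
            self.blstats_encoder(blstats.float()),
        ], dim=-1)
        core_input = torch.relu(self.fc(x)).unsqueeze(0)  # add T(=1) dimension
        core_output, core_state = self.core(core_input, core_state)
        core_output = core_output.squeeze(0)
        return self.policy(core_output), self.baseline(core_output), core_state
```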
## Improvement Ideas
*Here are some ideas we haven't tried yet, but might be easy places to start. Happy tinkering!*
### Model Improvements (`baseline.py`)
* The model is currently not using the terminal observations
(`tty_chars`, `tty_colors`, `tty_cursor`), so it has no idea about
menus - could we make use of these somehow?
* The bottom-line stats are very informative, but very simply encoded
in `BLStatsEncoder` - is there a better way to do this?
* The `GlyphEncoder` builds an embedding for the glyphs, and then takes
a crop of these centered around the player icon coordinates
(`@`). Should the crop reuse the same embedding matrix?
* The current model constrains the vast action space to a smaller
subset of actions. Is it too constrained? Or not constrained enough?
### Environment Improvements (`polybeast_env.py`)
* Opening menus (such as when spellcasting) does not advance the in-game
timer. However, models can also get stuck in menus, since they have to
learn which buttons to press to close the menu. Can changing the
penalty for not advancing the in-game timer improve the result? (See
the sketch after this list.)
* The NetHackChallenge assesses the score on random character
assignments. Might it be easier to learn on just a few of these at
the beginning of training?
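As a concrete starting point for the penalty idea above, here is a sketch of a reward-shaping wrapper. It is hypothetical: the wrapper name is ours, and the index of the time field in `blstats` is an assumption you should verify against NLE:

```python
import gym

class StuckInMenuPenalty(gym.Wrapper):
    """Subtract a small penalty whenever the in-game turn counter does not advance."""

    def __init__(self, env, penalty=0.001, time_index=20):  # time_index: assumed blstats slot
        super().__init__(env)
        self.penalty = penalty
        self.time_index = time_index
        self._last_time = None

    def reset(self, **kwargs):
        obs = self.env.reset(**kwargs)
        self._last_time = obs["blstats"][self.time_index]
        return obs

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        current_time = obs["blstats"][self.time_index]
        if current_time == self._last_time:
            reward -= self.penalty
        self._last_time = current_time
        return obs, reward, done, info
```

A wrapper like this could be applied inside `addtimelimitwrapper_fn`, alongside `TimeLimit`.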
### Algorithm/Optimisation Improvements (`polybeast_learner.py`)
* Can we add some intrinsic rewards to help our agents learn?
* Should we add penalties to disincentivise pathological behaviour we
observe?
* Can we improve the model by using a different optimizer?
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
defaults:
- hydra/job_logging: colorlog
- hydra/hydra_logging: colorlog
# - hydra/launcher: submitit_slurm
# # To Be Used With hydra submitit_slurm if you have SLURM cluster
# # pip install hydra-core hydra_colorlog
# # can set these on the commandline too, e.g. `hydra.launcher.partition=dev`
# hydra:
# launcher:
# timeout_min: 4300
# cpus_per_task: 20
# gpus_per_node: 2
# tasks_per_node: 1
# mem_gb: 20
# nodes: 1
# partition: dev
# comment: null
# max_num_timeout: 5 # will requeue on timeout or preemption
name: null # can use this to have multiple runs with same params, eg name=1,2,3,4,5
## WANDB settings
wandb: false # Enable wandb logging.
project: nethack_challenge # The wandb project name.
entity: user1 # The wandb user to log to.
group: group1 # The wandb group for the run.
# POLYBEAST ENV settings
mock: false # Use mock environment instead of NetHack.
single_ttyrec: true # Record ttyrec only for actor 0.
num_seeds: 0 # If larger than 0, samples a fixed number of environment seeds to be used.
write_profiler_trace: false # Collect and write a profiler trace for chrome://tracing/.
fn_penalty_step: constant # Function to accumulate penalty.
penalty_time: 0.0 # Penalty per time step in the episode.
penalty_step: -0.01 # Penalty per step in the episode.
reward_lose: 0 # Reward for losing (dying before finding the staircase).
reward_win: 100 # Reward for winning (finding the staircase).
state_counter: none # Method for counting state visits. Default none.
character: 'mon-hum-neu-mal' # Specification of the NetHack character.
## typical characters we use
# 'mon-hum-neu-mal'
# 'val-dwa-law-fem'
# 'wiz-elf-cha-mal'
# 'tou-hum-neu-fem'
# '@' # random (used in Challenge assessment)
# RUN settings.
mode: train # Training or test mode.
env: challenge # Name of Gym environment to create.
# # env (task) names: challenge, staircase, pet,
# eat, gold, score, scout, oracle
# TRAINING settings.
num_actors: 256 # Number of actors.
total_steps: 1e9 # Total environment steps to train for. Will be cast to int.
batch_size: 32 # Learner batch size.
unroll_length: 80 # The unroll length (time dimension).
num_learner_threads: 1 # Number of learner threads.
num_inference_threads: 1 # Number of inference threads.
disable_cuda: false # Disable CUDA.
learner_device: cuda:0 # Set learner device.
actor_device: cuda:0 # Set actor device.
# OPTIMIZER settings. (RMS Prop)
learning_rate: 0.0002 # Learning rate.
grad_norm_clipping: 40 # Global gradient norm clip.
alpha: 0.99 # RMSProp smoothing constant.
momentum: 0 # RMSProp momentum.
epsilon: 0.000001 # RMSProp epsilon.
# LOSS settings.
entropy_cost: 0.001 # Entropy cost/multiplier.
baseline_cost: 0.5 # Baseline cost/multiplier.
discounting: 0.999 # Discounting factor.
normalize_reward: true # Normalizes reward by dividing by running stdev from mean.
# MODEL settings.
model: baseline # Name of model to build (see models/__init__.py).
use_lstm: true # Use LSTM in agent model.
hidden_dim: 256 # Size of hidden representations.
embedding_dim: 64 # Size of glyph embeddings.
layers: 5 # Number of ConvNet Layers for Glyph Model
crop_dim: 9 # Size of crop (c x c)
use_index_select: true # Whether to use index_select instead of embedding lookup (for speed reasons).
restrict_action_space: True # Use a restricted ACTION SPACE (only nethack.USEFUL_ACTIONS)
msg:
hidden_dim: 64 # Hidden dimension for message encoder.
embedding_dim: 32 # Embedding dimension for characters in message encoder.
# TEST settings.
load_dir: null # Path to load a model from for testing