Commit a88bde13 authored by joseph_suarez, committed by Siddhartha Laghuvarapu

Add links and clean up starter kit docs

rllib reqs

Fix python path in evaluator rollouts

Add neuralmmo files, fix rollout
parent 4c277cf4
......@@ -128,6 +128,6 @@ dmypy.json
.pyre/
#NeuralMMO repository
neural-mmo/forge/embyr
neural-mmo/docs
neural-mmo/resource
neuralmmo/resource/maps
'''Main file for the neural-mmo/projekt demo

/projekt contains all necessary RLlib wrappers to train and
evaluate capable policies on Neural MMO as well as rendering,
logging, and visualization tools.

Associated docs and tutorials are hosted on jsuarez5341.github.io.'''
from pdb import set_trace as T
import numpy as np
from fire import Fire
import projekt
from neural_mmo.forge.blade.core import terrain
from neural_mmo.forge.trinity.scripted import baselines
from neural_mmo.forge.trinity.visualize import BokehServer
from neural_mmo.forge.trinity.evaluator import Evaluator
def createPolicies(config, mapPolicy):
    '''Generate RLlib policies'''
    obs = wrapper.observationSpace(config)
    atns = wrapper.actionSpace(config)

    policies = {}
    for i in range(config.NPOLICIES):
        params = {
            "agent_id": i,
            "obs_space_dict": obs,
            "act_space_dict": atns}
        key = mapPolicy(i)
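        # Policy spec tuple: (policy_cls, obs_space, act_space, config);
        # None selects the trainer's default policy class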
        policies[key] = (None, obs, atns, params)

    return policies
def loadTrainer(config):
    '''Create monolithic RLlib trainer object'''
    torch.set_num_threads(1)
    ray.init(local_mode=config.LOCAL_MODE)

    # Register custom env
    ray.tune.registry.register_env("Neural_MMO",
                                   lambda config: wrapper.RLlibEnv(config))

    # Create policies
    rllib.models.ModelCatalog.register_custom_model('godsword', wrapper.RLlibPolicy)
    mapPolicy = lambda agentID: 'policy_{}'.format(agentID % config.NPOLICIES)
    policies = createPolicies(config, mapPolicy)

    # Instantiate monolithic RLlib Trainer object
    return wrapper.SanePPOTrainer(config={
        'num_workers': config.NUM_WORKERS,
        'num_gpus_per_worker': config.NUM_GPUS_PER_WORKER,
        'num_gpus': config.NUM_GPUS,
        'num_envs_per_worker': 1,
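        # Halved to offset the apparent doubling noted in projekt/config.py
        # ("Bug? This gets doubled")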
        'train_batch_size': config.TRAIN_BATCH_SIZE // 2,
        'rollout_fragment_length': config.ROLLOUT_FRAGMENT_LENGTH,
        'sgd_minibatch_size': config.SGD_MINIBATCH_SIZE,
        'num_sgd_iter': config.NUM_SGD_ITER,
        'framework': 'torch',
        'horizon': np.inf,
        'soft_horizon': False,
        'no_done_at_end': False,
        'callbacks': wrapper.RLlibLogCallbacks,
        'env_config': {
            'config': config
        },
        'multiagent': {
            'policies': policies,
            'policy_mapping_fn': mapPolicy,
            'count_steps_by': 'env_steps'
        },
        'model': {
            'custom_model': 'godsword',
            'custom_model_config': {'config': config},
            'max_seq_len': config.LSTM_BPTT_HORIZON
        },
    })
def loadEvaluator(config):
    '''Create test/render evaluator'''
    if config.SCRIPTED:
        return Evaluator(config, getattr(baselines, config.SCRIPTED))
    else:
        return wrapper.RLlibEvaluator(config, loadModel(config))

def loadModel(config):
    '''Load NN weights and optimizer state'''
    trainer = loadTrainer(config)
    utils.modelSize(trainer.defaultModel())

    if config.LOAD:
        trainer.restore()

    return trainer
class Anvil():
    '''Neural MMO CLI powered by Google Fire

    Main file for the RLlib demo included with Neural MMO.

    Usage:
        python Forge.py <COMMAND> --config=<CONFIG> --ARG1=<ARG1> ...

    The User API documents core env flags. Additional config options specific
    to this demo are available in projekt/config.py.

    The --config flag may be used to load an entire group of options at once.
    The Debug, SmallMaps, and LargeMaps options are included in this demo,
    with the latter being the default -- or write your own in projekt/config.py.
    '''
    def __init__(self, **kwargs):
        if 'help' in kwargs:
            kwargs.pop('help')

        if 'config' in kwargs:
            config = kwargs.pop('config')
            config = getattr(projekt.config, config)()
        else:
            config = projekt.config.LargeMaps()
        config.override(**kwargs)
        self.config = config

        if not config.SCRIPTED:
            global torch, ray, rllib, wrapper, utils
            from neural_mmo.forge.ethyr.torch import utils
            import torch
            import ray
            from ray import rllib
            from projekt import rllib_wrapper as wrapper
    def train(self, **kwargs):
        '''Train a model starting with the current value of --MODEL'''
        loadModel(self.config).train()

    def evaluate(self, **kwargs):
        '''Evaluate a model on --EVAL_MAPS maps'''
        self.config.EVALUATE = True
        loadEvaluator(self.config).evaluate(self.config.GENERALIZE)

    def render(self, **kwargs):
        '''Start a WebSocket server that autoconnects to the 3D Unity client'''
        self.config.RENDER = True
        loadEvaluator(self.config).render()

    def generate(self, **kwargs):
        '''Generate game maps for the current --config setting'''
        terrain.MapGenerator(self.config).generate()

    def visualize(self, **kwargs):
        '''Training/Evaluation results Web dashboard'''
        BokehServer(self.config)
def main():
    def Display(lines, out):
        text = "\n".join(lines) + "\n"
        out.write(text)

    from fire import core
    core.Display = Display

    Fire(Anvil)

if __name__ == "__main__":
    main()
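For reference, a few example invocations of the CLI above (the flag values are illustrative; the commands and config names come from the docstring, projekt/config.py, and the scripted baselines):

```bash
python Forge.py generate --config=SmallMaps
python Forge.py train --config=SmallMaps --LOAD=False
python Forge.py evaluate --config=SmallMaps --SCRIPTED=Combat
```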
......@@ -13,6 +13,10 @@ This repository contains:
- Information on **evaluating your agents locally**, **baselines**, and some best practices for hassle-free submissions.
- **Starter code** for you to get started!
[IMPORTANT - Accept the rules before you submit](https://www.aicrowd.com/challenges/the-neural-mmo-challenge/challenge_rules)
# Table of contents
- [📚 Competition procedure](#-competition-procedure)
......@@ -55,13 +59,16 @@ Clone the starter kit repository and install the dependencies.
```bash
git clone https://gitlab.aicrowd.com/neural-mmo/neural-mmo-starter-kit
cd neural-mmo-starter-kit
pip install neural-mmo
#Optional: Install ML dependencies
pip install -U -r requirements.txt
```
Generate the Neural MMO environment maps.
```bash
python Forge.py generate --config=CompetitionRound1
```
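The generated maps are written into the environment's resource directory; the starter kit's `.gitignore` correspondingly excludes `neuralmmo/resource/maps`.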
# 🛠 Preparing your submission
......@@ -94,6 +101,8 @@ python evaluator/rollout.py
**File/Directory** | **Description**
--- | ---
[`projekt`](projekt) | Per-round environment configs and RLlib demo.
[`Forge.py`](Forge.py) | Main file for baselines and demos.
[`agents`](agents) | Directory containing different scripted bots, a baseline agent, and bots performing random actions. We recommend that you add your agents to this directory.
[`config.py`](config.py) | File containing the configuration options for local evaluation. We will use the same player agent you specify here during the evaluation.
[`utils/submit.sh`](utils/submit.sh) | Helper script to submit your repository to [AIcrowd GitLab](https://gitlab.aicrowd.com).
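For orientation, a rough sketch of what `config.py` supplies, inferred from how `evaluator/rollout.py` consumes it (`load_agents(LocalEvaluationConfig)` returns the player and opponent agents). The attribute names below are assumptions, not the actual schema:

```python
# Hypothetical sketch only: the real schema lives in config.py.
class LocalEvaluationConfig:
    # Assumed attributes pointing at agent classes in agents/
    PLAYER_AGENT = "agents.neural_baseline_agent.NeuralBaselineAgent"
    OPPONENT_AGENTS = ["agents.scripted_baseline_agent.BaselineCombatAgent"]
```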
......
from evaluator.base_agent import NeuralMMOAgent
import projekt
from neuralmmo.Forge import loadModel


class NeuralBaselineAgent(NeuralMMOAgent):
    def __init__(self):
        self.agent_type = 'neural'
        self.config = projekt.config.CompetitionRound1()
        self.trainer = loadModel(self.config)
......
......@@ -7,7 +7,7 @@ from neural_mmo.forge.blade.io.action.static import Action
from flexdict import FlexDict
from collections import defaultdict
import gym
import projekt

def get_action_spaces():
    config = projekt.config.SmallMaps()
......@@ -20,6 +20,7 @@ def get_action_spaces():
class RandomNeuralMMOAgent(NeuralMMOAgent):
    def __init__(self):
        self.agent_type = 'neural'
        self.action_space = get_action_spaces()

    def register_reset(self, observations):
......
from evaluator.base_agent import NeuralMMOAgent
from neural_mmo.forge.trinity.scripted import baselines
import projekt


class BaselineForageAgent(NeuralMMOAgent):
    def __init__(self):
        self.agent_type = 'scripted'
        self.agent = getattr(baselines, 'Forage')(projekt.config.SmallMaps())

    def register_reset(self, observations):
......@@ -16,6 +17,7 @@ class BaselineForageAgent(NeuralMMOAgent):
class BaselineCombatAgent(NeuralMMOAgent):
    def __init__(self):
        self.agent_type = 'scripted'
        self.agent = getattr(baselines, 'Combat')(projekt.config.SmallMaps())

    def register_reset(self, observations):
......@@ -28,6 +30,7 @@ class BaselineCombatAgent(NeuralMMOAgent):
class BaselineRandomAgent(NeuralMMOAgent):
    def __init__(self):
        self.agent_type = 'scripted'
        self.agent = getattr(baselines, 'Random')(projekt.config.SmallMaps())

    def register_reset(self, observations):
......
## Train an agent
- If you want to change the config for the agent, check out [`neural-mmo/projekt/config.py`](https://github.com/jsuarez5341/neural-mmo/blob/master/neural_mmo/projekt/config.py).
- We recommend that you subclass `CompetitionRound1` for the first round (see the sketch at the end of this section).
- The [neural-mmo documentation](https://jsuarez5341.github.io/neural-mmo/build/html/rst/userguide.html) contains a ton of additional information about the environment and prior work on it.

## Example Scripted Agents
We provide a [simple wrapper](https://jsuarez5341.github.io/neural-mmo/build/html/rst/forge.trinity.scripted.io.html) that lets you extract information from the observations.
You can find various scripted baselines [here](https://github.com/jsuarez5341/neural-mmo/tree/master/forge/trinity/scripted).
Wrap them as shown in /agents for compatibility with our submission system.

## Example Learned Agent
We provide a simple recurrent [baseline model](https://github.com/jsuarez5341/neural-mmo/blob/master/neural_mmo/forge/ethyr/torch/policy/baseline.py) and all associated [RLlib training code](https://github.com/jsuarez5341/neural-mmo/blob/master/neural_mmo/projekt/rllib_wrapper.py).
You can retrain the baseline agent as shown below. It takes a couple of days to converge on a good GPU with 32 cores, but you can try it out for a few epochs.

```bash
python Forge.py train --config=SmallMultimodalSkills --LOAD=False
```
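As a concrete version of the subclassing recommendation above, a minimal sketch (the class name and overridden values are illustrative; place it in projekt/config.py so the --config flag can resolve it):

```python
from projekt.config import CompetitionRound1

class MyRound1Config(CompetitionRound1):
    # Override any option documented in projekt/config.py
    NUM_WORKERS = 4         # illustrative value
    TRAIN_HORIZON = 1024    # illustrative value
```

Then train with `python Forge.py train --config=MyRound1Config --LOAD=False`.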
class NeuralMMOAgent():
    def __init__(self):
        self.agent_type = None

    def register_reset(self, observations):
        raise NotImplementedError

    def compute_action(self, observations, info):
        raise NotImplementedError
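A minimal sketch of a custom agent built on this interface (the class name is hypothetical, and the empty action dict is an assumed no-op; see the agents in /agents for real implementations):

```python
from evaluator.base_agent import NeuralMMOAgent

class MyAgent(NeuralMMOAgent):
    def __init__(self):
        self.agent_type = 'scripted'  # or 'neural'

    def register_reset(self, observations):
        # First observations of an episode; return the initial action
        return {}

    def compute_action(self, observations, info):
        # Called every subsequent timestep
        return {}
```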
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parent.parent))

from neural_mmo.forge.ethyr.torch import utils
from neural_mmo.forge.trinity.env import Env
from neural_mmo.forge.blade.io.action import static as Action

import neuralmmo.projekt as projekt
from evaluator.helpers import *
from config import LocalEvaluationConfig

import random
import copy
......@@ -31,7 +37,7 @@ def run_episode(player_index, agents, N_TIME_STEPS):
    agent_entid_map = dict(zip(range(len(agents)), entids))
    entid_agent_map = {x[1]: x[0] for x in agent_entid_map.items()}
    for idx, agent in enumerate(agents):
        if agent.agent_type == "neural":
            neural_agents.add(agent_entid_map[idx])

    actions = {}
......@@ -55,6 +61,8 @@ def run_episode(player_index, agents, N_TIME_STEPS):
    for entid in sorted(alive_agents):
        if entid not in list(obs.keys()):
            dead_agents.append(entid)
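            # Drop newly dead entities from the neural set so we stop
            # requesting policy actions for them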
            if entid in neural_agents:
                neural_agents.remove(entid)

    alive_agents = list(obs.keys())
    actions = {}
......@@ -100,8 +108,9 @@ def print_statistics(player_statistics, episode):
if __name__ == "__main__":
    player_agent, opponent_agents = load_agents(LocalEvaluationConfig)
    player_agent = player_agent[0]
    N_EPISODES = 10
    N_TIME_STEPS = 10
    for episode in range(N_EPISODES):
        agents, player_index = assign_agents(player_agent, opponent_agents)
        statistics = run_episode(player_index, agents, N_TIME_STEPS)
......
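As listed in the README above, this local evaluation loop is run with `python evaluator/rollout.py`.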
'''Main file for the neural-mmo/projekt demo

/projekt contains all necessary RLlib wrappers to train and
evaluate capable policies on Neural MMO as well as rendering,
logging, and visualization tools.

Associated docs and tutorials are hosted on jsuarez5341.github.io.'''
from pdb import set_trace as T
import numpy as np
from fire import Fire
import projekt
from neural_mmo.forge.blade.core import terrain
from neural_mmo.forge.trinity.scripted import baselines
from neural_mmo.forge.trinity.visualize import BokehServer
from neural_mmo.forge.trinity.evaluator import Evaluator
def createPolicies(config, mapPolicy):
    '''Generate RLlib policies'''
    obs = wrapper.observationSpace(config)
    atns = wrapper.actionSpace(config)

    policies = {}
    for i in range(config.NPOLICIES):
        params = {
            "agent_id": i,
            "obs_space_dict": obs,
            "act_space_dict": atns}
        key = mapPolicy(i)
        policies[key] = (None, obs, atns, params)

    return policies
def loadTrainer(config):
    '''Create monolithic RLlib trainer object'''
    torch.set_num_threads(1)
    ray.init(local_mode=config.LOCAL_MODE)

    # Register custom env
    ray.tune.registry.register_env("Neural_MMO",
                                   lambda config: wrapper.RLlibEnv(config))

    # Create policies
    rllib.models.ModelCatalog.register_custom_model('godsword', wrapper.RLlibPolicy)
    mapPolicy = lambda agentID: 'policy_{}'.format(agentID % config.NPOLICIES)
    policies = createPolicies(config, mapPolicy)

    # Instantiate monolithic RLlib Trainer object
    return wrapper.SanePPOTrainer(config={
        'num_workers': config.NUM_WORKERS,
        'num_gpus_per_worker': config.NUM_GPUS_PER_WORKER,
        'num_gpus': config.NUM_GPUS,
        'num_envs_per_worker': 1,
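        # Halved to offset the apparent doubling noted in projekt/config.py
        # ("Bug? This gets doubled")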
        'train_batch_size': config.TRAIN_BATCH_SIZE // 2,
        'rollout_fragment_length': config.ROLLOUT_FRAGMENT_LENGTH,
        'sgd_minibatch_size': config.SGD_MINIBATCH_SIZE,
        'num_sgd_iter': config.NUM_SGD_ITER,
        'framework': 'torch',
        'horizon': np.inf,
        'soft_horizon': False,
        'no_done_at_end': False,
        'callbacks': wrapper.RLlibLogCallbacks,
        'env_config': {
            'config': config
        },
        'multiagent': {
            'policies': policies,
            'policy_mapping_fn': mapPolicy,
            'count_steps_by': 'env_steps'
        },
        'model': {
            'custom_model': 'godsword',
            'custom_model_config': {'config': config},
            'max_seq_len': config.LSTM_BPTT_HORIZON
        },
    })
def loadEvaluator(config):
    '''Create test/render evaluator'''
    if config.SCRIPTED:
        return Evaluator(config, getattr(baselines, config.SCRIPTED))
    else:
        return wrapper.RLlibEvaluator(config, loadModel(config))

def loadModel(config):
    '''Load NN weights and optimizer state'''
    trainer = loadTrainer(config)
    utils.modelSize(trainer.defaultModel())

    if config.LOAD:
        trainer.restore()

    return trainer
class Anvil():
    '''Neural MMO CLI powered by Google Fire

    Main file for the RLlib demo included with Neural MMO.

    Usage:
        python Forge.py <COMMAND> --config=<CONFIG> --ARG1=<ARG1> ...

    The User API documents core env flags. Additional config options specific
    to this demo are available in projekt/config.py.

    The --config flag may be used to load an entire group of options at once.
    The Debug, SmallMaps, and LargeMaps options are included in this demo,
    with the latter being the default -- or write your own in projekt/config.py.
    '''
    def __init__(self, **kwargs):
        if 'help' in kwargs:
            kwargs.pop('help')

        if 'config' in kwargs:
            config = kwargs.pop('config')
            config = getattr(projekt.config, config)()
        else:
            config = projekt.config.LargeMaps()
        config.override(**kwargs)
        self.config = config
    def imports(self):
        '''Conditional RL imports'''
        global torch, ray, rllib, wrapper, utils
        from neural_mmo.forge.ethyr.torch import utils
        import torch
        import ray
        from ray import rllib
        from projekt import rllib_wrapper as wrapper
    def train(self, **kwargs):
        '''Train a model starting with the current value of --MODEL'''
        self.imports()
        loadModel(self.config).train()

    def evaluate(self, **kwargs):
        '''Evaluate a model on --EVAL_MAPS maps'''
        self.config.EVALUATE = True
        if not self.config.SCRIPTED:
            self.imports()
        loadEvaluator(self.config).evaluate(self.config.GENERALIZE)

    def render(self, **kwargs):
        '''Start a WebSocket server that autoconnects to the 3D Unity client'''
        self.config.RENDER = True
        if not self.config.SCRIPTED:
            self.imports()
        loadEvaluator(self.config).render()

    def generate(self, **kwargs):
        '''Generate game maps for the current --config setting'''
        terrain.MapGenerator(self.config).generate()

    def visualize(self, **kwargs):
        '''Training/Evaluation results Web dashboard'''
        BokehServer(self.config)
def main():
    def Display(lines, out):
        text = "\n".join(lines) + "\n"
        out.write(text)

    from fire import core
    core.Display = Display

    Fire(Anvil)

if __name__ == "__main__":
    main()
from pdb import set_trace as T
from neural_mmo.forge.blade import core
from neural_mmo.forge.blade.core import config
class RLlibConfig:
    '''Base config for RLlib Models

    Extends core Config, which contains environment, evaluation,
    and non-RLlib-specific learning parameters'''

    @property
    def MODEL(self):
        return self.__class__.__name__

    # Hardware Scale
    NUM_GPUS_PER_WORKER = 0
    NUM_GPUS = 1
    NUM_WORKERS = 1
    LOCAL_MODE = False
    LOAD = True

    # Memory/Batch Scale
    TRAIN_EPOCHS = 10000
    LSTM_BPTT_HORIZON = 16
    NUM_SGD_ITER = 1

    # Model
    SCRIPTED = None
    N_AGENT_OBS = 100
    NPOLICIES = 1
    HIDDEN = 64
    EMBED = 64

    # Reward
    TEAM_SPIRIT = 0.0
    ACHIEVEMENT_SCALE = 1.0/15.0
class LargeMaps(core.Config, RLlibConfig, config.AllGameSystems):
    '''Large scale Neural MMO training setting

    Features up to 1000 concurrent agents and 1000 concurrent NPCs,
    1km x 1km maps, and 5/10k timestep train/eval horizons

    This is the default setting as of v1.5 and allows for large
    scale multiagent research even on relatively modest hardware'''

    # Memory/Batch Scale
    NUM_WORKERS = 1  # 16
    TRAIN_BATCH_SIZE = 32 * NUM_WORKERS  # Bug? This gets doubled
    ROLLOUT_FRAGMENT_LENGTH = 32
    SGD_MINIBATCH_SIZE = 256

    # Horizon
    TRAIN_HORIZON = 8192
    EVALUATION_HORIZON = 8192
class SmallMaps(config.SmallMaps, RLlibConfig, config.AllGameSystems):
    '''Small scale Neural MMO training setting

    Features up to 128 concurrent agents and 32 concurrent NPCs,
    60x60 maps (excluding the border), and 1000 timestep train/eval horizons.

    This setting is modeled off of v1.1-v1.4. It is appropriate as a quick
    training task for new ideas, a transfer target for agents trained on
    large maps, or as a primary research target for PCG methods.'''

    # Memory/Batch Scale
    NUM_WORKERS = 6  # 32
    TRAIN_BATCH_SIZE = 256 * NUM_WORKERS  # Bug? This gets doubled
    ROLLOUT_FRAGMENT_LENGTH = 256
    SGD_MINIBATCH_SIZE = min(256, TRAIN_BATCH_SIZE)

    # Horizon
    TRAIN_HORIZON = 1024
    EVALUATION_HORIZON = 1024
class Debug(SmallMaps, config.AllGameSystems):
    '''Debug Neural MMO training setting

    A version of the SmallMaps setting with greatly reduced batch parameters.
    Only intended as a tool for identifying bugs in the model or environment'''
    LOAD = False
    LOCAL_MODE = True
    NUM_WORKERS = 1

    SGD_MINIBATCH_SIZE = 100
    TRAIN_BATCH_SIZE = 400
    TRAIN_HORIZON = 200
    EVALUATION_HORIZON = 50

    HIDDEN = 2
    EMBED = 2
### AICrowd competition settings
class Competition(config.AllGameSystems, config.Achievement): pass

class CompetitionRound1(SmallMaps, Competition):
    @property
    def SPAWN(self):
        return self.SPAWN_CONCURRENT

    NENT = 128
    NPOP = 1

class CompetitionRound2(SmallMaps, Competition):
    @property
    def SPAWN(self):
        return self.SPAWN_CONCURRENT

    NENT = 128
    NPOP = 16
    COOPERATIVE = True

class CompetitionRound3(LargeMaps, Competition):
    @property
    def SPAWN(self):
        return self.SPAWN_CONCURRENT

    NENT = 1024
    NPOP = 32
    COOPERATIVE = True
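A quick sketch of how these configs compose, using only values defined above:

```python
config = CompetitionRound1()
print(config.MODEL)  # 'CompetitionRound1', via the MODEL property on RLlibConfig
print(config.NENT)   # 128 concurrent agents in Round 1
print(config.NPOP)   # 1 population
```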