Changes for v1.5-wandb

parent bd283a48
'''Main file for the neural-mmo/projekt demo
/projekt contains all necessary RLlib wrappers to train and
evaluate capable policies on Neural MMO as well as rendering,
logging, and visualization tools.
Associated docs and tutorials are hosted on jsuarez5341.github.io.'''
from pdb import set_trace as T
from fire import Fire
from copy import deepcopy
import os
import numpy as np
import torch
import ray
from ray import rllib, tune
from ray.tune import CLIReporter
from ray.tune.integration.wandb import WandbLoggerCallback
from projekt import rllib_wrapper as wrapper
import projekt
from neural_mmo.forge.blade.core import terrain
from projekt import config as base_config
from neural_mmo.forge.trinity.scripted import baselines
from neural_mmo.forge.trinity.visualize import BokehServer
from neural_mmo.forge.trinity.evaluator import Evaluator
from neural_mmo.forge.blade.io.action.static import Action
from neural_mmo.forge.ethyr.torch import utils
class ConsoleLog(CLIReporter):
def report(self, trials, done, *sys_info):
os.system('cls' if os.name == 'nt' else 'clear')
super().report(trials, done, *sys_info)
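#ConsoleLog wipes the terminal ('cls' on Windows, 'clear' elsewhere) before
#each Tune progress report so only the most recent status table is shown.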
def createPolicies(config, mapPolicy):
'''Generate RLlib policies'''
obs = wrapper.observationSpace(config)
atns = wrapper.actionSpace(config)
policies = {}
def run_tune_experiment(config):
'''Ray[RLlib, Tune] integration for Neural MMO
Setup custom environment, observations/actions, policies,
and parallel training/evaluation'''
ray.init(local_mode=config.LOCAL_MODE)
#Obs and actions
obs = wrapper.observationSpace(config)
atns = wrapper.actionSpace(config)
#Register custom env and policies
ray.tune.registry.register_env("Neural_MMO",
lambda config: wrapper.RLlibEnv(config))
rllib.models.ModelCatalog.register_custom_model(
'godsword', wrapper.RLlibPolicy)
mapPolicy = lambda agentID : 'policy_{}'.format(
agentID % config.NPOLICIES)
policies = {}
for i in range(config.NPOLICIES):
params = {
"agent_id": i,
@@ -33,32 +58,18 @@ def createPolicies(config, mapPolicy):
key = mapPolicy(i)
policies[key] = (None, obs, atns, params)
return policies
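#Example (illustrative): with NPOLICIES=2 this yields
#   {'policy_0': (None, obs, atns, params), 'policy_1': (None, obs, atns, params)}
#and mapPolicy routes agents 0, 2, 4, ... to 'policy_0' and 1, 3, 5, ... to 'policy_1'.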
def loadTrainer(config):
'''Create monolithic RLlib trainer object'''
torch.set_num_threads(1)
ray.init(local_mode=config.LOCAL_MODE,
_memory=2000 * 1024 * 1024,
object_store_memory=200 * 1024 * 1024,
)
#Register custom env
ray.tune.registry.register_env("Neural_MMO",
lambda config: wrapper.RLlibEnv(config))
#Evaluation config
eval_config = deepcopy(config)
eval_config.EVALUATE = True
eval_config.AGENTS = eval_config.EVAL_AGENTS
#Create policies
rllib.models.ModelCatalog.register_custom_model('godsword', wrapper.RLlibPolicy)
mapPolicy = lambda agentID: 'policy_{}'.format(agentID % config.NPOLICIES)
policies = createPolicies(config, mapPolicy)
#Instantiate monolithic RLlib Trainer object.
return wrapper.SanePPOTrainer(config={
#Create rllib config
rllib_config={
'num_workers': config.NUM_WORKERS,
'num_gpus_per_worker': config.NUM_GPUS_PER_WORKER,
'num_gpus': config.NUM_GPUS,
'num_envs_per_worker': 1,
'train_batch_size': config.TRAIN_BATCH_SIZE,
'rollout_fragment_length': config.ROLLOUT_FRAGMENT_LENGTH,
'sgd_minibatch_size': config.SGD_MINIBATCH_SIZE,
'num_sgd_iter': config.NUM_SGD_ITER,
@@ -66,111 +77,110 @@ def loadTrainer(config):
'horizon': np.inf,
'soft_horizon': False,
'no_done_at_end': False,
'callbacks': wrapper.RLlibLogCallbacks,
'env': 'Neural_MMO',
'env_config': {
'config': config
},
'evaluation_config': {
'env_config': {
'config': eval_config
},
},
'multiagent': {
'policies': policies,
'policy_mapping_fn': mapPolicy,
'count_steps_by': 'agent_steps'
},
'model': {
'custom_model': 'godsword',
'custom_model_config': {'config': config},
'max_seq_len': config.LSTM_BPTT_HORIZON
},
})
def loadEvaluator(config):
'''Create test/render evaluator'''
if config.SCRIPTED:
return Evaluator(config, getattr(baselines, config.SCRIPTED))
else:
return wrapper.RLlibEvaluator(config, loadModel(config))
def loadModel(config):
'''Load NN weights and optimizer state'''
imports()
trainer = loadTrainer(config)
utils.modelSize(trainer.defaultModel())
if config.LOAD:
trainer.restore()
return trainer
def imports():
'''conditional rl imports'''
global torch, ray, rllib, wrapper, utils
from neural_mmo.forge.ethyr.torch import utils
import torch
import ray
from ray import rllib
from projekt import rllib_wrapper as wrapper
'render_env': config.RENDER,
'callbacks': wrapper.RLlibLogCallbacks,
'evaluation_interval': config.EVALUATION_INTERVAL,
'evaluation_num_episodes': config.EVALUATION_NUM_EPISODES,
'evaluation_num_workers': config.EVALUATION_NUM_WORKERS,
'evaluation_parallel_to_training': config.EVALUATION_PARALLEL,
}
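#The evaluation_* keys enable RLlib's built-in periodic evaluation: every
#EVALUATION_INTERVAL training iterations, EVALUATION_NUM_WORKERS workers roll
#out EVALUATION_NUM_EPISODES episodes using the evaluation_config overrides
#(the eval_config copy where AGENTS is replaced by EVAL_AGENTS), optionally
#in parallel with training.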
tune.run(wrapper.RLlibTrainer,
config = rllib_config,
name = config.__class__.__name__,
verbose = config.LOG_LEVEL,
stop = {'training_iteration': config.TRAINING_ITERATIONS},
resume = config.RESUME,
restore= config.RESTORE,
local_dir = 'experiments',
keep_checkpoints_num = config.KEEP_CHECKPOINTS_NUM,
checkpoint_freq = config.CHECKPOINT_FREQ,
checkpoint_at_end = True,
trial_dirname_creator = lambda _: 'Run',
progress_reporter = ConsoleLog(),
reuse_actors = True,
callbacks=[WandbLoggerCallback(
project = 'NeuralMMO',
api_key_file = 'wandb_api_key',
log_config = False)],
)
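#Assuming these defaults, results and checkpoints land under
#experiments/<ConfigName>/Run/ (local_dir + experiment name + trial dirname),
#and WandbLoggerCallback expects a local 'wandb_api_key' file containing the
#W&B API key.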
class Anvil():
'''Neural MMO CLI powered by Google Fire
Main file for the RLlib demo included with Neural MMO.
Usage:
python Forge.py <COMMAND> --config=<CONFIG> --ARG1=<ARG1> ...
The User API documents core env flags. Additional config options specific
to this demo are available in projekt/config.py.
The --config flag may be used to load an entire group of options at once.
The Debug, SmallMaps, and LargeMaps configs are included with this demo;
select one of these defaults from projekt/config.py or write your own.
'''
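#Example invocations (any config attribute can also be overridden on the
#command line via config.override):
#   python Forge.py generate --config=SmallMaps
#   python Forge.py train --config=SmallMaps
#   python Forge.py render --config=SmallMaps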
def __init__(self, **kwargs):
if 'help' in kwargs:
kwargs.pop('help')
assert 'config' in kwargs, 'Specify a config'
config = kwargs.pop('config')
config = getattr(base_config, config)()
config.override(**kwargs)
self.config = config
#Round and round the num_threads flags go
#Which are needed nobody knows!
torch.set_num_threads(1)
os.environ['MKL_NUM_THREADS'] = '1'
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['NUMEXPR_NUM_THREADS'] = '1'
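#Pinning MKL/OMP/numexpr to one thread prevents each Ray worker from spawning
#a full set of BLAS threads and oversubscribing the CPU.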
def train(self, **kwargs):
'''Train a model using the current --config setting'''
run_tune_experiment(self.config)
def evaluate(self, **kwargs):
'''Evaluate a model against EVAL_AGENTS models'''
self.config.TRAINING_ITERATIONS = 0
self.config.EVALUATE = True
self.config.EVALUATION_NUM_WORKERS = self.config.NUM_WORKERS
self.config.EVALUATION_NUM_EPISODES = self.config.NUM_WORKERS
run_tune_experiment(self.config)
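#Evaluation reuses the tune.run pipeline: with TRAINING_ITERATIONS=0 the stop
#condition fires right away, so the run is dominated by evaluation rollouts
#against the EVAL_AGENTS baselines.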
def render(self, **kwargs):
'''Start a WebSocket server that autoconnects to the 3D Unity client'''
self.config.RENDER = True
self.config.NUM_WORKERS = 1
self.evaluate(**kwargs)
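#Rendering is just single-worker evaluation with RENDER=True: the env serves
#state over WebSocket to the Unity3D client, per the docstring above.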
def generate(self, **kwargs):
'''Generate game maps for the current --config setting'''
from neural_mmo.forge.blade.core import terrain
terrain.MapGenerator(self.config).generate()
def visualize(self, **kwargs):
'''Training/Evaluation results Web dashboard'''
BokehServer(self.config)
if __name__ == '__main__':
def Display(lines, out):
text = "\n".join(lines) + "\n"
out.write(text)
from fire import core
core.Display = Display
Fire(Anvil)
\ No newline at end of file
@@ -2,28 +2,46 @@ from pdb import set_trace as T
from neural_mmo.forge.blade import core
from neural_mmo.forge.blade.core import config
import os
from neural_mmo.forge.blade.io.stimulus.static import Stimulus
from neural_mmo.forge.trinity.scripted import baselines
from neural_mmo.forge.trinity.agent import Agent
from neural_mmo.forge.blade.systems.ai import behavior
from projekt import rllib_wrapper
class RLlibConfig:
'''Base config for RLlib Models
Extends core Config, which contains environment, evaluation,
and non-RLlib-specific learning parameters'''
@property
def MODEL(self):
return self.__class__.__name__
#Checkpointing. Resume will load the latest trial, e.g. to continue training
#Restore (overrides resume) will force load a specific checkpoint (e.g. for rendering)
RESUME = False
RESTORE = 'experiments/CompetitionRound1/Dev_9fe1/checkpoint_001000/checkpoint-1000'
#Policy specification
AGENTS = [Agent]
EVAL_AGENTS = [baselines.Meander, baselines.Forage, baselines.Combat, Agent]
EVALUATE = False #Reserved param
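#During evaluation the env is populated with the scripted Meander, Forage, and
#Combat baselines alongside the learned Agent policy.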
#Hardware and debug
NUM_WORKERS = 1
NUM_GPUS_PER_WORKER = 0
NUM_GPUS = 1
EVALUATION_NUM_WORKERS = 3
LOCAL_MODE = False
LOAD = True
#Memory/Batch Scale
TRAIN_EPOCHS = 10000
LOG_LEVEL = 1
#Training and evaluation settings
EVALUATION_INTERVAL = 1
EVALUATION_NUM_EPISODES = 3
EVALUATION_PARALLEL = True
TRAINING_ITERATIONS = 1000
KEEP_CHECKPOINTS_NUM = 5
CHECKPOINT_FREQ = 1
LSTM_BPTT_HORIZON = 16
NUM_SGD_ITER = 1
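#With CHECKPOINT_FREQ=1 and KEEP_CHECKPOINTS_NUM=5, Tune saves a checkpoint
#every training iteration and keeps only the five most recent.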
@@ -41,27 +59,24 @@ class RLlibConfig:
class LargeMaps(core.Config, RLlibConfig, config.AllGameSystems):
'''Large scale Neural MMO training setting
Features up to 1000 concurrent agents and 1000 concurrent NPCs,
1km x 1km maps, and 5/10k timestep train/eval horizons.
This is the default setting as of v1.5 and allows for large
scale multiagent research even on relatively modest hardware'''
#Memory/Batch Scale
NUM_WORKERS = 14
TRAIN_BATCH_SIZE = 64 * 256 * NUM_WORKERS
ROLLOUT_FRAGMENT_LENGTH = 32
SGD_MINIBATCH_SIZE = 128
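#Rough arithmetic: NUM_WORKERS=14 gives TRAIN_BATCH_SIZE = 64*256*14 = 229,376
#agent steps per training iteration, optimized in SGD minibatches of 128.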
#Horizon
TRAIN_HORIZON = 8192
EVALUATION_HORIZON = 8192
class SmallMaps(RLlibConfig, config.AllGameSystems, config.SmallMaps):
'''Small scale Neural MMO training setting
Features up to 128 concurrent agents and 32 concurrent NPCs,
60x60 maps (excluding the border), and 1000 timestep train/eval horizons.
@@ -70,22 +85,18 @@ class SmallMaps(config.SmallMaps, RLlibConfig, config.AllGameSystems):
or as a primary research target for PCG methods.'''
#Memory/Batch Scale
NUM_WORKERS = 28
TRAIN_BATCH_SIZE = 64 * 256 * NUM_WORKERS
ROLLOUT_FRAGMENT_LENGTH = 256
SGD_MINIBATCH_SIZE = 128
#Horizon
TRAIN_HORIZON = 1024
EVALUATION_HORIZON = 1024
#Maps Path
PATH_MAPS = os.path.join(os.getcwd(),'neuralmmo/resource/maps')
class Debug(SmallMaps, config.AllGameSystems):
'''Debug Neural MMO training setting
A version of the SmallMaps setting with greatly reduced batch parameters.
Only intended as a tool for identifying bugs in the model or environment'''
LOAD = False
@@ -102,8 +113,7 @@ class Debug(SmallMaps, config.AllGameSystems):
### AICrowd competition settings
class Competition(config.AllGameSystems, config.Achievement): pass
class CompetitionRound1(config.Achievement, SmallMaps):
@property
def SPAWN(self):
return self.SPAWN_CONCURRENT
@@ -111,7 +121,7 @@ class CompetitionRound1(SmallMaps, Competition):
NENT = 128
NPOP = 1
class CompetitionRound2(config.Achievement, SmallMaps):
@property
def SPAWN(self):
return self.SPAWN_CONCURRENT
@@ -120,7 +130,7 @@ class CompetitionRound2(SmallMaps, Competition):
NPOP = 16
COOPERATIVE = True
class CompetitionRound3(config.Achievement, LargeMaps):
@property
def SPAWN(self):
return self.SPAWN_CONCURRENT
@@ -138,9 +148,9 @@ class LargeMultimodalSkills(LargeMaps, config.AllGameSystems): pass
class MagnifyExploration(SmallMaps, config.Resource, config.Progression):
pass
class Population4(MagnifyExploration):
NENT = 4
class Population32(MagnifyExploration):
NENT = 32
class Population256(MagnifyExploration):
NENT = 256
......