delete rllib_requirements

parent a88bde13
@@ -35,3 +35,6 @@ neural-mmo/forge/embyr/UnityClient/Assets/Lava/Lava[[:space:]]shader/wawesMap.ps
neural-mmo/forge/embyr/.git/objects/pack/pack-5c1cfbdf3b08a40d55a16c96a08b0703d8c45d06.pack filter=lfs diff=lfs merge=lfs -text
neural-mmo/.git/objects/pack/pack-761b342ca629338a5b6f4a437e2cb3b00764032a.pack filter=lfs diff=lfs merge=lfs -text
neural-mmo/baselines/models/CompetitionRound1/checkpoint filter=lfs diff=lfs merge=lfs -text
neuralmmo/baselines/models/CompetitionRound1/checkpoint filter=lfs diff=lfs merge=lfs -text
.git/objects/86/dea2a1e66382a85833663350ccdf3e6f80df37 filter=lfs diff=lfs merge=lfs -text
.git/lfs/objects/2c/d4/2cd453319c09516a158bf133370d7b879474962c9ae2a4c1bbd7c962f3ae0a45 filter=lfs diff=lfs merge=lfs -text
'''Main file for the neural-mmo/projekt demo

/projekt contains all necessary RLlib wrappers to train and
evaluate capable policies on Neural MMO, as well as rendering,
logging, and visualization tools.

Associated docs and tutorials are hosted on jsuarez5341.github.io.'''
from pdb import set_trace as T

import numpy as np
from fire import Fire

import projekt

from neural_mmo.forge.blade.core import terrain
from neural_mmo.forge.trinity.scripted import baselines
from neural_mmo.forge.trinity.visualize import BokehServer
from neural_mmo.forge.trinity.evaluator import Evaluator


def createPolicies(config, mapPolicy):
    '''Generate RLlib policies'''
    obs = wrapper.observationSpace(config)
    atns = wrapper.actionSpace(config)

    policies = {}
    for i in range(config.NPOLICIES):
        params = {
            "agent_id": i,
            "obs_space_dict": obs,
            "act_space_dict": atns}
        key = mapPolicy(i)
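        # None as the policy class tells RLlib to fall back to the trainer's
        # default policy (PPO in this demo)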
        policies[key] = (None, obs, atns, params)

    return policies


def loadTrainer(config):
    '''Create monolithic RLlib trainer object'''
    torch.set_num_threads(1)
    ray.init(local_mode=config.LOCAL_MODE)

    # Register custom env
    ray.tune.registry.register_env("Neural_MMO",
                                   lambda config: wrapper.RLlibEnv(config))

    # Create policies
    rllib.models.ModelCatalog.register_custom_model('godsword', wrapper.RLlibPolicy)
    mapPolicy = lambda agentID: 'policy_{}'.format(agentID % config.NPOLICIES)
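    # Agents map to policies round-robin by id; with the default NPOLICIES=1,
    # all agents share a single policy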
    policies = createPolicies(config, mapPolicy)

    # Instantiate monolithic RLlib Trainer object
    return wrapper.SanePPOTrainer(config={
        'num_workers': config.NUM_WORKERS,
        'num_gpus_per_worker': config.NUM_GPUS_PER_WORKER,
        'num_gpus': config.NUM_GPUS,
        'num_envs_per_worker': 1,
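        # Halved here, apparently to offset the batch doubling flagged in
        # projekt/config.py ('Bug? This gets doubled')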
        'train_batch_size': config.TRAIN_BATCH_SIZE // 2,
        'rollout_fragment_length': config.ROLLOUT_FRAGMENT_LENGTH,
        'sgd_minibatch_size': config.SGD_MINIBATCH_SIZE,
        'num_sgd_iter': config.NUM_SGD_ITER,
        'framework': 'torch',
        'horizon': np.inf,
        'soft_horizon': False,
        'no_done_at_end': False,
        'callbacks': wrapper.RLlibLogCallbacks,
        'env_config': {
            'config': config
        },
        'multiagent': {
            'policies': policies,
            'policy_mapping_fn': mapPolicy,
            'count_steps_by': 'env_steps'
        },
        'model': {
            'custom_model': 'godsword',
            'custom_model_config': {'config': config},
            'max_seq_len': config.LSTM_BPTT_HORIZON
        },
    })


def loadEvaluator(config):
    '''Create test/render evaluator'''
    if config.SCRIPTED:
        return Evaluator(config, getattr(baselines, config.SCRIPTED))
    else:
        return wrapper.RLlibEvaluator(config, loadModel(config))


def loadModel(config):
    '''Load NN weights and optimizer state'''
    trainer = loadTrainer(config)
    utils.modelSize(trainer.defaultModel())

    if config.LOAD:
        trainer.restore()

    return trainer


class Anvil():
    '''Neural MMO CLI powered by Google Fire

    Main file for the RLlib demo included with Neural MMO.

    Usage:
        python Forge.py <COMMAND> --config=<CONFIG> --ARG1=<ARG1> ...

    The User API documents core env flags. Additional config options specific
    to this demo are available in projekt/config.py.

    The --config flag may be used to load an entire group of options at once.
    The Debug, SmallMaps, and LargeMaps options are included in this demo,
    with the latter being the default -- or write your own in projekt/config.py.
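
    Example invocations (illustrative; any class defined in projekt/config.py
    may be passed to --config):
        python Forge.py generate --config=SmallMaps
        python Forge.py train --config=SmallMaps --LOAD=False
        python Forge.py evaluate --config=SmallMaps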
    '''
    def __init__(self, **kwargs):
        if 'help' in kwargs:
            kwargs.pop('help')

        if 'config' in kwargs:
            config = kwargs.pop('config')
            config = getattr(projekt.config, config)()
        else:
            config = projekt.config.LargeMaps()

        config.override(**kwargs)
        self.config = config

        # Only import the torch/ray/RLlib stack when a neural (non-scripted)
        # model is requested; scripted baselines skip the heavy dependencies
        if not config.SCRIPTED:
            global torch, ray, rllib, wrapper, utils
            from neural_mmo.forge.ethyr.torch import utils
            import torch
            import ray
            from ray import rllib
            from projekt import rllib_wrapper as wrapper
    def train(self, **kwargs):
        '''Train a model starting with the current value of --MODEL'''
        loadModel(self.config).train()

    def evaluate(self, **kwargs):
        '''Evaluate a model on --EVAL_MAPS maps'''
        self.config.EVALUATE = True
        loadEvaluator(self.config).evaluate(self.config.GENERALIZE)

    def render(self, **kwargs):
        '''Start a WebSocket server that autoconnects to the 3D Unity client'''
        self.config.RENDER = True
        loadEvaluator(self.config).render()

    def generate(self, **kwargs):
        '''Generate game maps for the current --config setting'''
        terrain.MapGenerator(self.config).generate()

    def visualize(self, **kwargs):
        '''Web dashboard for training/evaluation results'''
        BokehServer(self.config)


def main():
    def Display(lines, out):
        text = "\n".join(lines) + "\n"
        out.write(text)

    from fire import core
    core.Display = Display

    Fire(Anvil)


if __name__ == "__main__":
    main()
#Resources directory
resource
@@ -3,6 +3,7 @@ from pdb import set_trace as T
from neural_mmo.forge.blade import core
from neural_mmo.forge.blade.core import config
import os
class RLlibConfig:
    '''Base config for RLlib Models
@@ -78,6 +79,9 @@ class SmallMaps(config.SmallMaps, RLlibConfig, config.AllGameSystems):
    TRAIN_HORIZON = 1024
    EVALUATION_HORIZON = 1024

    # Maps Path
    PATH_MAPS = os.path.join(os.getcwd(), 'neuralmmo/resource')
class Debug(SmallMaps, config.AllGameSystems):
    '''Debug Neural MMO training setting
......
___ ___ ___ ___
/__/\ /__/\ /__/\ / /\
\ \:\ | |::\ | |::\ / /::\ An open source
\ \:\ | |:|:\ | |:|:\ / /:/\:\ project originally
_____\__\:\ __|__|:|\:\ __|__|:|\:\ / /:/ \:\ founded by Joseph Suarez
/__/::::::::\ /__/::::| \:\ /__/::::| \:\ /__/:/ \__\:\ and formalized at OpenAI
\ \:\~~\~~\/ \ \:\~~\__\/ \ \:\~~\__\/ \ \:\ / /:/
\ \:\ ~~~ \ \:\ \ \:\ \ \:\ /:/ Now developed and
\ \:\ \ \:\ \ \:\ \ \:\/:/ maintained at MIT in
\ \:\ \ \:\ \ \:\ \ \::/ Phillip Isola's lab
\__\/ \__\/ \__\/ \__\/
from projekt import config
from pdb import set_trace as T
import numpy as np
import os
from neural_mmo.forge.blade import core
from neural_mmo.forge.blade.core import config
from neural_mmo.forge.blade.systems.ai import behavior
class Base(core.Config):
    '''Base config for RLlib Models

    Extends core Config, which contains environment, evaluation,
    and non-RLlib-specific learning parameters'''

    @property
    def MODEL(self):
        return self.__class__.__name__

    # Hardware Scale
    NUM_GPUS_PER_WORKER = 0
    NUM_GPUS = 1
    NUM_WORKERS = 6
    LOCAL_MODE = False
    LOAD = True

    # Memory/Batch Scale
    TRAIN_EPOCHS = 10000
    TRAIN_BATCH_SIZE = 256 * NUM_WORKERS  # Bug? This gets doubled
    ROLLOUT_FRAGMENT_LENGTH = 256
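    # Truncated-BPTT length for the recurrent model; Forge.py passes this to
    # RLlib as the model's 'max_seq_len'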
    LSTM_BPTT_HORIZON = 16
    SGD_MINIBATCH_SIZE = min(256, TRAIN_BATCH_SIZE)
    NUM_SGD_ITER = 1

    # Model
    SCRIPTED = None
    N_AGENT_OBS = 100
    NPOLICIES = 1
    HIDDEN = 64
    EMBED = 64

    # Reward
    COOP = False
    TEAM_SPIRIT = 0.0
    ACHIEVEMENT_SCALE = 1.0/15.0


class LargeMaps(Base):
    '''Large scale Neural MMO training setting

    Features up to 1000 concurrent agents and 1000 concurrent NPCs,
    1km x 1km maps, and 5/10k timestep train/eval horizons

    This is the default setting as of v1.5 and allows for large
    scale multiagent research even on relatively modest hardware'''

    # Path settings
    PATH_MAPS = core.Config.PATH_MAPS_LARGE

    # Hardware Scale
    NUM_WORKERS = 16
    LOCAL_MODE = False
    LOAD = True

    # Memory/Batch Scale
    TRAIN_BATCH_SIZE = 32 * NUM_WORKERS  # Bug? This gets doubled
    ROLLOUT_FRAGMENT_LENGTH = 32
    SGD_MINIBATCH_SIZE = 256

    # Horizon
    TRAIN_HORIZON = 8192
    EVALUATION_HORIZON = 8192

    # Population
    NENT = 2048
    NMOB = 1024


class SmallMaps(Base):
    '''Small scale Neural MMO training setting

    Features up to 128 concurrent agents and 32 concurrent NPCs,
    60x60 maps (excluding the border), and 1000 timestep train/eval horizons.

    This setting is modeled off of v1.1-v1.4. It is appropriate as a quick
    training task for new ideas, a transfer target for agents trained on
    large maps, or as a primary research target for PCG methods.'''

    # Path settings
    PATH_MAPS = core.Config.PATH_MAPS_SMALL

    # Horizon
    TRAIN_HORIZON = 1024
    EVALUATION_HORIZON = 1024

    # Scale
    TERRAIN_CENTER = 128
    NENT = 256
    NMOB = 128

    # Players spawned per tick
    PLAYER_SPAWN_ATTEMPTS = 2

    # NPC parameters
    NPC_LEVEL_MAX = 30
    NPC_LEVEL_SPREAD = 5


class Debug(SmallMaps, config.AllGameSystems):
    '''Debug Neural MMO training setting

    A version of the SmallMaps setting with greatly reduced batch parameters.
    Only intended as a tool for identifying bugs in the model or environment'''
    MODEL = None
    LOCAL_MODE = True
    NUM_WORKERS = 1

    SGD_MINIBATCH_SIZE = 100
    TRAIN_BATCH_SIZE = 400
    TRAIN_HORIZON = 200
    EVALUATION_HORIZON = 50

    HIDDEN = 2
    EMBED = 2


### NeurIPS Experiments
class SmallMultimodalSkills(SmallMaps, config.AllGameSystems): pass
class LargeMultimodalSkills(LargeMaps, config.AllGameSystems): pass


class MagnifyExploration(SmallMaps, config.Resource, config.Progression):
    pass

class Population4(MagnifyExploration):
    NENT = 4

class Population32(MagnifyExploration):
    NENT = 32

class Population256(MagnifyExploration):
    NENT = 256


class DomainRandomization16384(SmallMaps, config.AllGameSystems):
    TERRAIN_TRAIN_MAPS = 16384

class DomainRandomization256(SmallMaps, config.AllGameSystems):
    TERRAIN_TRAIN_MAPS = 256

class DomainRandomization32(SmallMaps, config.AllGameSystems):
    TERRAIN_TRAIN_MAPS = 32

class DomainRandomization1(SmallMaps, config.AllGameSystems):
    TERRAIN_TRAIN_MAPS = 1


class TeamBased(MagnifyExploration, config.Combat):
    NENT = 128
    NPOP = 32
    COOP = True
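    # Presumably the individual-vs-team reward blend: 0.0 is fully individual
    # reward, 1.0 fully shared (cf. the TEAM_SPIRIT = 0.0 default in Base)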
    TEAM_SPIRIT = 0.5

    @property
    def SPAWN(self):
        return self.SPAWN_CONCURRENT


### AICrowd competition settings
class Competition(config.AllGameSystems, config.Achievement): pass

class CompetitionRound1(SmallMaps, Competition):
    @property
    def SPAWN(self):
        return self.SPAWN_CONCURRENT

    NENT = 128
    NPOP = 1
    COOP = False

class CompetitionRound2(SmallMaps, Competition):
    @property
    def SPAWN(self):
        return self.SPAWN_CONCURRENT

    NENT = 128
    NPOP = 16
    COOP = True

class CompetitionRound3(LargeMaps, Competition):
    @property
    def SPAWN(self):
        return self.SPAWN_CONCURRENT

    NENT = 1024
    NPOP = 32
    COOP = True
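

# As the Forge.py docstring notes, custom config groups can be defined here
# and selected with --config=<ClassName>. A minimal illustrative sketch; the
# class name and overridden values below are hypothetical, not part of the repo:
class MyExperiment(SmallMaps, config.AllGameSystems):
    '''Example custom config: python Forge.py train --config=MyExperiment'''
    NUM_WORKERS = 2
    NENT = 64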
neuralmmo/resource
\ No newline at end of file