Commit 3f73259a authored by Siddhartha Laghuvarapu
Browse files

Add neural baseline agent

parent c94c9fd4
'''Main file for the neural-mmo/projekt demo
/projekt contains all necessary RLlib wrappers to train and
evaluate capable policies on Neural MMO as well as rendering,
logging, and visualization tools.
Associated docs and tutorials are hosted on'''
from pdb import set_trace as T
import numpy as np
import torch
from fire import Fire
import ray
from ray import rllib
from forge.ethyr.torch import utils
from forge.trinity.scripted import baselines
from forge.trinity.visualize import BokehServer
from forge.trinity.evaluator import Evaluator
import projekt
from projekt import rllib_wrapper as wrapper
from forge.blade.core import terrain
def createPolicies(config, mapPolicy):
    '''Build the RLlib multi-agent policy table

    Args:
        config: Experiment configuration; config.NPOLICIES sets how many
            policy entries are generated
        mapPolicy: Callable that turns an integer policy index into the
            key the policy is stored under

    Returns:
        A dict from policy key to a (None, obs, atns, params) tuple. The
        two spaces come from wrapper.observationSpace and
        wrapper.actionSpace; params records the policy index together
        with both spaces.
    '''
    obs_space = wrapper.observationSpace(config)
    act_space = wrapper.actionSpace(config)

    def spec(idx):
        # Per-policy settings carried as the last element of the spec
        extras = {
            "agent_id": idx,
            "obs_space_dict": obs_space,
            "act_space_dict": act_space,
        }
        return (None, obs_space, act_space, extras)

    return {mapPolicy(idx): spec(idx) for idx in range(config.NPOLICIES)}
# NOTE(review): this definition is incomplete as captured here. The statement
# under "#Register custom env" is only the tail of a call, and the dict
# literals passed to SanePPOTrainer are never closed. Recover the missing
# lines from the repository before relying on this function.
def loadTrainer(config):
'''Create monolithic RLlib trainer object

Registers wrapper.RLlibPolicy under the model name 'godsword', builds the
policy table with createPolicies, and returns a wrapper.SanePPOTrainer
whose settings are read from config.'''
#Register custom env
# NOTE(review): the line that opens this call is not visible; only its
# final argument (a factory building wrapper.RLlibEnv) remains.
lambda config: wrapper.RLlibEnv(config))
#Create policies
rllib.models.ModelCatalog.register_custom_model('godsword', wrapper.RLlibPolicy)
# Agent IDs are assigned to policy keys by ID modulo config.NPOLICIES
mapPolicy = lambda agentID: 'policy_{}'.format(agentID % config.NPOLICIES)
policies = createPolicies(config, mapPolicy)
#Instantiate monolithic RLlib Trainer object.
return wrapper.SanePPOTrainer(config={
'num_workers': config.NUM_WORKERS,
'num_gpus_per_worker': config.NUM_GPUS_PER_WORKER,
'num_gpus': config.NUM_GPUS,
'num_envs_per_worker': 1,
# NOTE(review): uses half of config.TRAIN_BATCH_SIZE; the reason is not
# visible in this file
'train_batch_size': config.TRAIN_BATCH_SIZE // 2,
'rollout_fragment_length': config.ROLLOUT_FRAGMENT_LENGTH,
'sgd_minibatch_size': config.SGD_MINIBATCH_SIZE,
'num_sgd_iter': config.NUM_SGD_ITER,
'framework': 'torch',
'horizon': np.inf,
'soft_horizon': False,
'no_done_at_end': False,
'callbacks': wrapper.RLlibLogCallbacks,
'env_config': {
'config': config
# NOTE(review): no closing brace for 'env_config' is visible before the
# next key; the same applies to 'multiagent' and 'model' below.
'multiagent': {
'policies': policies,
'policy_mapping_fn': mapPolicy,
'count_steps_by': 'env_steps'
'model': {
# Must match the name passed to register_custom_model above
'custom_model': 'godsword',
'custom_model_config': {'config': config},
'max_seq_len': config.LSTM_BPTT_HORIZON
def loadEvaluator(config):
    '''Select the evaluator used for testing and rendering

    Args:
        config: Experiment configuration. A truthy config.SCRIPTED names
            an attribute of the baselines module to evaluate in place of
            a trained model

    Returns:
        Evaluator(config, <baseline>) when config.SCRIPTED is set,
        otherwise wrapper.RLlibEvaluator(config, loadModel(config))
    '''
    # No scripted baseline requested: evaluate the loaded neural model
    if not config.SCRIPTED:
        return wrapper.RLlibEvaluator(config, loadModel(config))

    scripted_agent = getattr(baselines, config.SCRIPTED)
    return Evaluator(config, scripted_agent)
# NOTE(review): the body of the `if config.LOAD:` branch is not visible in
# this capture. Callers in this file (loadEvaluator and
# NeuralBaselineAgent.__init__) use the return value unconditionally, so
# `return trainer` is presumably meant to run on every path, not only
# when config.LOAD is set. Confirm against the repository.
def loadModel(config):
'''Load NN weights and optimizer state

Builds the trainer with loadTrainer(config) and returns it.'''
trainer = loadTrainer(config)
if config.LOAD:
return trainer
# Command-line front end for the demo. The visible lines show:
#   - __init__ accepts an optional 'config' keyword naming a class in
#     projekt.config, instantiates it, and stores the result on self.config
#   - evaluate sets self.config.EVALUATE = True
#   - render sets self.config.RENDER = True
# NOTE(review): this class is incomplete as captured here. The class
# docstring is never closed, the `if 'help' in kwargs:` branch has no body,
# the LargeMaps assignment appears to have lost its guarding `else:`, and
# train/evaluate/render/generate/visualize show only their docstrings and
# flag assignments. Recover the missing lines from the repository.
class Anvil():
'''Neural MMO CLI powered by Google Fire
Main file for the RLlib demo included with Neural MMO.
python <COMMAND> --config=<CONFIG> --ARG1=<ARG1> ...
The User API documents core env flags. Additional config options specific
to this demo are available in projekt/
The --config flag may be used to load an entire group of options at once.
The Debug, SmallMaps, and LargeMaps options are included in this demo with
the latter being the default -- or write your own in projekt/
def __init__(self, **kwargs):
if 'help' in kwargs:
if 'config' in kwargs:
config = kwargs.pop('config')
config = getattr(projekt.config, config)()
config = projekt.config.LargeMaps()
self.config = config
def train(self, **kwargs):
'''Train a model starting with the current value of --MODEL'''
def evaluate(self, **kwargs):
'''Evaluate a model on --EVAL_MAPS maps'''
self.config.EVALUATE = True
def render(self, **kwargs):
'''Start a WebSocket server that autoconnects to the 3D Unity client'''
self.config.RENDER = True
def generate(self, **kwargs):
'''Generate game maps for the current --config setting'''
def visualize(self, **kwargs):
'''Training/Evaluation results Web dashboard'''
# Script entry point: replaces fire.core.Display with a local Display
# function.
# NOTE(review): in the visible lines Display joins `lines` into `text` but
# never writes it to `out`, and nothing launches the CLI after the patch.
# Lines appear to be missing from this capture; confirm against the
# repository.
if __name__ == '__main__':
def Display(lines, out):
text = "\n".join(lines) + "\n"
from fire import core
core.Display = Display
from os import stat_result
from utils.base_agent import NeuralMMOAgent
from utils.env_spaces import get_action_spaces
import projekt
from Forge import loadModel
class NeuralBaselineAgent(NeuralMMOAgent):
    '''Agent that acts with the neural policy returned by Forge.loadModel

    Both public entry points wrap the observation they receive under key 0,
    query the trainer, and return the action stored under key 0.
    '''

    def __init__(self):
        '''Build the CompetitionRound1 config and load its trainer'''
        self.config = projekt.config.CompetitionRound1()
        self.trainer = loadModel(self.config)
        # Defined up front so the attribute exists before the first query
        self.state = None

    def _act(self, observations):
        '''Query the trainer with one observation and return its action'''
        # The trainer is queried with a dict; the observation goes in under
        # key 0 and the action is read back from the same key.
        obs = {0: observations}
        # NOTE(review): the second return value is kept in self.state but is
        # not passed back into compute_actions on the next call. Confirm
        # whether the trainer tracks recurrent state internally.
        actions, self.state, _ = self.trainer.compute_actions(obs)
        return actions[0]

    def register_reset(self, observations):
        '''Return an action for the observations passed at reset'''
        return self._act(observations)

    def compute_action(self, observations, info=None):
        '''Return an action for the given observations

        Args:
            observations: Observation forwarded to the trainer
            info: Accepted as part of the signature; not used here
        '''
        return self._act(observations)
\ No newline at end of file
......@@ -4,21 +4,25 @@
# Max number of opponent agents is 127
file: scripted_baseline_agent
agent_class: BaselineForageAgent
file: neural_baseline_agent
agent_class: NeuralBaselineAgent
agent_type: neural
file: scripted_baseline_agent
agent_class: BaselineForageAgent
agent_type: scripted
num_agents: 50
file: scripted_baseline_agent
agent_class: BaselineForageAgent
agent_type: scripted
num_agents: 51
file: scripted_baseline_agent
agent_class: BaselineForageAgent
agent_type: scripted
num_agents: 26
