Commit cc8cc7be authored by Jyotish P's avatar Jyotish P

Refactor starter kit

parent f5af046a
# Select one of the images based on your case
# Using a torch or tensorflow image will speed up the image builds
# FROM aicrowd/base-images:neural-mmo-py38-torch-190
# FROM aicrowd/base-images:neural-mmo-py38-torch-180
FROM aicrowd/base-images:neural-mmo-py38-torch-171
# FROM aicrowd/base-images:neural-mmo-py38
# Switch to root for the apt commands below.
USER root
# apt.txt holds the names of the system packages to install.
COPY apt.txt /home/aicrowd/apt.txt
# Install every package listed in apt.txt, then remove apt's package lists
# and cache within the same RUN step.
RUN apt update -qq && apt install -qq -y `cat /home/aicrowd/apt.txt` \
&& rm -rf /var/lib/apt/list* /var/cache/apt/*
# Switch to the aicrowd user for the remaining steps.
USER aicrowd
# requirements.txt is copied and installed before the rest of the sources.
# NOTE(review): HOME_DIR is not defined in the visible lines; presumably it
# comes from the base image -- confirm. The relative path passed to pip also
# assumes the working directory is ${HOME_DIR} -- confirm.
COPY --chown=1001:1001 requirements.txt ${HOME_DIR}/requirements.txt
RUN pip install -r requirements.txt --no-cache-dir
# Copy the rest of the build context, owned by UID/GID 1001.
COPY --chown=1001:1001 . ${HOME_DIR}
'''Main file for the neural-mmo/projekt demo
/projekt contains all necessary RLlib wrappers to train and
evaluate capable policies on Neural MMO as well as rendering,
logging, and visualization tools.
Associated docs and tutorials are hosted on'''
from pdb import set_trace as T
import numpy as np
import torch
from fire import Fire
import ray
from ray import rllib
from forge.ethyr.torch import utils
from forge.trinity.scripted import baselines
from forge.trinity.visualize import BokehServer
from forge.trinity.evaluator import Evaluator
import projekt
from projekt import rllib_wrapper as wrapper
from forge.blade.core import terrain
def createPolicies(config, mapPolicy):
    '''Build the RLlib policy table

    One entry is produced for each index in range(config.NPOLICIES).
    The entry is stored under mapPolicy(index) and is the tuple
    (None, observation space, action space, per-policy params), where
    the params carry the index and both spaces.'''
    obs_space = wrapper.observationSpace(config)
    act_space = wrapper.actionSpace(config)

    def policy_spec(agent_id):
        # Per-policy parameters bundled as the last tuple element
        settings = {
            "agent_id": agent_id,
            "obs_space_dict": obs_space,
            "act_space_dict": act_space,
        }
        return (None, obs_space, act_space, settings)

    return {
        mapPolicy(index): policy_spec(index)
        for index in range(config.NPOLICIES)
    }
def loadTrainer(config):
'''Create monolithic RLlib trainer object'''
_memory = 4*1024*1024*1024,
object_store_memory = 200*1024*1024
#Register custom env
lambda config: wrapper.RLlibEnv(config))
#Create policies
rllib.models.ModelCatalog.register_custom_model('godsword', wrapper.RLlibPolicy)
mapPolicy = lambda agentID: 'policy_{}'.format(agentID % config.NPOLICIES)
policies = createPolicies(config, mapPolicy)
#Instantiate monolithic RLlib Trainer object.
return wrapper.SanePPOTrainer(config={
'num_workers': config.NUM_WORKERS,
'num_gpus_per_worker': config.NUM_GPUS_PER_WORKER,
'num_gpus': config.NUM_GPUS,
'num_envs_per_worker': 1,
'train_batch_size': config.TRAIN_BATCH_SIZE // 2,
'rollout_fragment_length': config.ROLLOUT_FRAGMENT_LENGTH,
'sgd_minibatch_size': config.SGD_MINIBATCH_SIZE,
'num_sgd_iter': config.NUM_SGD_ITER,
'framework': 'torch',
'horizon': np.inf,
'soft_horizon': False,
'no_done_at_end': False,
'callbacks': wrapper.RLlibLogCallbacks,
'env_config': {
'config': config
'multiagent': {
'policies': policies,
'policy_mapping_fn': mapPolicy,
'count_steps_by': 'env_steps'
'model': {
'custom_model': 'godsword',
'custom_model_config': {'config': config},
'max_seq_len': config.LSTM_BPTT_HORIZON
def loadEvaluator(config):
    '''Build the evaluator used for test/render runs

    When config.SCRIPTED is truthy it names an attribute of the
    baselines module, which is passed to Evaluator. Otherwise the
    RLlib evaluator is built around loadModel(config).'''
    scripted_name = config.SCRIPTED
    if not scripted_name:
        return wrapper.RLlibEvaluator(config, loadModel(config))

    scripted_policy = getattr(baselines, scripted_name)
    return Evaluator(config, scripted_policy)
def loadModel(config):
'''Load NN weights and optimizer state'''
trainer = loadTrainer(config)
if config.LOAD:
return trainer
class Anvil():
'''Neural MMO CLI powered by Google Fire
Main file for the RLlib demo included with Neural MMO.
python <COMMAND> --config=<CONFIG> --ARG1=<ARG1> ...
The User API documents core env flags. Additional config options specific
to this demo are available in projekt/
The --config flag may be used to load an entire group of options at once.
The Debug, SmallMaps, and LargeMaps options are included in this demo with
the latter being the default -- or write your own in projekt/
def __init__(self, **kwargs):
if 'help' in kwargs:
if 'config' in kwargs:
config = kwargs.pop('config')
config = getattr(projekt.config, config)()
config = projekt.config.LargeMaps()
self.config = config
def train(self, **kwargs):
'''Train a model starting with the current value of --MODEL'''
def evaluate(self, **kwargs):
'''Evaluate a model on --EVAL_MAPS maps'''
self.config.EVALUATE = True
def render(self, **kwargs):
'''Start a WebSocket server that autoconnects to the 3D Unity client'''
self.config.RENDER = True
def generate(self, **kwargs):
'''Generate game maps for the current --config setting'''
def visualize(self, **kwargs):
'''Training/Evaluation results Web dashboard'''
if __name__ == '__main__':
def Display(lines, out):
text = "\n".join(lines) + "\n"
from fire import core
core.Display = Display
from agents.neural_baseline_agent import NeuralBaselineAgent
from agents.random_agent import RandomNeuralMMOAgent
from agents.scripted_baseline_agent import BaselineForageAgent, BaselineCombatAgent, BaselineRandomAgent
__all__ = [
from os import stat_result
from utils.base_agent import NeuralMMOAgent
from utils.env_spaces import get_action_spaces
from evaluator.base_agent import NeuralMMOAgent
import projekt
from neural_mmo import projekt
from Forge import loadModel
from neural_mmo.Forge import loadModel
class NeuralBaselineAgent(NeuralMMOAgent):
def __init__(self):
from utils.base_agent import NeuralMMOAgent
from utils.env_spaces import get_action_spaces
from evaluator.base_agent import NeuralMMOAgent
from flexdict import FlexDict
from collections import defaultdict
from import Action
from flexdict import FlexDict
from collections import defaultdict
import gym
from neural_mmo import projekt
def get_action_spaces():
    """Return the nested action-space mapping for the SmallMaps config.

    For each action in ``Action.edges`` and each argument in that action's
    ``edges`` (both visited in sorted order), the mapping stores a
    ``gym.spaces.Discrete`` sized by ``arg.N(config)``.
    """
    small_maps = projekt.config.SmallMaps()
    space_map = FlexDict(defaultdict(FlexDict))
    for action in sorted(Action.edges):
        for argument in sorted(action.edges):
            size = argument.N(small_maps)
            space_map[action][argument] = gym.spaces.Discrete(size)
    return space_map
class RandomNeuralMMOAgent(NeuralMMOAgent):
def __init__(self):
from utils.base_agent import NeuralMMOAgent
from forge.ethyr.torch import utils
from forge.trinity.scripted import baselines
import projekt
from evaluator.base_agent import NeuralMMOAgent
from neural_mmo.forge.trinity.scripted import baselines
from neural_mmo import projekt
class BaselineForageAgent(NeuralMMOAgent):
def __init__(self):
from agents import BaselineCombatAgent, BaselineForageAgent, RandomNeuralMMOAgent, BaselineRandomAgent
class LocalEvaluationConfig:
    """Agent line-up used for local evaluation runs."""

    # Agent class(es) playing as the evaluated player.
    player_agents = [BaselineCombatAgent]

    # Opponent roster: 50 + 45 + 6 + 26 = 127 agent classes in total.
    opponents = (
        [BaselineForageAgent] * 50
        + [BaselineCombatAgent] * 45
        + [RandomNeuralMMOAgent] * 6
        + [BaselineRandomAgent] * 26
    )
from gym.envs.registration import register
id="neuralmmo-v0", entry_point="gym_neuralmmo.envs:NeuralMMOEval",
id="neuralmmo-v1", entry_point="gym_neuralmmo.envs:NeuralMMOTrain",
from gym_neuralmmo.envs.neuralmmo_eval import NeuralMMOEval
from gym_neuralmmo.envs.neuralmmo_train import NeuralMMOTrain
import gym
from gym import error, spaces, utils
from gym.utils import seeding
from import ai
from forge.trinity.env import Env
import projekt
from import Action
from flexdict import FlexDict
from collections import defaultdict
def get_action_spaces(config):
    """Return the nested action-space mapping for ``config``.

    Each ``[action][argument]`` slot, taken from ``Action.edges`` and the
    action's own ``edges`` in sorted order, holds a ``gym.spaces.Discrete``
    whose size is ``argument.N(config)``.
    """
    nested_spaces = FlexDict(defaultdict(FlexDict))
    for action in sorted(Action.edges):
        for argument in sorted(action.edges):
            nested_spaces[action][argument] = gym.spaces.Discrete(
                argument.N(config)
            )
    return nested_spaces
class NeuralMMOEval(gym.Env):
def __init__(self):
config = projekt.config.SmallMaps()
self.action_space = get_action_spaces(config)
self.env = Env(config)
self.agents_in_play = {}
self.available_agents = []
self.alive_agents = []
def reset(self):
self.agents_in_play = {}
self.available_agents = []
self.dead_agents = []
self.observations = self.env.reset()
self.alive_agents = list(self.observations.keys())
self.player_idx = self.alive_agents[0]
self.agents_in_play[self.alive_agents[0]] = self.player_agent
self.available_agents = self.eval_agents[:]
self.actions = self.get_agent_actions()
return self.observations[self.player_idx]
def get_available_agent(self):
return self.available_agents.pop()
return self.get_default_agent()
def assign_agents(self):
for agent in self.alive_agents:
if agent not in self.agents_in_play:
self.agents_in_play[agent] = self.get_available_agent()
def get_agent_actions(self):
actions = {}
for agent in self.observations:
if agent in self.dead_agents:
actions[agent] = self.agents_in_play[agent].compute_action(
return actions
def step(self,action):
self.observations, dones, rewards, _ = self.env.step(self.actions, preprocessActions = False)
for agent in dones:
if dones[agent] == -1:
self.actions = self.get_agent_actions()
self.alive_agents = list(self.observations.keys())
return self.parse_observations(self.observations, dones, rewards, _)
def set_player_agent(self, player_agent):
    """Store ``player_agent`` on the environment as ``self.player_agent``."""
    chosen_agent = player_agent
    self.player_agent = chosen_agent
def set_eval_agents(self, eval_agents):
    """Store ``eval_agents`` on the environment as ``self.eval_agents``."""
    roster = eval_agents
    self.eval_agents = roster
def get_default_agent(self):
    """Placeholder hook; always raises ``NotImplementedError``."""
    raise NotImplementedError()
def parse_observations(self, obs, dones, rewards, _):
    """Reduce per-agent step results to the player's entries.

    Looks up ``self.player_idx`` in ``obs``, ``dones`` and ``rewards`` and
    returns three single-key dicts (key ``"player"``) followed by ``_``
    passed through unchanged.
    """
    player = self.player_idx
    player_obs = {"player": obs[player]}
    player_done = {"player": dones[player]}
    player_reward = {"player": rewards[player]}
    return player_obs, player_done, player_reward, _
import gym
from gym import error, spaces, utils
from gym.utils import seeding
from import ai
from forge.trinity import env
import projekt
class NeuralMMOTrain(env.Env,gym.Env):
def __init__(self):
config = projekt.config.SmallMaps()
from setuptools import setup
setup(name="gym_neuralmmo", version="0.0.2", install_requires=["gym","flexdict"])
......@@ -2,6 +2,8 @@ from abc import ABC, abstractmethod
class NeuralMMOAgent(ABC):
agent_type = "scripted"
def register_reset(self, observations):
def initialize_agents_array(agents):
    """Replace each entry of ``agents`` with the result of calling it.

    The list is modified in place, slot by slot; nothing is returned.
    """
    for slot, factory in enumerate(agents):
        instance = factory()
        agents[slot] = instance
def load_agents(agents_config):
    """Return ``(player_agents, opponents)`` read from ``agents_config``."""
    players = agents_config.player_agents
    rivals = agents_config.opponents
    return players, rivals
import sys
from neural_mmo.forge.trinity.env import Env
from import static as Action
from neural_mmo import projekt
from forge.ethyr.torch import utils
from forge.trinity.env import Env
from import static as Action
import projekt
from utils.helpers import load_agents
from evaluator.helpers import load_agents
from config import LocalEvaluationConfig
import random
import copy
def assign_agents(player_agent,opponent_agents):
player_index = 0
def assign_agents(player_agent, opponent_agents):
player_index = 0
if len(opponent_agents) != 127:
raise Exception("Number of opponent agents should add up to exactly 127")
player_index = random.randint(0,127)
player_index = random.randint(0, 127)
agents = copy.deepcopy(opponent_agents)
return agents,player_index
agents.insert(player_index, player_agent)
return agents, player_index
def run_episode(player_index, agents, N_TIME_STEPS):
......@@ -31,11 +29,11 @@ def run_episode(player_index, agents, N_TIME_STEPS):
obs = env.reset()
entids = list(obs.keys())
agent_entid_map = dict(zip(range(len(agents)), entids))
entid_agent_map = {x[1]:x[0] for x in agent_entid_map.items()}
for idx,agent in enumerate(agents):
if agent.type == 'neural':
entid_agent_map = {x[1]: x[0] for x in agent_entid_map.items()}
for idx, agent in enumerate(agents):
if agent.type == "neural":
actions = {}
for entid in entids:
actions[entid] = agents[entid_agent_map[entid]].register_reset(obs[entid])
......@@ -50,7 +48,9 @@ def run_episode(player_index, agents, N_TIME_STEPS):
targID = actions[entid][Action.Attack][Action.Target]
actions[entid][Action.Attack][Action.Target] = realm.entity(targID)
obs,dones,rewards,_ = env.step(actions,omitDead=True,preprocess=neural_agents)
obs, dones, rewards, _ = env.step(
actions, omitDead=True, preprocess=neural_agents
for entid in sorted(alive_agents):
if entid not in list(obs.keys()):
......@@ -63,37 +63,46 @@ def run_episode(player_index, agents, N_TIME_STEPS):
actions[entid] = agents[entid_agent_map[entid]].compute_action(obs[entid])
n_steps += 1
for entid in sorted(list(obs.keys())):
if entid not in dead_agents:
logs = env.terminal()
player_entid = agent_entid_map[player_index]
logs = env.terminal()
player_entid = agent_entid_map[player_index]
player_log = {}
player_log["Achievement"] = logs['Stats']['Achievement'][dead_agents.index(player_entid)]
player_log["Equipment"] = logs['Stats']['Equipment'][dead_agents.index(player_entid)]
player_log["Exploration"] = logs['Stats']['Exploration'][dead_agents.index(player_entid)]
player_log["PlayerKills"] = logs['Stats']['PlayerKills'][dead_agents.index(player_entid)]
player_log["Foraging"] = logs['Stats']['Foraging'][dead_agents.index(player_entid)]
player_log["Achievement"] = logs["Stats"]["Achievement"][
player_log["Equipment"] = logs["Stats"]["Equipment"][
player_log["Exploration"] = logs["Stats"]["Exploration"][
player_log["PlayerKills"] = logs["Stats"]["PlayerKills"][
player_log["Foraging"] = logs["Stats"]["Foraging"][dead_agents.index(player_entid)]
return player_log
def print_statistics(player_statistics,episode):
    """Print a one-based episode banner followed by the player's statistics.

    Reads the 'Achievement', 'Equipment', 'Exploration', 'PlayerKills' and
    'Foraging' entries of ``player_statistics`` and prints one per line.
    """
    banner = "======= Episode {} =======".format(episode+1)
    print(banner)
    rows = {
        "Achievement ": 'Achievement',
        "Equipment ": 'Equipment',
        "Exploration ": 'Exploration',
        "PlayerKills ": 'PlayerKills',
        "Foraging ": 'Foraging',
    }
    for label, key in rows.items():
        print(label, player_statistics[key])
def print_statistics(player_statistics, episode):
    """Print the episode header and the player's five tracked statistics.

    The header shows ``episode + 1``. Each following line shows one of the
    "Achievement", "Equipment", "Exploration", "PlayerKills" and "Foraging"
    values looked up in ``player_statistics``.
    """
    print("======= Episode {} =======".format(episode + 1))
    labelled_keys = (
        ("Achievement ", "Achievement"),
        ("Equipment ", "Equipment"),
        ("Exploration ", "Exploration"),
        ("PlayerKills ", "PlayerKills"),
        ("Foraging ", "Foraging"),
    )
    for label, key in labelled_keys:
        print(label, player_statistics[key])
if __name__== "__main__":
player_agent, opponent_agents = load_agents("players.yaml")
if __name__ == "__main__":
player_agent, opponent_agents = load_agents(LocalEvaluationConfig)
for episode in range(N_EPISODES):
agents,player_index = assign_agents(player_agent,opponent_agents)
statistics = run_episode(player_index,agents,N_TIME_STEPS)
agents, player_index = assign_agents(player_agent, opponent_agents)
statistics = run_episode(player_index, agents, N_TIME_STEPS)
print_statistics(statistics, episode)
export PYTHONPATH=$PYTHONPATH:`pwd`/neural-mmo
python ./
from import Action
from flexdict import FlexDict
from collections import defaultdict
import gym
import projekt
def get_action_spaces():
    """Build the action-space mapping using a fresh SmallMaps config.

    Walks ``Action.edges`` and each action's ``edges`` in sorted order and
    stores ``gym.spaces.Discrete(arg.N(config))`` under ``[action][arg]``.
    """
    cfg = projekt.config.SmallMaps()
    result = FlexDict(defaultdict(FlexDict))
    for action in sorted(Action.edges):
        for arg in sorted(action.edges):
            choices = arg.N(cfg)
            result[action][arg] = gym.spaces.Discrete(choices)
    return result
import sys
import yaml
import importlib
def get_agent(agent_dict):
    """Import, instantiate and tag the agent described by ``agent_dict``.

    ``agent_dict["file"]`` is the module name to import,
    ``agent_dict["agent_class"]`` the attribute of that module to call with
    no arguments, and ``agent_dict["agent_type"]`` the value stored on the
    new instance as ``.type``.
    """
    agent_cls = getattr(
        importlib.import_module(agent_dict["file"]),
        agent_dict["agent_class"],
    )
    instance = agent_cls()
    instance.type = agent_dict["agent_type"]
    return instance
def load_agents(agents_config):
with open(agents_config, "r") as stream:
data = yaml.safe_load(stream)
player_agent = get_agent(data["player_agent"])
opponent_agents = []
for agent in data["opponent_agents"]:
for num in range(data["opponent_agents"][agent]["num_agents"]):
return (player_agent, opponent_agents)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment