TypeError when passing Team actions to TeamBasedEnv

Hi,

I am trying to write a custom script which runs some baseline scripted agents in the TeamBasedEnv. I am generating the actions in the Dict[int, Dict[int, dict]] format required by TeamBasedEnv.step(), but when TeamBasedEnv.step() passes the formatted actions to self._env.step(actions), an IterableNameComparable error is raised.

My code:

from neurips2022nmmo.scripted import baselines
from neurips2022nmmo import Team
from neurips2022nmmo import CompetitionConfig, TeamBasedEnv

conf = CompetitionConfig()

players = [
    baselines.Fisher,
    baselines.Herbalist,
    baselines.Prospector,
    baselines.Carver,
    baselines.Alchemist,
    baselines.Melee,
    baselines.Range,
    baselines.Mage,
]

class MyTeam(Team):
    def __init__(self, 
                 team_id: str,
                 players: list,
                 conf=None, 
                 **kwargs):
        super().__init__(team_id, conf)
        self.players = [player(config=conf, idx=player_idx) for player_idx, player in enumerate(players)]
            
    def reset(self):
        pass

    def act(self, 
            observations):
        actions = {}
        if "stat" in observations:
            observations.pop("stat")
        for player_idx, obs in observations.items():
            actions[player_idx] = self.players[player_idx](obs)
        return actions
    
my_team = MyTeam(team_id='my_team',
                 players=players,
                 conf=conf)

teams = [scripted.CombatTeam(f"Combat-{i}", conf) for i in range(5)]
teams.extend([scripted.MixtureTeam(f"Mixture-{i}", conf) for i in range(10)])
teams.append(my_team)

class EnvLoop:
    def __init__(self,
                 env_cls,
                 env_conf,
                 teams,
                 **kwargs):
        self.env_conf = env_conf
        self.env = env_cls(conf)
        
        self.teams = teams
    
    def run(self,
            **kwargs):
        '''Runs one episode.'''
        observations, done = self.env.reset(), False
        step_counter = 1
        while not done:
            team_to_player_to_actions = self._get_team_to_player_to_actions(observations)
            observations, rewards, dones, infos = self.env.step(team_to_player_to_actions)
    
    def _get_team_to_player_to_actions(self, observations):
        team_to_player_to_actions = {}
        for team_idx, team_observations in observations.items():
            team_to_player_to_actions[team_idx] = self.teams[team_idx].act(team_observations)
        return team_to_player_to_actions

env_loop = EnvLoop(env_cls=TeamBasedEnv, env_conf=conf, teams=teams)
env_loop.run()

When I print my team_to_player_to_actions dict, I believe it has the required Dict[int, Dict[int, dict]], so I am not really sure what is causing this error:

0
	0
		<class 'nmmo.io.action.Move'>:
			<class 'nmmo.io.action.Direction'>: 1
		<class 'nmmo.io.action.Attack'>:
			<class 'nmmo.io.action.Style'>: 0
			<class 'nmmo.io.action.Target'>: 1
	1
		<class 'nmmo.io.action.Move'>:
			<class 'nmmo.io.action.Direction'>: 1
		<class 'nmmo.io.action.Attack'>:
			<class 'nmmo.io.action.Style'>: 0
			<class 'nmmo.io.action.Target'>: 1
...

The output error message is:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [155], in <cell line: 50>()
     46         return team_to_player_to_actions
     49 env_loop = EnvLoop(env_cls=TeamBasedEnv, env_conf=conf, teams=teams)
---> 50 env_loop.run(verbose=True)

Input In [155], in EnvLoop.run(self, verbose, **kwargs)
     30             for dim_key, dim_val in chosen_action.items():
     31                 print(f'\t\t\t{dim_key}: {dim_val}')
---> 32 observations, rewards, dones, infos = self.env.step(team_to_player_to_actions)
     33 print(f'rewards: {rewards}')
     34 print(f'dones: {dones}')

File /scratch/zciccwf/py36/envs/neurips2022-nmmo/lib/python3.9/site-packages/neurips2022nmmo/env/team_based_env.py:52, in TeamBasedEnv.step(self, actions_by_team)
     49         if i >= 0 and i < len(player_ids):
     50             actions[player_ids[i]] = action
---> 52 observations, rewards, dones, infos = self._env.step(actions)
     54 # delete the observations of the done players
     55 for player_id, done in dones.items():

File /scratch/zciccwf/py36/envs/neurips2022-nmmo/lib/python3.9/site-packages/nmmo/core/env.py:437, in Env.step(self, actions)
    435 for arg, val in args.items():
    436    if arg.argType == nmmo.action.Fixed:
--> 437       self.actions[entID][atn][arg] = arg.edges[val]
    438    elif arg == nmmo.action.Target:
    439       if val >= len(ent.targets):

TypeError: list indices must be integers or slices, not IterableNameComparable

Edited Aug 21, 2022 by christopher.parsonson