Fix rollout.py

f43da181 · Siddhartha Laghuvarapu · 685dec5f · f43da181
Commit f43da181 authored 3 years ago by Siddhartha Laghuvarapu
--- a/evaluator/rollout.py
+++ b/evaluator/rollout.py
@@ -13,6 +13,54 @@ from config import LocalEvaluationConfig
 import random
 import copy

+class ProxyEnv(Env):
+   #Env subclass whose step() optionally decodes index-valued action arguments, steps the realm, and returns (obs, rewards, dones, {})
+   def step(self, actions, preprocess=set(), omitDead=True): #NOTE(review): mutable default set(); it is only iterated here, never mutated
+      #Preprocess actions: for each living entID in preprocess, rewrite actions[entID] in place so index values become the objects they select
+      for entID in preprocess:
+         ent = self.realm.players[entID]
+         if not ent.alive:
+            continue
+
+         for atn, args in actions[entID].items():
+            for arg, val in args.items():
+               if len(arg.edges) > 0: #Argument has edges: val is an index into arg.edges
+                  actions[entID][atn][arg] = arg.edges[val]
+               elif val < len(ent.targets): #No edges: val is an index into ent.targets
+                  targ                     = ent.targets[val]
+                  actions[entID][atn][arg] = self.realm.entity(targ)
+               else: #Index past the end of ent.targets falls back to the agent itself. Need to fix -inf in classifier before removing this
+                  actions[entID][atn][arg] = ent
+
+      #Step: Realm, Observations, Logs
+      self.dead = self.realm.step(actions) #Iterated below with .items() as entID -> entity
+      obs, rewards, dones, self.raw = {}, {}, {}, {}
+      for entID, ent in self.realm.players.items():
+         ob             = self.realm.dataframe.get(ent)
+         obs[entID]     = ob
+         self.dummy_ob  = ob #Remember the latest observation; reused below as the placeholder observation for dead agents
+
+         rewards[entID] = self.reward(ent)
+         dones[entID]   = False #Every entry taken from realm.players is reported as not done
+
+      #self.steps += len(self.realm.players.items())
+      #print('World {} Tick {} Steps {}'.format(self.worldIdx, self.realm.tick, self.steps))
+
+      for entID, ent in self.dead.items():
+         self.log(ent)
+
+      #Postprocess dead agents (skipped when omitDead is set: only entries built from realm.players are returned)
+      if omitDead:
+         return obs, rewards, dones, {}
+
+      for entID, ent in self.dead.items():
+         rewards[ent.entID] = self.reward(ent)
+         dones[ent.entID]   = True
+         obs[ent.entID]     = self.dummy_ob #NOTE(review): dummy_ob is only assigned in the players loop above -- confirm it is always set before this path runs
+
+      return obs, rewards, dones, {} #NOTE(review): order is obs, rewards, dones, info; the run_episode call site unpacks "obs, dones, rewards, _" -- confirm
+
+

 def assign_agents(player_agent, opponent_agents):
    player_index = 0
@@ -27,7 +75,7 @@ def assign_agents(player_agent, opponent_agents):

 def run_episode(player_index, agents, N_TIME_STEPS):
    config = projekt.config.CompetitionRound1()
-    env = Env(config)
+    env = ProxyEnv(config)
    n_steps = 0
    neural_agents = set()
    dead_agents = []
@@ -53,7 +101,6 @@ def run_episode(player_index, agents, N_TIME_STEPS):
                if Action.Attack in actions[entid]:
                    targID = actions[entid][Action.Attack][Action.Target]
                    actions[entid][Action.Attack][Action.Target] = realm.entity(targID)
-        print(actions)
        obs, dones, rewards, _ = env.step(
            actions, omitDead=True, preprocess=neural_agents
        )