diff --git a/envs/batched_env.py b/envs/batched_env.py
index 442f47cc016196bf8cc4d393b9b1b3427de8e4d1..796e29641a7299145993375306f8cd1331bd71b1 100644
--- a/envs/batched_env.py
+++ b/envs/batched_env.py
@@ -1,9 +1,9 @@
 import aicrowd_gym
 import numpy as np
-from tqdm import trange
+
 from collections.abc import Iterable
 
-class BactchedEnv:
+class BatchedEnv:
     def __init__(self, env_make_fn, num_envs=32):
         """
         Creates multiple copies of the environment with the same env_make_fn function
@@ -52,23 +52,15 @@ class BactchedEnv:
 
 
 if __name__ == '__main__':
-    def nethack_make_fn():
-        return aicrowd_gym.make('NetHackChallenge-v0',
-                                observation_keys=("glyphs",
-                                                  "chars",
-                                                  "colors",
-                                                  "specials",
-                                                  "blstats",
-                                                  "message",
-                                                  "tty_chars",
-                                                  "tty_colors",
-                                                  "tty_cursor",))
-
     num_envs = 4
-    batched_env = BactchedEnv(env_make_fn=nethack_make_fn, num_envs=num_envs)
+    batched_env = BatchedEnv(
+        env_make_fn=lambda:aicrowd_gym.make('NetHackChallenge-v0'),
+        num_envs=4
+    )
+
     observations = batched_env.batch_reset()
     num_actions = batched_env.envs[0].action_space.n
-    for _ in trange(10000000000000):
+    for _ in range(50):
         actions = np.random.randint(num_actions, size=num_envs)
         observations, rewards, dones, infos = batched_env.batch_step(actions)
         for done_idx in np.where(dones)[0]:
diff --git a/envs/nethack_make_function.py b/envs/nethack_make_function.py
deleted file mode 100644
index f2401736568326fcd79c6daf414e6deabccb0d6e..0000000000000000000000000000000000000000
--- a/envs/nethack_make_function.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import aicrowd_gym
-import nle
-
-def nethack_make_fn():
-    return aicrowd_gym.make('NetHackChallenge-v0',
-                            observation_keys=("glyphs",
-                                              "chars",
-                                              "colors",
-                                              "specials",
-                                              "blstats",
-                                              "message",
-                                              "tty_chars",
-                                              "tty_colors",
-                                              "tty_cursor",))
\ No newline at end of file
diff --git a/envs/nle_batched_env.py b/envs/nle_batched_env.py
deleted file mode 100644
index 516b268e1c30b98268d182bd384976d6c96f7395..0000000000000000000000000000000000000000
--- a/envs/nle_batched_env.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import numpy as np
-from tqdm import trange
-from collections.abc import Iterable
-from envs.nethack_make_function import nethack_make_fn
-
-
-class NetHackChallengeBatchedEnv:
-    def __init__(self, env_make_fn, num_envs=1):
-        """
-        Creates multiple copies of the NetHackChallenge environment
-        """
-
-        self.num_envs = num_envs
-        self.envs = [env_make_fn() for _ in range(self.num_envs)]
-
-        self.action_space = self.envs[0].action_space
-        self.observation_space = self.envs[0].observation_space
-        self.reward_range = self.envs[0].reward_range
-
-    def step(self, actions):
-        """
-        Applies each action to each env in the same order as self.envs
-        Actions should be iterable and have the same length as self.envs
-        Returns lists of obsevations, rewards, dones, infos
-        """
-        assert isinstance(
-            actions, Iterable), f"actions with type {type(actions)} is not iterable"
-        assert len(
-            actions) == self.num_envs, f"actions has length {len(actions)} which different from num_envs"
-
-        observations, rewards, dones, infos = [], [], [], []
-        for env, a in zip(self.envs, actions):
-            observation, reward, done, info = env.step(a)
-            if done:
-                observation = env.reset()
-            observations.append(observation)
-            rewards.append(reward)
-            dones.append(done)
-            infos.append(info)
-
-        return observations, rewards, dones, infos
-
-    def reset(self):
-        """
-        Resets all the environments in self.envs
-        """
-        observations = [env.reset() for env in self.envs]
-        return observations
-
-    def single_env_reset(self, index):
-        """
-        Resets the env at the index location
-        """
-        observation = self.envs[index].reset()
-        return observation
-
-    def single_env_step(self, index, action):
-        """
-        Resets the env at the index location
-        """
-        observation, reward, done, info = self.envs[index].step(action)
-        return observation, reward, done, info
-
-if __name__ == '__main__':
-    num_envs = 4
-    batched_env = NetHackChallengeBatchedEnv(env_make_fn=nethack_make_fn, num_envs=num_envs)
-    observations = batched_env.reset()
-    num_actions = batched_env.action_space.n
-    for _ in trange(10000000000000):
-        actions = np.random.randint(num_actions, size=num_envs)
-        observations, rewards, dones, infos = batched_env.step(actions)
-        for done_idx in np.where(dones)[0]:
-            observations[done_idx] = batched_env.single_env_reset(done_idx)
diff --git a/submission_wrappers.py b/envs/wrappers.py
similarity index 58%
rename from submission_wrappers.py
rename to envs/wrappers.py
index 9f75d9de6baad97a888c7d462f6a7e6af6575625..bff728daacdf73bc8fd9700cff131dd687cce40f 100644
--- a/submission_wrappers.py
+++ b/envs/wrappers.py
@@ -1,12 +1,18 @@
+import aicrowd_gym
+import nle
 from gym.wrappers import TimeLimit
-from envs.nethack_make_function import nethack_make_fn
+
+
+def create_env():
+    """This is the environment that will be assessed by AIcrowd."""
+    return aicrowd_gym.make("NetHackChallenge-v0")
 
 
 def addtimelimitwrapper_fn():
     """
    An example of how to add wrappers to the nethack_make_fn
    Should return a gym env which wraps the nethack gym env
    """
-    env = nethack_make_fn()
+    env = create_env()
     env = TimeLimit(env, max_episode_steps=10_000_000)
     return env
\ No newline at end of file
diff --git a/local_evaluation.py b/local_evaluation.py
index b8b625dc8e967446c6086d8aad1edf7c35063eb8..5da924a72f2da615824d021aa7055bdd008f9fa5 100644
--- a/local_evaluation.py
+++ b/local_evaluation.py
@@ -8,7 +8,7 @@
 from submission_agent import SubmissionConfig, LocalEvaluationConfig
 from rollout import run_batched_rollout
 
-from nethack_baselines.utils.batched_env import BactchedEnv
+from nethack_baselines.utils.batched_env import BatchedEnv
 
 # Ideally you shouldn't need to change anything below
 
@@ -23,7 +23,7 @@ def evaluate():
     Agent = SubmissionConfig.Submision_Agent
     evaluation_env_fn = add_evaluation_wrappers_fn(submission_env_make_fn)
 
-    batched_env = BactchedEnv(env_make_fn=evaluation_env_fn,
+    batched_env = BatchedEnv(env_make_fn=evaluation_env_fn,
                               num_envs=num_envs)
 
     num_envs = batched_env.num_envs
diff --git a/rollout.py b/rollout.py
index 89899aecdfb24a4fad309114b2dcd007bbb1a43d..586c4b22d29d70563d0bc8c34a00ec96ff58dd60 100644
--- a/rollout.py
+++ b/rollout.py
@@ -9,7 +9,7 @@
 from tqdm import tqdm
 import numpy as np
 
-from envs.batched_env import BactchedEnv
+from envs.batched_env import BatchedEnv
 from submission_config import SubmissionConfig
 
 NUM_ASSESSMENTS = 512
@@ -46,8 +46,6 @@ def run_batched_rollout(batched_env, agent):
             returns[i] += r
 
         for done_idx in np.where(dones)[0]:
-            observations[done_idx] = batched_env.single_env_reset(done_idx)
-
             if active_envs[done_idx]:
                 # We were 'counting' this episode
                 all_returns.append(returns[done_idx])
@@ -66,7 +64,7 @@ if __name__ == "__main__":
     NUM_PARALLEL_ENVIRONMENTS = SubmissionConfig.NUM_PARALLEL_ENVIRONMENTS
     Agent = SubmissionConfig.Submision_Agent
 
-    batched_env = BactchedEnv(
+    batched_env = BatchedEnv(
         env_make_fn=submission_env_make_fn,
         num_envs=NUM_PARALLEL_ENVIRONMENTS
     )
diff --git a/submission_config.py b/submission_config.py
index df2916b6fd9a4319d34f5d7e3296f0b8df13ae08..5c28e9999cd55fb73717652dc03e10c4d65b3be8 100644
--- a/submission_config.py
+++ b/submission_config.py
@@ -2,7 +2,7 @@
 from agents.random_batched_agent import RandomAgent
 from agents.torchbeast_agent import TorchBeastAgent
 # from agents.rllib_batched_agent import RLlibAgent
-from submission_wrappers import addtimelimitwrapper_fn
+from envs.wrappers import addtimelimitwrapper_fn
 
 ################################################
 # Import your own agent code #
@@ -28,7 +28,7 @@ class SubmissionConfig:
 
     ## Add a function that creates your nethack env
     ## Mainly this is to add wrappers
-    ## Add your wrappers to wrappers.py and change the name here
+    ## Add your wrappers to envs/wrappers.py and change the name here
     ## IMPORTANT: Don't "call" the function, only provide the name
     submission_env_make_fn = addtimelimitwrapper_fn
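For reference, the comments in submission_config.py above direct competitors to add their own wrapper functions to envs/wrappers.py. A minimal sketch of that pattern, assuming gym's stock TransformReward wrapper is available; the addclippedrewardwrapper_fn name and the reward-clipping choice are hypothetical, not part of this patch:

# Illustrative sketch only -- mirrors addtimelimitwrapper_fn in envs/wrappers.py.
import numpy as np
from gym.wrappers import TimeLimit, TransformReward

from envs.wrappers import create_env


def addclippedrewardwrapper_fn():
    """Wrap the competition env with a step limit and clipped rewards."""
    env = create_env()
    env = TimeLimit(env, max_episode_steps=10_000_000)
    # TransformReward applies the given callable to every step reward.
    env = TransformReward(env, lambda r: float(np.clip(r, -1.0, 1.0)))
    return env

SubmissionConfig would then reference the function without calling it, exactly as the "IMPORTANT" comment above requires: submission_env_make_fn = addclippedrewardwrapper_fn.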