diff --git a/envs/batched_env.py b/envs/batched_env.py
index 796e29641a7299145993375306f8cd1331bd71b1..2543d6bc30890b01822d050779b9a87cef99d81c 100644
--- a/envs/batched_env.py
+++ b/envs/batched_env.py
@@ -42,13 +42,6 @@ class BatchedEnv:
         observation = [env.reset() for env in self.envs]
         return observation
 
-    def single_env_reset(self, index):
-        """
-        Resets the env at the index location
-        """
-        observation = self.envs[index].reset()
-        return observation
-
 
 
 if __name__ == '__main__':
diff --git a/local_evaluation.py b/local_evaluation.py
deleted file mode 100644
index 5da924a72f2da615824d021aa7055bdd008f9fa5..0000000000000000000000000000000000000000
--- a/local_evaluation.py
+++ /dev/null
@@ -1,38 +0,0 @@
-## This file is intended to emulate the evaluation on AIcrowd
-
-# IMPORTANT - Differences to expect
-# * All the environment's functions are not available
-# * The run might be slower than your local run
-# * Resources might vary from your local machine
-
-from submission_agent import SubmissionConfig, LocalEvaluationConfig
-
-from rollout import run_batched_rollout
-from nethack_baselines.utils.batched_env import BatchedEnv
-
-
-# Ideally you shouldn't need to change anything below
-def add_evaluation_wrappers_fn(env_make_fn):
-    max_episodes = LocalEvaluationConfig.LOCAL_EVALUATION_NUM_EPISODES
-    # TOOD: use LOCAL_EVALUATION_NUM_EPISODES for limiting episodes
-    return env_make_fn
-
-def evaluate():
-    submission_env_make_fn = SubmissionConfig.submission_env_make_fn
-    num_envs = SubmissionConfig.NUM_PARALLEL_ENVIRONMENTS
-    Agent = SubmissionConfig.Submision_Agent
-
-    evaluation_env_fn = add_evaluation_wrappers_fn(submission_env_make_fn)
-    batched_env = BatchedEnv(env_make_fn=evaluation_env_fn,
-                             num_envs=num_envs)
-
-    num_envs = batched_env.num_envs
-    num_actions = batched_env.num_actions
-
-    agent = Agent(num_envs, num_actions)
-
-    run_batched_rollout(batched_env, agent)
-
-
-if __name__ == '__main__':
-    evaluate()
diff --git a/rollout.py b/rollout.py
index 586c4b22d29d70563d0bc8c34a00ec96ff58dd60..aac7720dd9aa636e5d075cc79f3f983bde895d6b 100644
--- a/rollout.py
+++ b/rollout.py
@@ -10,11 +10,12 @@ from tqdm import tqdm
 import numpy as np
 
 from envs.batched_env import BatchedEnv
+from envs.wrappers import create_env
 from submission_config import SubmissionConfig
 
-NUM_ASSESSMENTS = 512
 
-def run_batched_rollout(batched_env, agent):
+
+def run_batched_rollout(num_episodes, batched_env, agent):
     """
     This function will generate a series of rollouts in a batched manner.
     """
@@ -28,16 +29,16 @@ def run_batched_rollout(batched_env, agent):
     infos = [{} for _ in range(num_envs)]
 
     # We mark at the start of each episode if we are 'counting it'
-    active_envs = [i < NUM_ASSESSMENTS for i in range(num_envs)]
-    num_remaining = NUM_ASSESSMENTS - sum(active_envs)
+    active_envs = [i < num_episodes for i in range(num_envs)]
+    num_remaining = num_episodes - sum(active_envs)
 
     episode_count = 0
-    pbar = tqdm(total=NUM_ASSESSMENTS)
+    pbar = tqdm(total=num_episodes)
 
     all_returns = []
     returns = [0.0 for _ in range(num_envs)]
 
     # The evaluator will automatically stop after the episodes based on the development/test phase
-    while episode_count < NUM_ASSESSMENTS:
+    while episode_count < num_episodes:
         actions = agent.batched_step(observations, rewards, dones, infos)
         observations, rewards, dones, infos = batched_env.batch_step(actions)
@@ -57,20 +58,19 @@ def run_batched_rollout(batched_env, agent):
                 pbar.update(1)
             returns[done_idx] = 0.0
 
+    pbar.close()
     return all_returns
 
 
 if __name__ == "__main__":
-    submission_env_make_fn = SubmissionConfig.submission_env_make_fn
-    NUM_PARALLEL_ENVIRONMENTS = SubmissionConfig.NUM_PARALLEL_ENVIRONMENTS
-    Agent = SubmissionConfig.Submision_Agent
+    # AIcrowd will cut the assessment early during the dev phase
+    NUM_ASSESSMENTS = 4096
 
-    batched_env = BatchedEnv(
-        env_make_fn=submission_env_make_fn, num_envs=NUM_PARALLEL_ENVIRONMENTS
-    )
+    env_make_fn = SubmissionConfig.MAKE_ENV_FN
+    num_envs = SubmissionConfig.NUM_ENVIRONMENTS
+    Agent = SubmissionConfig.AGENT
 
-    num_envs = batched_env.num_envs
-    num_actions = batched_env.num_actions
-    agent = Agent(num_envs, num_actions)
+    batched_env = BatchedEnv(env_make_fn=env_make_fn, num_envs=num_envs)
+    agent = Agent(num_envs, batched_env.num_actions)
 
-    run_batched_rollout(batched_env, agent)
+    run_batched_rollout(NUM_ASSESSMENTS, batched_env, agent)
diff --git a/submission_config.py b/submission_config.py
index 5c28e9999cd55fb73717652dc03e10c4d65b3be8..9968e9996021f3040b04f47d7ec4b88dcf08b88d 100644
--- a/submission_config.py
+++ b/submission_config.py
@@ -15,26 +15,26 @@ from envs.wrappers import addtimelimitwrapper_fn
 
 
 class SubmissionConfig:
     ## Add your own agent class
-    Submision_Agent = TorchBeastAgent
-    # Submision_Agent = RLlibAgent
-    # Submision_Agent = RandomAgent
+    AGENT = TorchBeastAgent
+    # AGENT = RLlibAgent
+    # AGENT = RandomAgent
 
-    ## Change the NUM_PARALLEL_ENVIRONMENTS as you need
+    ## Change the NUM_ENVIRONMENTS as you need
     ## for example reduce it if your GPU doesn't fit
     ## Increasing above 32 is not advisable for the Nethack Challenge 2021
-    NUM_PARALLEL_ENVIRONMENTS = 32
+    NUM_ENVIRONMENTS = 32
 
     ## Add a function that creates your nethack env
     ## Mainly this is to add wrappers
     ## Add your wrappers to envs/wrappers.py and change the name here
     ## IMPORTANT: Don't "call" the function, only provide the name
-    submission_env_make_fn = addtimelimitwrapper_fn
+    MAKE_ENV_FN = addtimelimitwrapper_fn
 
 
-class LocalEvaluationConfig:
+class TestEvaluationConfig:
     # Change this to locally check a different number of rollouts
     # The AIcrowd submission evaluator will not use this
     # It is only for your local evaluation
-    LOCAL_EVALUATION_NUM_EPISODES = 50
+    NUM_EPISODES = 64
diff --git a/test_submission.py b/test_submission.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ab3495f27719d55fff6097f60df95ab429b1e87
--- /dev/null
+++ b/test_submission.py
@@ -0,0 +1,33 @@
+## This file is intended to emulate the evaluation on AIcrowd
+
+# IMPORTANT - Differences to expect
+# * All the environment's functions are not available
+# * The run might be slower than your local run
+# * Resources might vary from your local machine
+
+import numpy as np
+
+from agents.batched_agent import BatchedAgent
+from submission_config import SubmissionConfig, TestEvaluationConfig
+
+from rollout import run_batched_rollout
+from envs.batched_env import BatchedEnv
+
+
+def evaluate():
+    env_make_fn = SubmissionConfig.MAKE_ENV_FN
+    num_envs = SubmissionConfig.NUM_ENVIRONMENTS
+    Agent = SubmissionConfig.AGENT
+
+    num_episodes = TestEvaluationConfig.NUM_EPISODES
+
+    batched_env = BatchedEnv(env_make_fn=env_make_fn, num_envs=num_envs)
+
+    agent = Agent(num_envs, batched_env.num_actions)
+
+    scores = run_batched_rollout(num_episodes, batched_env, agent)
+    print(f"Median Score: {np.median(scores)}, Mean Score: {np.mean(scores)}")
+
+
+if __name__ == "__main__":
+    evaluate()