diff --git a/agents/custom_agent.py b/agents/jessie_agent.py
similarity index 60%
rename from agents/custom_agent.py
rename to agents/jessie_agent.py
index c27936d8b84fc7c51227aa06a110746f9263cfaf..92b7d2b3a346882cb7a73bccb14bd51e711e36ca 100644
--- a/agents/custom_agent.py
+++ b/agents/jessie_agent.py
@@ -1,15 +1,20 @@
 import numpy as np
 
 from agents.base import BatchedAgent
+from nethack_baselines.jessie.JessieBot import JessieBot
 
 
-class CustomAgent(BatchedAgent):
+class JessieAgent(BatchedAgent):
     """A example agent... that simple acts randomly. Adapt to your needs!"""
 
     def __init__(self, num_envs, num_actions):
         """Set up and load you model here"""
         super().__init__(num_envs, num_actions)
-        self.seeded_state = np.random.RandomState(42)
+        # Only num_envs/num_actions are handed to the base class here, so the
+        # agent may have no env handles (TODO confirm against BatchedAgent).
+        # A bot built without an env indexes actions through nh.ACTIONS.
+        envs = getattr(self, "envs", None) or [None] * num_envs
+        self.bots = [JessieBot(env) for env in envs]
 
     def batched_step(self, observations, rewards, dones, infos):
         """
@@ -18,5 +23,12 @@ class CustomAgent(BatchedAgent):
         Each argument is a list of the respective gym output.
         Returns an iterable of actions.
         """
-        actions = self.seeded_state.randint(self.num_actions, size=self.num_envs)
+        # One bot per environment: each bot only sees its own slice of the batch.
+        actions = [
+            bot.step(observation, reward, done, info)
+            for bot, observation, reward, done, info in zip(
+                self.bots, observations, rewards, dones, infos
+            )
+        ]
+
         return actions
diff --git a/aicrowd.json b/aicrowd.json
index cae19d5534e45c1c9c1c41b7e36020685f5dc007..252e707a3762471752dc8677bfb53e0b8838c570 100644
--- a/aicrowd.json
+++ b/aicrowd.json
@@ -1,9 +1,9 @@
 {
   "challenge_id": "neurips-2021-the-nethack-challenge",
   "authors": [
-    "aicrowd-bot"
+    "clint_herron"
   ],
-  "description": "(optional) description about your awesome agent",
+  "description": "Jessie 'The Bottie' Venturer 2.0",
   "gpu": true
 }
 
diff --git a/nethack_baselines/jessie/JessieBot.py b/nethack_baselines/jessie/JessieBot.py
new file mode 100644
index 0000000000000000000000000000000000000000..6722bfa623cfe1bfe38a3c8847416a94192117e9
--- /dev/null
+++ b/nethack_baselines/jessie/JessieBot.py
@@ -0,0 +1,97 @@
+import aicrowd_gym
+import nle
+from nle import nethack as nh
+import os
+import random
+
+# Sibling modules are imported package-relative when this file is loaded as
+# nethack_baselines.jessie.JessieBot (by the agent) and by bare name when it
+# is imported from inside this directory (by test.py).
+try:
+    from .nle_consts import *
+    from .JessieUtils import *
+except ImportError:
+    from nle_consts import *
+    from JessieUtils import *
+
+class JessieBot:
+    """Bot for one environment: logs messages and picks one action per step."""
+
+    def __init__(self, env=None):
+        """Create a bot; without an env, actions are indexed via nh.ACTIONS."""
+        self.env = env
+        self.step_count = 0
+        self.message_history = ['']
+        self.last_observation = None
+        self.blstats = [0] * 25
+        self.dungeon = {}
+
+    def pre_step(self, observation, reward, done, info):
+        """Record the top-line message and answer "waiting for space" prompts.
+
+        Returns the raw action that answers the prompt, or None when step()
+        should pick the action itself.
+        """
+        # TODO: merge new map data into self.dungeon; no merge routine exists yet.
+
+        # Grab any new input messages and add them to our history
+        message = decode_str(observation[OBS_MESSAGE])
+
+        if message and self.message_history[-1] != message:
+            self.message_history.append(message)
+
+        # Are we waiting for space?
+        if observation[OBS_MISC][MISC_XWAITFORSPACE] == 1:
+            print(message)
+            return nh.TextCharacters.SPACE
+
+        return None
+
+    def step(self, observation, reward, done, info):
+        """Choose the next action and return its index in the action list."""
+        action = self.pre_step(observation, reward, done, info)
+
+        actions = [
+            nh.CompassDirection.N,
+            nh.CompassDirection.E,
+            nh.CompassDirection.S,
+            nh.CompassDirection.W,
+            nh.CompassDirection.NE,
+            nh.CompassDirection.SE,
+            nh.CompassDirection.NW,
+            nh.CompassDirection.SW,
+        ]
+
+        # Grab the latest information
+        self.step_count += 1
+
+        # pre_step() returns None unless it already answered a prompt.
+        if action is None:
+            # Are we writing input to a line?
+            if observation[OBS_MISC][MISC_GETLINE] == 1:
+                action = nh.MiscAction.MORE
+            # Are we answering a yes/no question?
+            elif observation[OBS_MISC][MISC_YN_QUESTION] == 1:
+                action = ord('y')
+            # None of the above -- get our regular action!
+            else:
+                # TODO: update the local map once a routine for it exists.
+                self.blstats = observation[OBS_BLSTATS]
+
+                action = random.choice(actions)
+
+        return self.actionToIndex(action)
+
+    def letterToActionIndex(self, raw_action):
+        """Return the action index for a single keyboard character."""
+        return self.actionToIndex(ord(raw_action))
+
+    def actionToIndex(self, action):
+        """Return the position of a raw action in the active action list."""
+        action_list = nh.ACTIONS if self.env is None else self.env._actions
+        return action_list.index(action)
+
+    def finalDump(self):
+        """Print the collected message history."""
+        print("Message history: ")
+        print(self.message_history)
diff --git a/nethack_baselines/jessie/JessieBot_Dungeon.py b/nethack_baselines/jessie/JessieBot_Dungeon.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0edc50fa5f2737d12b3972448d9fbb1609bde77
--- /dev/null
+++ b/nethack_baselines/jessie/JessieBot_Dungeon.py
@@ -0,0 +1,41 @@
+import aicrowd_gym
+import nle
+from nle import nethack as nh
+import os
+import random
+from nle_consts import *
+from JessieUtils import *
+
+class DungeonTile:
+    """One map position, identified by branch, level and x/y."""
+
+    def __init__(self, branch, level, x, y):
+        self.branch = branch
+        self.level = level
+        self.x = x
+        self.y = y
+
+class Dungeon:
+    """Keeps one glyph grid per branch and level, keyed as dungeons[branch][level]."""
+
+    def __init__(self):
+        self.dungeons = {}
+
+    def observe(self, obs):
+        """Store the glyph grid of the current level the first time it is seen."""
+        # Get our current location as branch,level
+        branch = obs[OBS_BLSTATS][BL_DUNGEON_NUMBER]
+        level = obs[OBS_BLSTATS][BL_LEVEL_NUMBER]
+
+        # If our current branch doesn't exist yet, then create it
+        levels = self.dungeons.setdefault(branch, {})
+
+        # NOTE(review): this stores the observation's glyph array itself (no
+        # copy) and only on the first visit -- confirm whether the env reuses
+        # that buffer between steps and whether later visits should merge.
+        if level not in levels:
+            levels[level] = obs[OBS_GLYPHS]
+
+def observe_dungeon(self, obs):
+    """Placeholder: evaluates self.dungeon and discards the result."""
+    self.dungeon
diff --git a/nethack_baselines/jessie/JessieBot_Tome.py b/nethack_baselines/jessie/JessieBot_Tome.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd95255ce43024e435e9afaf4cf0adbcdcab2379
--- /dev/null
+++ b/nethack_baselines/jessie/JessieBot_Tome.py
@@ -0,0 +1,6 @@
+
+class Tome:
+    """Container with a single, initially empty ``potions`` list."""
+
+    def __init__(self):
+        self.potions = []
diff --git a/nethack_baselines/jessie/JessieUtils.py b/nethack_baselines/jessie/JessieUtils.py
new file mode 100644
index 0000000000000000000000000000000000000000..bed087b43e24ed3421a090e812d47e1164b50a34
--- /dev/null
+++ b/nethack_baselines/jessie/JessieUtils.py
@@ -0,0 +1,12 @@
+
+
+def simple_chr(i):
+    """Return chr(i) for printable ASCII codes (32-126), otherwise ''."""
+    if 32 <= i <= 126:
+        return chr(i)
+    return ''
+
+
+def decode_str(arr):
+    """Join the printable ASCII characters of arr and strip outer whitespace."""
+    return ''.join(simple_chr(i) for i in arr).strip()
diff --git a/nethack_baselines/jessie/nle_consts.py b/nethack_baselines/jessie/nle_consts.py
new file mode 100644
index 0000000000000000000000000000000000000000..f389e452bece86eafe1764f8fb16ba1648baf646
--- /dev/null
+++ b/nethack_baselines/jessie/nle_consts.py
@@ -0,0 +1,47 @@
+# Keys of the observation dict, followed by indices into its blstats and misc arrays.
+
+OBS_BLSTATS = 'blstats' # [25] -2147483648 2147483647 int64 Array of player stats, generally found on the bottom line of the screen.
+OBS_CHARS = 'chars' # [21, 79] 0 255 uint8 Characters used in the map view of the screen (see also glyphs and tty_chars)
+OBS_COLORS = 'colors' # [21, 79] 0 15 uint8 Colors of the characters on the screen.
+OBS_GLYPHS = 'glyphs' # [21, 79] 0 5976 int16 Glyphs of each space on the map. Much more detailed than the simple chars array. This is what you want to be using.
+OBS_INV_GLYPHS = 'inv_glyphs' # [55] 0 5976 int16 Glyphs of each item in your inventory.
+OBS_INV_LETTERS = 'inv_letters' # [55] 0 127 uint8 Letters assigned to each item in your inventory, used for accessing those items (such as what you want to throw / drop / equip / quaff / eat / etc)
+OBS_INV_OCLASSES = 'inv_oclasses' # [55] 0 18 uint8 The category that each inventory item belongs to.
+OBS_INV_STRS = 'inv_strs' # [55, 80] 0 255 uint8 String description of each item in your inventory.
+OBS_MESSAGE = 'message' # [256] 0 255 uint8 Text of the top-line message.
+OBS_MISC = 'misc' # [3] -2147483648 2147483647 int32 Special array that indicates the state of the user interface, corresponding to three boolean values: Am I in a yes/no question? Am I writing the input to a line? Am I waiting for a space? See below for more info.
+OBS_SPECIALS = 'specials' # [21, 79] 0 255 uint8 Array that holds special information for each glyph on the map, indicating some extended information. See below for more info.
+OBS_TTY_CHARS = 'tty_chars' # [24, 80] 0 255 uint8 Similar to chars, these are the characters used in the TTY map view of the screen (see also glyphs and chars). Again, there is overlap here with glyphs and this array provides less information. Better to use glyphs.
+OBS_TTY_COLORS = 'tty_colors' # [24, 80] 0 31 int8 Similar to colors, these are the colors used by the tty_chars display.
+OBS_TTY_CURSOR = 'tty_cursor' # [2] 0 255 uint8 X/Y coordinates of the cursor in the terminal interface.
+
+BL_XCOORD = 0 # Current player X coordinate on the grid. 1 <= ux <= cols
+BL_YCOORD = 1 # Current player Y coordinate on the grid. 0 <= uy < rows
+BL_STR_PCT = 2
+BL_STR = 3 # strength Character Str attribute, relates to damage dealt, carrying capacity, etc.
+BL_DEX = 4 # dexterity Character Dex attribute, relates to to-hit percentage, throwing accuracy, etc.
+BL_CON = 5 # constitution Character Con attribute, relates to max HP, healing rate, etc.
+BL_INT = 6 # intelligence Character Int attribute, relates to some classes casting spells, learning spells, etc.
+BL_WIS = 7 # wisdom Character Wis attribute, relates to some classes casting spells, recharging mana, etc.
+BL_CHA = 8 # charisma Character Cha attribute, primarily relates to the cost of items in shops, but affects some other encounters as well.
+BL_SCORE = 9 # score The player's current score as judged by Nethack (not necessarily the reward function of NLE).
+BL_HP = 10 # hitpoints Current hitpoints
+BL_MAX_HP = 11 # max_hitpoints Maximum hitpoints
+BL_DEPTH = 12 # depth ?Max depth achieved? Or current dungeon level?
+BL_GOLD = 13 # gold How much gold is in your main inventory (not counting anything held in bags)
+BL_ENERGY = 14 # energy Amount of energy / mana available for casting spells
+BL_MAX_ENERGY = 15 # max_energy Maximum amount of energy / mana available for casting spells
+BL_AC = 16 # armor_class The player's resistance to damage. Lower is better. Generally improved by wearing equipment, though intrinsic AC can be gained via divine Protection.
+BL_MON_LVL = 17 # monster_level ?The level of your current monster shape (if polymorphed different from your starting race)?
+BL_XP_LVL = 18 # experience_level Your current experience level (from 1-30)
+BL_XP = 19 # experience_points The amount of experience you have. Generally increased by fighting monsters, though some non-fighting conducts will need to use other means.
+BL_TIME = 20 # time The current turn counter.
+BL_HUNGER = 21 # hunger state Player's current state of hunger. 0 = Satiated, 1 = Normal, 2 = Hungry, 3 = Weak, 4 = Fainting
+BL_ENCUMBERED = 22 # carrying_capacity ?Current state of encumbrance?
+BL_DUNGEON_NUMBER = 23 # dungeon number ?Which dungeon branch you are currently in (main, gnomish mines, Sokoban, etc)?
+BL_LEVEL_NUMBER = 24 # level number
+
+MISC_YN_QUESTION = 0 # Am I in a yes or no question? (Like after "pray")
+MISC_GETLINE = 1 # Am I writing the input to a line? (Like making a wish)
+MISC_XWAITFORSPACE = 2 # Am I waiting for a space? (Like when -More- is shown, or after "inventory")
+
diff --git a/nethack_baselines/jessie/test.py b/nethack_baselines/jessie/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb9cd3edf2382a785a406def2b0d338d256db1e5
--- /dev/null
+++ b/nethack_baselines/jessie/test.py
@@ -0,0 +1,37 @@
+import aicrowd_gym
+import nle
+from nle import nethack as nh
+import os
+import random
+from nle_consts import *
+import JessieBot
+from JessieUtils import *
+
+def main():
+    """
+    Play 20 episodes, each with a fresh JessieBot, and print the best score seen.
+    """
+
+    # This allows us to limit the features of the environment
+    # that we don't want participants to use during the submission
+    env = aicrowd_gym.make("NetHackChallenge-v0", savedir='')
+
+    max_score = 0
+
+    for _ in range(20):
+        obs = env.reset()
+        reward, done, info = 0, False, []
+
+        # Create my bot
+        jessie = JessieBot.JessieBot(env)
+
+        while not done:
+            action = jessie.step(obs, reward, done, info)
+            obs, reward, done, info = env.step(action)
+            # The score is read from the blstats array of the newest observation.
+            max_score = max(max_score, obs[OBS_BLSTATS][BL_SCORE])
+
+    print(f"Max score was {max_score}!")
+
+if __name__ == "__main__":
+    main()