Commit 413643fd authored by nilabha

cleanup and changes to make compatible with tune

parent 4b39e2f0
from bayes_opt import BayesianOptimization, JSONLogger, Events
from libs.cell_graph_dispatcher import CellGraphDispatcher
from libs.cell_graph_validator import CellGraphValidator
import numpy as np
# Number of randomly generated scenarios and the seed that makes them reproducible.
N = 15
seed = 42
np.random.seed(seed)

# NOTE: the order of the draws below matters. Every call advances the global
# NumPy random state, so reordering them would change all subsequent values.
width = np.random.randint(20, 150, size=N)
height = np.random.randint(20, 150, size=N)
nr_train = np.random.randint(50, 200, size=N)
n_cities = np.random.randint(2, 35, size=N)
grid_distribution_of_cities = False
max_rails_between_cities = np.random.randint(2, 4, size=N)
max_rail_in_city = np.random.randint(3, 6, size=N)
malfunction_rate = np.random.randint(500, 4000, size=N)
# ??? (question mark kept from the original author): both bounds are 0.01,
# so every entry of this array is exactly 0.01.
prop_malfunction = np.random.uniform(low=0.01, high=0.01, size=N)
min_duration = np.random.randint(20, 80, size=N)
# Draw the upper duration, then clamp it so it is never below the lower one.
max_duration = np.maximum(min_duration, np.random.randint(20, 80, size=N))

# Speed -> share of trains; four speed classes with equal weight.
speed_ration_map = {
    1.0: 0.25,        # Fast passenger train
    1.0 / 2.0: 0.25,  # Fast freight train
    1.0 / 3.0: 0.25,  # Slow commuter train
    1.0 / 4.0: 0.25,  # Slow freight train
}

# Make the second half of the scenarios square; the last one is 150x150 with 200 trains.
width[-1] = 150
height[N // 2:] = width[N // 2:]
nr_train[-1] = 200

# Keyword arguments later unpacked into CellGraphValidator.multiple_tests.
test = dict(
    width=width,
    height=height,
    trains=nr_train,
    seed=seed,
    cities=n_cities,
    rails_between_cities=max_rails_between_cities,
    rails_in_city=max_rail_in_city,
    malfunction_rate=malfunction_rate,
    prop_malfunction=prop_malfunction,
    min_prop=min_duration,
    max_prop=max_duration,
)
#Run
def flatland_function(speed_coef, time_coef):
    """Objective evaluated by the Bayesian optimizer.

    Builds dispatchers whose agent sort key is
    ``speed * speed_coef + distance_to_target * time_coef`` and runs them
    through ``CellGraphValidator.multiple_tests`` on the ``test`` scenarios.

    Args:
        speed_coef: Weight applied to the agent's ``speed_data['speed']``.
        time_coef: Weight applied to the ``dist_to_target`` entry looked up
            for the agent's initial position and direction.

    Returns:
        The ``"finished"`` entry of the validator result.
    """

    def get_dispatcher(env):
        """Create a dispatcher for *env* that uses the weighted sort key."""

        def get_sort_function(dispatcher: CellGraphDispatcher):
            """Return the per-agent key function bound to *dispatcher*."""

            def sort(idx):
                agent = env.agents[idx]
                # Index of the graph vertex at the agent's initial position.
                start_vertex = dispatcher.graph._vertex_idx_from_point(agent.initial_position)
                distances = dispatcher.controllers[idx].dist_to_target
                remaining = distances[start_vertex, agent.initial_direction]
                velocity = agent.speed_data['speed']
                return velocity * speed_coef + remaining * time_coef

            return sort

        # The factory itself is passed; it is called with the dispatcher
        # (see the parameter of get_sort_function).
        return CellGraphDispatcher(env, sort_function=get_sort_function)

    outcome = CellGraphValidator.multiple_tests(get_dispatcher, **test)
    return outcome["finished"]
# Search space for the two weights of the sort key built in flatland_function.
pbounds = {'speed_coef': (-10000, 10000), 'time_coef': (-1, 1)}

optimizer = BayesianOptimization(
    f=flatland_function,
    pbounds=pbounds,
    random_state=seed,
)

# Persist every optimization step to a JSON log file.
logger = JSONLogger(path="./opt_log.json")
# Older bayes_opt releases expose the step event under the misspelled name
# ``OPTMIZATION_STEP``; newer ones use ``OPTIMIZATION_STEP``. Resolve whichever
# exists so the script does not fail with AttributeError on either version.
_step_event = getattr(Events, "OPTIMIZATION_STEP", None) or Events.OPTMIZATION_STEP
optimizer.subscribe(_step_event, logger)

# Explicitly evaluate three corners of the search space in addition to the
# points explored by maximize().
optimizer.probe({'speed_coef': -10000, 'time_coef': 1})
optimizer.probe({'speed_coef': -10000, 'time_coef': -1})
optimizer.probe({'speed_coef': +10000, 'time_coef': 1})

optimizer.maximize(init_points=10, n_iter=100)
print(optimizer.max)
\ No newline at end of file
import numpy as np
import time
# In Flatland you can use custom observation builders and predictors
# Observation builders generate the observation needed by the controller
# Predictors can be used to do short time prediction which can help in avoiding conflicts in the network
from flatland.envs.observations import GlobalObsForRailEnv, ObservationBuilder
# First of all we import the Flatland rail environment
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_env import RailEnvActions, RailAgentStatus
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
# We also include a renderer because we want to visualize what is going on in the environment
from flatland.utils.rendertools import RenderTool, AgentRenderVariant
from flatland.envs.malfunction_generators import malfunction_from_params
from libs.cell_graph_dispatcher import CellGraphDispatcher
# Wall-clock start, used for the total run time printed at the end of the script.
start_time = time.time()

# Active scenario. Other settings that were tried (kept for reference):
#   width=150, height=150, nr_trains=200, cities_in_map=35,  seed=5
#   width=150, height=150, nr_trains=100, cities_in_map=100, seed=14
width = height = 50  # Width and height of the map
nr_trains = 200      # Number of trains that have an assigned task in the env
cities_in_map = 35   # Number of cities where agents can start or end
seed = 5             # Random seed

# Type of city distribution; if False, cities are randomly placed.
grid_distribution_of_cities = False
# Max number of tracks allowed between cities (number of entry points to a city).
max_rails_between_cities = 2
# Max number of parallel tracks within a city, representing a realistic train station.
max_rail_in_cities = 6

rail_generator = sparse_rail_generator(
    max_num_cities=cities_in_map,
    seed=seed,
    grid_mode=grid_distribution_of_cities,
    max_rails_between_cities=max_rails_between_cities,
    max_rails_in_city=max_rail_in_cities,
)

# The schedule generator can make very basic schedules with a start point, an
# end point and a speed profile for each agent. Here the speed profiles follow
# a statistical distribution: four agent types (trains) with different speeds,
# each with the same share.
speed_ration_map = {
    1.0: 0.25,        # Fast passenger train
    1.0 / 2.0: 0.25,  # Fast freight train
    1.0 / 3.0: 0.25,  # Slow commuter train
    1.0 / 4.0: 0.25,  # Slow freight train
}

# Initiate the schedule generator with the given speed profiles.
schedule_generator = sparse_schedule_generator(speed_ration_map)

# Stochastic data passed to the RailEnv constructor to allow for stochastic
# malfunctions during an episode.
stochastic_data = dict(
    malfunction_rate=500,   # Rate of malfunction occurrence of a single agent
    prop_malfunction=0.01,
    min_duration=20,        # Minimal duration of malfunction
    max_duration=80,        # Max duration of malfunction
)
# Custom observation builder without predictor
class DummyObservationBuilder(ObservationBuilder):
    """Observation builder that returns placeholder observations only.

    Every observation query is answered with the constant ``True``; no state
    is kept. According to the original note, this is used in the evaluation
    service.
    """

    def __init__(self):
        """Initialise the base ``ObservationBuilder``; no extra state is added."""
        super().__init__()

    def reset(self):
        """Do nothing; there is no per-episode state to reset."""
        return None

    def get_many(self, handles=None) -> bool:
        """Return ``True`` regardless of which *handles* are requested."""
        return True

    def get(self, handle: int = 0) -> bool:
        """Return ``True`` regardless of the requested *handle*."""
        return True
def _count_finished(agents):
    """Count the agents whose status is DONE or DONE_REMOVED."""
    terminal = (RailAgentStatus.DONE, RailAgentStatus.DONE_REMOVED)
    return np.sum([agent.status in terminal for agent in agents])


observation_builder = DummyObservationBuilder()
# Custom observation builder with predictor, uncomment line below if you want to try this one
# observation_builder = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())

# Construct the environment with the given observation, generators, predictors, and stochastic data
env = RailEnv(
    width=width,
    height=height,
    rail_generator=rail_generator,
    schedule_generator=schedule_generator,
    number_of_agents=nr_trains,
    # Malfunction data generator
    malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
    obs_builder_object=observation_builder,
    # Removes agents at the end of their journey to make space for others
    remove_agents_at_target=True,
)
env.reset()

# Initiate the renderer (adjust the screen parameters to fit your resolution)
env_renderer = RenderTool(
    env,
    gl="PILSVG",
    agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
    show_debug=False,
    screen_height=1920,
    screen_width=1080,
)

dispatcher = CellGraphDispatcher(env)

# Run episode
score = 0
frame_step = 0
step = 0
done = {'__all__': False}
# The body always runs at least once; the loop ends after the first step for
# which the environment reports that all agents are done.
while not done['__all__']:
    step += 1
    action_dict = dispatcher.step(step)

    # Environment step which returns the observations for all agents, their
    # corresponding reward and whether they are done
    next_obs, all_rewards, done, _ = env.step(action_dict)

    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
    # env_renderer.render_env(show=True, show_observations=True, show_predictions=True)
    # os.makedirs('./misc/Fames2/', exist_ok=True)
    # env_renderer.gl.save_image('./misc/Fames2/flatland_frame_{:04d}.png'.format(step))
    frame_step += 1

    score += np.sum(list(all_rewards.values()))
    #
    # observations = next_obs.copy()
    finished = _count_finished(env.agents)
    print('Episode: Steps {}\t Score = {}\t Finished = {}'.format(step, score, finished))

# Final summary: share of finished trains and total wall-clock time.
finished = _count_finished(env.agents)
print(f'Trains finished {finished}/{len(env.agents)} = {finished*100/len(env.agents):.2f}%')
print(f'Total time: {time.time()-start_time}s')
This diff is collapsed.
from libs.cell_graph_validator import CellGraphValidator
from libs.cell_graph_dispatcher import CellGraphDispatcher
import numpy as np
def no_sort_dispatcher(env):
    """Build a ``CellGraphDispatcher`` for *env* without a custom sort function."""
    dispatcher = CellGraphDispatcher(env)
    return dispatcher
# Single small scenario.
micro_test = dict(
    width=[50],
    height=[50],
    cities=[10],
    trains=[50],
    seed=42,
)

# Four fixed scenarios of varying map size and train count.
mini_test = dict(
    width=[50, 50, 100, 125],
    height=[50, 50, 100, 125],
    cities=[10] * 4,
    trains=[100, 200, 100, 100],
    seed=42,
)

# small_test: N randomly generated scenarios, reproducible through the seed.
# NOTE: the order of the draws below matters. Every call advances the global
# NumPy random state, so reordering them would change all subsequent values.
N = 15
seed = 42
np.random.seed(seed)

width = np.random.randint(20, 150, size=N)
height = np.random.randint(20, 150, size=N)
# The last scenario is 150 wide and the second half of the scenarios is square.
width[-1] = 150
height[N // 2:] = width[N // 2:]

nr_train = np.random.randint(50, 200, size=N)
nr_train[-1] = 200  # the last scenario always runs 200 trains

n_cities = np.random.randint(2, 35, size=N)
grid_distribution_of_cities = False
max_rails_between_cities = np.random.randint(2, 4, size=N)
max_rail_in_city = np.random.randint(3, 6, size=N)
malfunction_rate = np.random.randint(500, 4000, size=N)
# Both bounds are 0.01, so every entry of this array is exactly 0.01.
prop_malfunction = np.random.uniform(low=0.01, high=0.01, size=N)
min_duration = np.random.randint(20, 80, size=N)
# Draw the upper duration, then clamp it so it is never below the lower one.
max_duration = np.maximum(min_duration, np.random.randint(20, 80, size=N))

small_test = dict(
    width=width,
    height=height,
    trains=nr_train,
    seed=seed,
    cities=n_cities,
    rails_between_cities=max_rails_between_cities,
    rails_in_city=max_rail_in_city,
    malfunction_rate=malfunction_rate,
    prop_malfunction=prop_malfunction,
    min_prop=min_duration,
    max_prop=max_duration,
)
# Run the validator over the mini_test scenarios, using a dispatcher that is
# built without a custom sort function.
CellGraphValidator.multiple_tests(no_sort_dispatcher, **mini_test)
from unittest import TestCase
from flatland.envs.rail_env import RailEnv
from libs.cell_graph import CellGraph
from libs.cell_graph_locker import CellGraphLocker
# Parameters of the small environment every test case is built on.
SEED = 42
WIDTH = 10
HEIGHT = 10


class TestCellGraphLocker(TestCase):
    """Unit tests for the interval locking of ``CellGraphLocker``.

    The tests call ``lock(cell, agent, (start, end))`` and
    ``is_locked(cell, agent, (start, end))``. The first argument is presumably
    a cell/vertex index of the graph -- TODO confirm against CellGraphLocker.
    The expectations encoded below: an agent is never blocked by its own lock,
    and after ``(0, 2)`` is locked the interval ``(2, 3)`` is still free for
    another agent.
    """

    def setUp(self):
        """Create a fresh environment, graph and locker for each test."""
        env = RailEnv(width=WIDTH, height=HEIGHT, random_seed=SEED)
        env.reset()
        self.env = env
        self.graph = CellGraph(env)
        self.locker = CellGraphLocker(self.graph)

    def tearDown(self):
        """Drop the per-test objects in reverse order of creation."""
        for attribute in ("locker", "graph", "env"):
            delattr(self, attribute)

    def _expect(self, cell, agent, interval, locked):
        """Assert that ``is_locked(cell, agent, interval)`` is truthy/falsy as *locked* says."""
        check = self.assertTrue if locked else self.assertFalse
        check(self.locker.is_locked(cell, agent, interval))

    def _expect_all(self, cell, agent, intervals, locked):
        """Apply ``_expect`` to every interval, in the given order."""
        for interval in intervals:
            self._expect(cell, agent, interval, locked)

    def test_not_empty(self):
        self.assertGreater(len(self.locker.data), 0)

    def test_lock_single_position_by_another_agent(self):
        self.locker.lock(0, 0, (0, 2))

        # Intervals overlapping (0, 2) are blocked for a different agent ...
        self._expect_all(0, 1, [(0, 2), (1, 2), (-1, 1), (-1, 10)], locked=True)
        # ... while intervals that only touch it or lie outside are free.
        self._expect_all(0, 1, [(2, 3), (3, 10), (10, 20), (-1, 0)], locked=False)

    def test_lock_single_position_by_agent_itself(self):
        self.locker.lock(0, 0, (0, 2))

        # The locking agent itself is never blocked by its own lock.
        own_queries = [
            (0, 2), (1, 2), (-1, 1), (-1, 10),
            (2, 3), (3, 10), (10, 20), (-1, 0),
        ]
        self._expect_all(0, 0, own_queries, locked=False)

    def test_lock_agent_after_agent(self):
        self.locker.lock(0, 0, (0, 2))
        self._expect(0, 1, (0, 2), locked=True)
        self._expect(0, 1, (2, 5), locked=False)

        # A second agent takes the directly following interval.
        self.locker.lock(0, 1, (2, 5))

        self._expect_all(0, 2, [(0, 1), (3, 4), (2, 3), (2, 10), (-2, 10)], locked=True)
        self._expect_all(0, 0, [(5, 7), (5, 10), (10, 20), (-1, 0)], locked=False)

    def test_lock_agent_after_agent_with_extending(self):
        self.locker.lock(0, 0, (0, 2))
        self._expect(0, 1, (0, 2), locked=True)
        self._expect(0, 1, (5, 10), locked=False)

        # The second agent's lock leaves a gap between the two locked intervals.
        self.locker.lock(0, 1, (5, 10))

        expectations = [
            ((0, 1), True),
            ((3, 4), False),
            ((2, 3), False),
            ((2, 10), True),
            ((-2, 10), True),
        ]
        for interval, locked in expectations:
            self._expect(0, 2, interval, locked)

        # The "extend" scenario below is disabled in the original test.
        # #extend
        # self.assertFalse(self.locker.is_locked(0, 1, (2, 10)))
        # self.locker.lock(0, 1, (2, 10))
        #
        # self.assertTrue(self.locker.is_locked(0, 2, (0, 1)))
        # self.assertTrue(self.locker.is_locked(0, 2, (3, 4)))
        # self.assertTrue(self.locker.is_locked(0, 2, (2, 3)))
        # self.assertTrue(self.locker.is_locked(0, 2, (2, 10)))
        # self.assertTrue(self.locker.is_locked(0, 2, (-2, 10)))
        #
        # self.assertFalse(self.locker.is_locked(0, 0, (10, 17)))
        # self.assertFalse(self.locker.is_locked(0, 0, (10, 11)))
        # self.assertFalse(self.locker.is_locked(0, 0, (20, 30)))
        # self.assertFalse(self.locker.is_locked(0, 0, (-1, 0)))

    def test_lock_assert_situation(self):
        # Same query against two different locks, with a reset in between.
        self.locker.lock(15, 142, (252, 257))
        self._expect(15, 17, (253, 258), locked=True)

        self.locker.reset()

        self.locker.lock(15, 142, (253, 255))
        self._expect(15, 17, (253, 258), locked=True)
......@@ -55,7 +55,7 @@ import numpy as np
from flatland.envs.agent_utils import RailAgentStatus
import sys,os
sys.path.insert(0, os.getcwd() + '/envs/expert')
# sys.path.insert(0, os.getcwd() + '/envs/expert')
from libs.cell_graph_dispatcher import CellGraphDispatcher
def adam_optimizer(policy, config):
......@@ -302,7 +302,6 @@ if __name__ == "__main__":
resources = PPOTrainer.default_resource_request(_default_config).to_json()
imitation_train_fn(_default_config)
# tune.run(imitation_train_fn, resources_per_trial=resources, config=_default_config)
tune.run(imitation_train_fn, resources_per_trial=resources, config=_default_config)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment