Commit 4102e91c authored by nilabha

Added pure imitation learning in rllib

parent 5349396b
from bayes_opt import BayesianOptimization, JSONLogger, Events
from libs.cell_graph_dispatcher import CellGraphDispatcher
from libs.cell_graph_validator import CellGraphValidator
import numpy as np
N = 15
seed = 42
np.random.seed(seed)
width = np.random.randint(20, 150, (N,))
height = np.random.randint(20, 150, (N,))
nr_train = np.random.randint(50, 200, (N,))
n_cities = np.random.randint(2, 35, (N,))
grid_distribution_of_cities = False
max_rails_between_cities = np.random.randint(2, 4, (N,))
max_rail_in_city = np.random.randint(3, 6, (N,))
malfunction_rate = np.random.randint(500, 4000, (N,))
# prop_malfunction is effectively constant: a uniform draw over a zero-width interval
prop_malfunction = np.random.uniform(0.01, 0.01, (N,))
min_duration = np.random.randint(20, 80, (N,))
max_duration = np.random.randint(20, 80, (N,))
max_duration = np.maximum(min_duration, max_duration)
speed_ration_map = {1.: 0.25,       # Fast passenger train
                    1. / 2.: 0.25,  # Fast freight train
                    1. / 3.: 0.25,  # Slow commuter train
                    1. / 4.: 0.25}  # Slow freight train
# Make the second half of the suite square maps; the last case is a 150x150 map with 200 trains
width[N-1] = 150
height[N//2:] = width[N//2:]
nr_train[N-1] = 200
test = {
    "width": width,
    "height": height,
    "trains": nr_train,
    "seed": seed,
    "cities": n_cities,
    "rails_between_cities": max_rails_between_cities,
    "rails_in_city": max_rail_in_city,
    "malfunction_rate": malfunction_rate,
    "prop_malfunction": prop_malfunction,
    "min_prop": min_duration,
    "max_prop": max_duration
}
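# Quick consistency check (added sketch): every per-environment entry in `test` should be
# an array of length N; only `seed` is a shared scalar.
assert all(len(v) == N for k, v in test.items() if k != "seed")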
# Run
def flatland_function(speed_coef, time_coef):
    """Objective for the Bayesian search: the fraction of trains that finish."""
    def get_dispatcher(env):
        def get_sort_function(dispatcher: CellGraphDispatcher):
            def sort(idx):
                # Remaining travel time from the agent's initial position and direction
                time = dispatcher.controllers[idx].dist_to_target[
                    dispatcher.graph._vertex_idx_from_point(env.agents[idx].initial_position),
                    env.agents[idx].initial_direction]
                speed = env.agents[idx].speed_data['speed']
                # Dispatch priority: a linear combination of speed and remaining time
                return speed * speed_coef + time * time_coef
            return sort
        return CellGraphDispatcher(env, sort_function=get_sort_function)
    res = CellGraphValidator.multiple_tests(get_dispatcher, **test)
    return res["finished"]
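# Optional single-point sanity check before the long optimization run (a sketch; it assumes,
# as above, that multiple_tests reports a 'finished' fraction):
# print('baseline finished fraction:', flatland_function(speed_coef=0, time_coef=1))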
pbounds = {'speed_coef': (-10000, 10000), 'time_coef': (-1, 1)}
optimizer = BayesianOptimization(
    f=flatland_function,
    pbounds=pbounds,
    random_state=seed,
)
logger = JSONLogger(path="./opt_log.json")
optimizer.subscribe(Events.OPTMIZATION_STEP, logger)
optimizer.probe({'speed_coef' : -10000, 'time_coef': 1})
optimizer.probe({'speed_coef' : -10000, 'time_coef': -1})
optimizer.probe({'speed_coef' : +10000, 'time_coef': 1})
optimizer.maximize(init_points=10, n_iter=100)
print(optimizer.max)
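# If a long search is interrupted, the saved JSON log can be replayed into a fresh optimizer.
# A sketch, assuming a bayes_opt 1.x release where load_logs lives in bayes_opt.util (the same
# releases that ship JSONLogger):
# from bayes_opt.util import load_logs
# new_optimizer = BayesianOptimization(f=flatland_function, pbounds=pbounds, random_state=seed)
# load_logs(new_optimizer, logs=["./opt_log.json"])
# new_optimizer.maximize(init_points=0, n_iter=50)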
import numpy as np
import time
# In Flatland you can use custom observation builders and predictors.
# Observation builders generate the observation needed by the controller.
# Predictors can be used for short-term prediction, which helps in avoiding conflicts in the network.
from flatland.envs.observations import GlobalObsForRailEnv, ObservationBuilder
# First of all we import the Flatland rail environment
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_env import RailEnvActions, RailAgentStatus
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
# We also include a renderer because we want to visualize what is going on in the environment
from flatland.utils.rendertools import RenderTool, AgentRenderVariant
from flatland.envs.malfunction_generators import malfunction_from_params
from libs.cell_graph_dispatcher import CellGraphDispatcher
start_time = time.time()
# width = 150  # Width of map
# height = 150  # Height of map
# nr_trains = 200  # Number of trains that have an assigned task in the env
# cities_in_map = 35  # Number of cities where agents can start or end
# seed = 5  # Random seed
width = 50  # Width of map
height = 50  # Height of map
nr_trains = 200  # Number of trains that have an assigned task in the env
cities_in_map = 35  # Number of cities where agents can start or end
seed = 5  # Random seed
# width = 150  # Width of map
# height = 150  # Height of map
# nr_trains = 100  # Number of trains that have an assigned task in the env
# cities_in_map = 100  # Number of cities where agents can start or end
# seed = 14  # Random seed
grid_distribution_of_cities = False  # Type of city distribution; if False, cities are placed randomly
max_rails_between_cities = 2  # Max number of tracks allowed between cities; this is the number of entry points to a city
max_rail_in_cities = 6  # Max number of parallel tracks within a city, representing a realistic train station
rail_generator = sparse_rail_generator(max_num_cities=cities_in_map,
                                       seed=seed,
                                       grid_mode=grid_distribution_of_cities,
                                       max_rails_between_cities=max_rails_between_cities,
                                       max_rails_in_city=max_rail_in_cities,
                                       )
# The schedule generator can make very basic schedules with a start point, end point and a speed
# profile for each agent. The speed profiles can also be adjusted directly, as shown later on.
# We start by introducing a statistical distribution of speed profiles.
# Different agent types (trains) with different speeds.
speed_ration_map = {1.: 0.25,       # Fast passenger train
                    1. / 2.: 0.25,  # Fast freight train
                    1. / 3.: 0.25,  # Slow commuter train
                    1. / 4.: 0.25}  # Slow freight train
# We can now initiate the schedule generator with the given speed profiles
schedule_generator = sparse_schedule_generator(speed_ration_map)
# We can furthermore pass stochastic data to the RailEnv constructor, which allows for stochastic
# malfunctions during an episode.
stochastic_data = {'malfunction_rate': 500,  # Rate of malfunction occurrence for a single agent
                   'prop_malfunction': 0.01,
                   'min_duration': 20,  # Minimal duration of malfunction
                   'max_duration': 80   # Max duration of malfunction
                   }
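# Rough sanity check (an assumption about this Flatland version: malfunction_rate acts as the
# mean number of steps between breakdowns, so an agent suffers about H / malfunction_rate
# malfunctions over an H-step horizon, e.g. roughly 2 per agent per 1000 steps here).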
# Custom observation builder without predictor
class DummyObservationBuilder(ObservationBuilder):
    """
    DummyObservationBuilder class which returns dummy observations
    This is used in the evaluation service
    """

    def __init__(self):
        super().__init__()

    def reset(self):
        pass

    def get_many(self, handles=None) -> bool:
        return True

    def get(self, handle: int = 0) -> bool:
        return True
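# A slightly more informative builder, as a sketch of the same interface: it returns each
# agent's (row, col, direction), or None while the agent has not yet entered the grid.
# The class name is ours; ObservationBuilder.get_many falls back to calling get() per handle,
# and self.env is populated by the environment via set_env().
class PositionObservationBuilder(ObservationBuilder):
    def reset(self):
        pass

    def get(self, handle: int = 0):
        agent = self.env.agents[handle]
        if agent.position is None:
            return None
        return (*agent.position, agent.direction)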
observation_builder = DummyObservationBuilder()
# Custom observation builder with predictor, uncomment line below if you want to try this one
# observation_builder = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())
# Construct the environment with the given observation, generators, predictors, and stochastic data
env = RailEnv(width=width,
              height=height,
              rail_generator=rail_generator,
              schedule_generator=schedule_generator,
              number_of_agents=nr_trains,
              malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),  # Malfunction data generator
              obs_builder_object=observation_builder,
              remove_agents_at_target=True  # Removes agents at the end of their journey to make space for others
              )
env.reset()
# Initiate the renderer
env_renderer = RenderTool(env, gl="PILSVG",
                          agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
                          show_debug=False,
                          screen_height=1920,  # Adjust these parameters to fit your resolution
                          screen_width=1080)   # Adjust these parameters to fit your resolution
dispatcher = CellGraphDispatcher(env)
score = 0
# Run episode
frame_step = 0
step = 0
while True:
    step += 1
    action_dict = dispatcher.step(step)
    # Environment step which returns the observations for all agents, their corresponding
    # rewards and whether they are done
    next_obs, all_rewards, done, _ = env.step(action_dict)
    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
    # env_renderer.render_env(show=True, show_observations=True, show_predictions=True)
    # os.makedirs('./misc/Fames2/', exist_ok=True)
    # env_renderer.gl.save_image('./misc/Fames2/flatland_frame_{:04d}.png'.format(step))
    frame_step += 1
    score += np.sum(list(all_rewards.values()))
    # observations = next_obs.copy()
    finished = np.sum([a.status == RailAgentStatus.DONE or a.status == RailAgentStatus.DONE_REMOVED
                       for a in env.agents])
    print('Episode: Steps {}\t Score = {}\t Finished = {}'.format(step, score, finished))
    if done['__all__']:
        break
finished = np.sum([a.status==RailAgentStatus.DONE or a.status==RailAgentStatus.DONE_REMOVED for a in env.agents])
print(f'Trains finished {finished}/{len(env.agents)} = {finished*100/len(env.agents):.2f}%')
print(f'Total time: {time.time()-start_time}s')
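# A normalized score can be handy for comparing runs (a sketch; the Flatland starter-kit
# convention divides the accumulated reward by steps times the number of agents):
# print(f'Normalized score: {score / (step * len(env.agents)):.4f}')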
import numpy as np
import time
# In Flatland you can use custom observation builders and predictors.
# Observation builders generate the observation needed by the controller.
# Predictors can be used for short-term prediction, which helps in avoiding conflicts in the network.
from flatland.envs.observations import GlobalObsForRailEnv
# First of all we import the Flatland rail environment
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_env import RailEnvActions, RailAgentStatus
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
# We also include a renderer because we want to visualize what is going on in the environment
from flatland.utils.rendertools import RenderTool, AgentRenderVariant
from flatland.envs.malfunction_generators import malfunction_from_params
from libs.graph import BuildGraphFromEnvironment, GraphPathsLocker
from libs.graph_agent import GraphAgent, AgentsList
import os
# Alternative map configurations, kept commented out; the active one is the final block.
# width = 40  # Width of map
# height = 40  # Height of map
# nr_trains = 8  # Number of trains that have an assigned task in the env
# cities_in_map = 5  # Number of cities where agents can start or end
# seed = 14  # Random seed
# width = 150  # Width of map
# height = 150  # Height of map
# nr_trains = 100  # Number of trains that have an assigned task in the env
# cities_in_map = 100  # Number of cities where agents can start or end
# seed = 14  # Random seed
# width = 26  # Width of map
# height = 26  # Height of map
# nr_trains = 1  # Number of trains that have an assigned task in the env
# cities_in_map = 2  # Number of cities where agents can start or end
# seed = 14  # Random seed
# width = 40  # Width of map
# height = 40  # Height of map
# nr_trains = 5  # Number of trains that have an assigned task in the env
# cities_in_map = 5  # Number of cities where agents can start or end
# seed = 14  # Random seed
# width = 30  # Width of map
# height = 30  # Height of map
# nr_trains = 3  # Number of trains that have an assigned task in the env
# cities_in_map = 100  # Number of cities where agents can start or end
# seed = 14  # Random seed
width = 80  # Width of map
height = 80  # Height of map
nr_trains = 50  # Number of trains that have an assigned task in the env
cities_in_map = 100  # Number of cities where agents can start or end
seed = 14  # Random seed
grid_distribution_of_cities = False  # Type of city distribution; if False, cities are placed randomly
max_rails_between_cities = 2  # Max number of tracks allowed between cities; this is the number of entry points to a city
max_rail_in_cities = 6  # Max number of parallel tracks within a city, representing a realistic train station
rail_generator = sparse_rail_generator(max_num_cities=cities_in_map,
                                       seed=seed,
                                       grid_mode=grid_distribution_of_cities,
                                       max_rails_between_cities=max_rails_between_cities,
                                       max_rails_in_city=max_rail_in_cities,
                                       )
# The schedule generator can make very basic schedules with a start point, end point and a speed
# profile for each agent. The speed profiles can also be adjusted directly, as shown later on.
# We start by introducing a statistical distribution of speed profiles.
# Different agent types (trains) with different speeds.
speed_ration_map = {1.: 0.25,       # Fast passenger train
                    1. / 2.: 0.25,  # Fast freight train
                    1. / 3.: 0.25,  # Slow commuter train
                    1. / 4.: 0.25}  # Slow freight train
# We can now initiate the schedule generator with the given speed profiles
schedule_generator = sparse_schedule_generator(speed_ration_map)
# We can furthermore pass stochastic data to the RailEnv constructor, which allows for stochastic
# malfunctions during an episode.
stochastic_data = {'malfunction_rate': 100,  # Rate of malfunction occurrence for a single agent
                   'prop_malfunction': 0.01,
                   'min_duration': 15,  # Minimal duration of malfunction
                   'max_duration': 50   # Max duration of malfunction
                   }
# Custom observation builder without predictor
observation_builder = GlobalObsForRailEnv()
# Custom observation builder with predictor, uncomment line below if you want to try this one
# observation_builder = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())
# Construct the environment with the given observation, generators, predictors, and stochastic data
env = RailEnv(width=width,
              height=height,
              rail_generator=rail_generator,
              schedule_generator=schedule_generator,
              number_of_agents=nr_trains,
              malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),  # Malfunction data generator
              obs_builder_object=observation_builder,
              remove_agents_at_target=True  # Removes agents at the end of their journey to make space for others
              )
env.reset()
# Initiate the renderer
env_renderer = RenderTool(env, gl="PILSVG",
                          agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
                          show_debug=False,
                          screen_height=1920,  # Adjust these parameters to fit your resolution
                          screen_width=1080)   # Adjust these parameters to fit your resolution
# The first thing we notice is that some agents don't have feasible paths to their target.
# We first look at the map we have created
# env_renderer.render_env(show=True)
timev = time.time()
graph = BuildGraphFromEnvironment(env)
locker = GraphPathsLocker(env.height, env.width)
controllers = [GraphAgent(graph.vs, graph.es, graph.rev_es, graph.calc_distances(agent.target),
                          agent.initial_position, agent.direction, agent.target, locker,
                          env=env, agent_id=i)
               for i, agent in enumerate(env.agents)]
alist = AgentsList(controllers, env.agents, max(5, int(round(0.1*(env.width+env.height)/2))))
print("Time for graph and agents:", time.time()-timev)
# We start by looking at the information of each agent.
# The task assigned to an agent is given by its initial position, direction, and target:
print("\n Agents in the environment have to solve the following tasks: \n")
for agent_idx, agent in enumerate(env.agents):
    print(
        "The agent with index {} has the task to go from its initial position {}, facing in the direction {}, to its target at {}.".format(
            agent_idx, agent.initial_position, agent.direction, agent.target))
# Each agent has a status indicating whether it is ready to depart, active in the environment, or done.
# For example, we can see that the agent with index 0 is currently not active.
print("\n Their current statuses are:")
print("============================")
for agent_idx, agent in enumerate(env.agents):
    print("Agent {} status is: {} with its current position being {}".format(agent_idx, str(agent.status),
                                                                              str(agent.position)))
# To enter the level, an agent needs to take any of the actions [1, 2, 3], i.e. anything except
# do_nothing or stop. If the starting cell is free, it will enter the level.
# If multiple agents want to enter the same cell at the same time, the lower-index agent enters first.
# Let's check if there are any agents with the same start location
agents_with_same_start = set()
print("\n The following agents have the same initial position:")
print("=====================================================")
for agent_idx, agent in enumerate(env.agents):
    for agent_2_idx, agent2 in enumerate(env.agents):
        if agent_idx != agent_2_idx and agent.initial_position == agent2.initial_position:
            print("Agent {} has the same initial position as agent {}".format(agent_idx, agent_2_idx))
            agents_with_same_start.add(agent_idx)
# Let's try to enter with all of these agents at the same time
action_dict = dict()
# for agent_id in agents_with_same_start:
#     action_dict[agent_id] = 1  # Try to move with the agents
# Do a step in the environment to see which agents entered:
# env.step(action_dict)
# Current state and position of the agents after all agents with the same start position tried to move
# print("\n This happened when all tried to enter at the same time:")
# print("========================================================")
# for agent_id in agents_with_same_start:
#     print(
#         "Agent {} status is: {} with the current position being {}.".format(
#             agent_id, str(env.agents[agent_id].status),
#             str(env.agents[agent_id].position)))
# As you can see, only the agents with lower indexes moved. As soon as the cell is free again, the
# remaining agents can attempt to start.
# You will also notice that the agents move at different speeds once they are on the rail.
# An agent always moves at its full speed when moving, never at a speed in between.
# The fastest an agent can go is 1, meaning that it moves to the next cell at every time step.
# All slower speeds indicate the fraction of a cell that is traversed at each time step.
# Let's look at the current speed data of the agents:
print("\n The speed information of the agents are:")
print("=========================================")
for agent_idx, agent in enumerate(env.agents):
    print(
        "Agent {} speed is: {:.2f} with the current fractional position being {}".format(
            agent_idx, agent.speed_data['speed'], agent.speed_data['position_fraction']))
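# Worked example of the fractional-speed model described above: a train with speed 1/4 advances
# position_fraction by 0.25 per step and only enters the next cell once the fraction reaches 1.0,
# i.e. it needs 4 steps per cell.
slowest = min(env.agents, key=lambda a: a.speed_data['speed'])
print("The slowest train needs {} steps per cell".format(int(round(1 / slowest.speed_data['speed']))))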
# Next, the agents can also suffer stochastic malfunctions, which leave them unable to move for a
# certain number of time steps. The malfunction data of the agents can easily be accessed as follows:
print("\n The malfunction data of the agents are:")
print("========================================")
for agent_idx, agent in enumerate(env.agents):
    print(
        "Agent {} is OK = {}".format(
            agent_idx, agent.malfunction_data['malfunction'] < 1))
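# The same dict also exposes the remaining downtime directly; a quick peek (field name as used
# in this Flatland version):
broken = [(i, a.malfunction_data['malfunction'])
          for i, a in enumerate(env.agents) if a.malfunction_data['malfunction'] > 0]
print("Currently malfunctioning (agent, steps left):", broken)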
# Now that you have seen these concepts, you will realize that agents don't need to take an action
# at every time step, since an action only changes the outcome when it is chosen at cell entry.
# Therefore the environment provides information about which agents need to provide an action in
# the next step. You can access it in the following way.
# Choose an action for each agent
# for a in range(env.get_num_agents()):
#     action = controller.act(0)
#     action_dict.update({a: action})
# for i, a in enumerate(env.agents):
#     action = controllers[i].act(a)
#     action_dict.update({i: action})
# Do the environment step
observations, rewards, dones, information = env.step(action_dict)
print("\n The following agents can register an action:")
print("========================================")
for info in information['action_required']:
    print("Agent {} needs to submit an action.".format(info))
# We recommend that you monitor the malfunction data and the action required in order to optimize your training
# and controlling code.
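# Sketch of the recommended pattern: only query a controller for the agents the environment flags,
# instead of acting for everyone each step (action_required maps agent handle -> bool):
# for handle, required in information['action_required'].items():
#     if required:
#         action_dict[handle] = controllers[handle].act(env.agents[handle])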
# Let us now look at an episode playing out, with the graph-based controllers choosing the actions
print("\nStart episode...")
# Reset the rendering system
env_renderer.reset()
# Here you can also further enhance the provided observation by means of normalization
# See training navigation example in the baseline repository
score = 0
# Run episode
frame_step = 0
# for step in range(500):
step = 0
while True:
    step += 1
    # Choose an action for each agent in the environment
    # for a in range(env.get_num_agents()):
    #     action = controller.act(observations[a])
    #     action_dict.update({a: action})
    # for i, a in enumerate(env.agents):
    for i in alist.active():
        a = env.agents[i]
        # A new action can only be chosen at cell entry (position_fraction == 0)
        if a.speed_data['position_fraction'] == 0.0:
            action = controllers[i].act(a)
            action_dict.update({i: action})
    # env.agents[a].position = env.agents[a].target
    # Environment step which returns the observations for all agents, their corresponding
    # rewards and whether they are done
    next_obs, all_rewards, done, _ = env.step(action_dict)
    # env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
    env_renderer.render_env(show=True, show_observations=True, show_predictions=True)
    # os.makedirs('./misc/Fames2/', exist_ok=True)
    # env_renderer.gl.save_image('./misc/Fames2/flatland_frame_{:04d}.png'.format(step))
    frame_step += 1
    score += np.sum(list(all_rewards.values()))
    # observations = next_obs.copy()
    if done['__all__']:
        break
    finished = np.sum([a.status == RailAgentStatus.DONE or a.status == RailAgentStatus.DONE_REMOVED
                       for a in env.agents])
    print('Episode: Steps {}\t Score = {}\t Finished = {}\t Not started = {}'.format(step, score, finished,
                                                                                     alist.not_started()))
finished = np.sum([a.status==RailAgentStatus.DONE or a.status==RailAgentStatus.DONE_REMOVED for a in env.agents])
print(f'Trains finished {finished}/{len(env.agents)} = {finished*100/len(env.agents):.2f}%')
from libs.cell_graph_validator import CellGraphValidator
from libs.cell_graph_dispatcher import CellGraphDispatcher
import numpy as np
def no_sort_dispatcher(env):
    return CellGraphDispatcher(env)
micro_test = {
    "width": [50],
    "height": [50],
    "cities": [10],
    "trains": [50],
    "seed": 42
}
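# Smoke test of the plain dispatcher on the micro suite, mirroring the multiple_tests(...) call
# in the optimization script above (left commented out; micro_test omits the malfunction
# parameters, so this assumes the validator provides defaults for them):
# res = CellGraphValidator.multiple_tests(no_sort_dispatcher, **micro_test)
# print(res["finished"])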
mini_test = {
    "width": [50, 50, 100, 125],
    "height": [50, 50, 100, 125],
    "cities": [10]*4,
    "trains": [100, 200, 100, 100],
    "seed": 42
}
# small_test: a randomized suite of N environments
N = 15
seed = 42
np.random.seed(seed)
width = np.random.randint(20, 150, (N,))
height = np.random.randint(20, 150, (N,))
width[N-1] = 150
height[N//2:] = width[N//2:]
nr_train = np.random.randint(50, 200, (N,))
nr_train[N-1] = 200
n_cities = np.random.randint(2, 35, (N,))
grid_distribution_of_cities = False
max_rails_between_cities = np.random.randint(2, 4, (N,))
max_rail_in_city = np.random.randint(3, 6, (N,))
malfunction_rate = np.random.randint(500, 4000, (N,))
prop_malfunction = np.random.uniform(0.01, 0.01, (N,))  # effectively constant 0.01 (zero-width interval)
min_duration = np.random.randint(20, 80, (N,))
max_duration = np.random.randint(20, 80, (N,))
max_duration = np.maximum(min_duration, max_duration)
small_test = {
    "width": width,
    "height": height,
    "trains": nr_train,