Commit b25d312c authored by nilabha

Update OR expert and wandb data

parent 3f321add
@@ -206,22 +206,26 @@ class ImitationAgent(PPOTrainer):
                     steps += 1
-                    for agent, agent_info in info.items():
-                        if episode_max_steps == 0:
-                            episode_max_steps = agent_info["max_episode_steps"]
-                            episode_num_agents = agent_info["num_agents"]
-                        episode_steps = max(episode_steps, agent_info["agent_step"])
-                        episode_score += agent_info["agent_score"]
-                        if agent_info["agent_done"]:
-                            episode_done_agents += 1
                     if done["__all__"]:
-                        print(float(episode_done_agents) / n_agents)
+                        for agent, agent_info in info.items():
+                            if episode_max_steps == 0:
+                                episode_max_steps = agent_info["max_episode_steps"]
+                                episode_num_agents = agent_info["num_agents"]
+                            episode_steps = max(episode_steps, agent_info["agent_step"])
+                            episode_score += agent_info["agent_score"]
+                            if agent_info["agent_done"]:
+                                episode_done_agents += 1
+                        print(float(episode_done_agents) / episode_num_agents)
                         break

                 norm_factor = 1.0 / (episode_max_steps * episode_num_agents)
                 result = {
                     "expert_episode_reward_mean": episode_score,
-                    "expert_episode_completion_mean": float(episode_done_agents) / n_agents,
+                    "episode_reward_mean": episode_score,
+                    "expert_episode_completion_mean": float(episode_done_agents) / episode_num_agents,
                     "expert_episode_score_normalized": episode_score * norm_factor,
                     "episodes_this_iter": n_episodes,
                     "timesteps_this_iter": steps,
                 }
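For quick reference, the two metrics the patched loop reports reduce to the formulas below. This is a standalone sketch with made-up numbers, not code from the repo; only the two formulas are taken from the diff above.

# Illustrative metric computation (hypothetical values).
episode_num_agents = 5      # agents in the episode
episode_done_agents = 3     # agents that reached their target
episode_max_steps = 100     # per-episode step limit
episode_score = -120.0      # summed per-agent scores

norm_factor = 1.0 / (episode_max_steps * episode_num_agents)
completion = float(episode_done_agents) / episode_num_agents  # 0.6
score_normalized = episode_score * norm_factor                # -0.24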
or-tree-obs-small-v0:
    run: ImitationAgent
    env: flatland_sparse
    stop:
        training_iteration: 50 # 1.5e7

    checkpoint_at_end: True
    checkpoint_freq: 50
    keep_checkpoints_num: 100
    checkpoint_score_attr: episode_reward_mean

    num_samples: 1

    config:
        num_workers: 2
        num_envs_per_worker: 1
        num_gpus: 0

        clip_rewards: False
        vf_clip_param: 500.0
        entropy_coeff: 0.01

        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000 # 5000
        rollout_fragment_length: 50 # 100
        sgd_minibatch_size: 100 # 500

        vf_share_layers: False

        env_config:
            seed: 100
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30

            generator: sparse_rail_generator
            generator_config: small_v0

            wandb:
                project: flatland-paper
                entity: aicrowd
                tags: ["small_v0", "OR", "eval"] # TODO should be set programmatically

        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True # False
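The block above is a standard Ray Tune experiment spec, so it can also be launched without the repo's own entry point. A minimal sketch, assuming the ImitationAgent trainable and the flatland_sparse env are already registered with Ray, and with an illustrative file name:

# Hypothetical launcher for the experiment YAML above.
import yaml
import ray
from ray import tune

with open("or-tree-obs-small-v0.yaml") as f:
    experiments = yaml.safe_load(f)  # top-level key = experiment name

ray.init()
tune.run_experiments(experiments)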
or-tree-obs-small-v0:
    run: ImitationAgent
    env: flatland_sparse
    stop:
        training_iteration: 50 # 1.5e7

    checkpoint_at_end: True
    checkpoint_freq: 50
    keep_checkpoints_num: 100
    checkpoint_score_attr: episode_reward_mean

    num_samples: 3

    config:
        num_workers: 2
        num_envs_per_worker: 1
        num_gpus: 0

        clip_rewards: False
        vf_clip_param: 500.0
        entropy_coeff: 0.01

        # effective batch_size: train_batch_size * num_agents_in_each_environment [5, 10]
        # see https://github.com/ray-project/ray/issues/4628
        train_batch_size: 1000 # 5000
        rollout_fragment_length: 50 # 100
        sgd_minibatch_size: 100 # 500

        vf_share_layers: False

        env_config:
            observation: tree
            observation_config:
                max_depth: 2
                shortest_path_max_depth: 30

            generator: sparse_rail_generator
            generator_config: small_v0

            wandb:
                project: flatland-paper
                entity: aicrowd
                tags: ["small_v0", "OR", "train"] # TODO should be set programmatically

        model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
            vf_share_layers: True # False
import os

import numpy as np
import pandas as pd
import wandb
from tqdm.autonotebook import tqdm

sample = False  # For testing purposes: only pull a few history rows per run
get_wandb_data = False  # Set to True to re-download run histories from wandb


def smooth(scalars, weight=0.9):  # Weight between 0 and 1
    """TensorBoard-style exponential smoothing of a list of scalars."""
    last = scalars[0]  # First value in the plot (first timestep)
    smoothed = []
    for point in scalars:
        smoothed_val = last * weight + (1 - weight) * point  # Calculate smoothed value
        smoothed.append(smoothed_val)  # Save it
        last = smoothed_val  # Anchor the last smoothed value
    return smoothed
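# Illustrative check (not part of the original script): smooth([0.0, 1.0, 1.0])
# with the default weight of 0.9 returns [0.0, 0.1, 0.19] -- each point moves
# only 10% of the way toward the new value, like TensorBoard's smoothing slider.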
if get_wandb_data:
    api = wandb.Api()
    project_entity = "aicrowd/flatland-paper"
    runs = api.runs(project_entity)
    reports = api.reports(project_entity)

    eval_metrics = ["evaluation/custom_metrics/percentage_complete_mean",
                    "evaluation/custom_metrics/episode_score_normalized_mean",
                    "timesteps_total"]
    train_metrics = ["custom_metrics/percentage_complete_mean",
                     "custom_metrics/episode_score_normalized_mean",
                     "timesteps_total"]
    expert_metrics = ["expert_episode_completion_mean", "expert_episode_reward_mean"]
    all_metrics = train_metrics + eval_metrics
    exclude_runs = ["eval_recording_ppo_tree_obs"]
    eval_metrics_names = [eval_metric.replace("/", "_") for eval_metric in eval_metrics]

    summary_list = []
    config_list = []
    name_list = []
    df_all_eval = pd.DataFrame(columns=eval_metrics + ['run', 'group'])
    df_all_train = pd.DataFrame(columns=train_metrics + ['run', 'group'])

    for run in tqdm(runs):
        if run.name not in exclude_runs:
            # run.summary contains the output key/values like accuracy.
            # We call ._json_dict to omit large files.
            summary_list.append(run.summary._json_dict)
            # run.config holds the input hyperparameters. We remove special
            # values that start with _.
            config_list.append({k: v for k, v in run.config.items() if not k.startswith('_')})
            # run.name is the human-readable name of the run.
            name_list.append(run.name)

            df_eval = pd.DataFrame(columns=eval_metrics)
            df_train = pd.DataFrame(columns=train_metrics)
            history_eval = run.scan_history(eval_metrics)
            history_train = run.scan_history(train_metrics)

            i = 0
            for row in history_eval:
                df_eval = df_eval.append({eval_metrics[0]: row.get(eval_metrics[0]),
                                          eval_metrics[1]: row.get(eval_metrics[1]),
                                          eval_metrics[2]: row.get(eval_metrics[2])},
                                         ignore_index=True)
                if sample:
                    i += 1
                    if i > 2:
                        break

            i = 0
            for row in history_train:
                df_train = df_train.append({train_metrics[0]: row.get(train_metrics[0]),
                                            train_metrics[1]: row.get(train_metrics[1]),
                                            train_metrics[2]: row.get(train_metrics[2])},
                                           ignore_index=True)
                if sample:
                    i += 1
                    if i > 2:
                        break

            df_eval['run'] = run.name
            df_train['run'] = run.name
            # Group runs by the experiment YAML they were launched from.
            group = run.config.get('env_config', {}).get('yaml_config')
            if group:
                group = group.split(os.sep)[-1].split('.')[0]
            df_eval['group'] = group
            df_train['group'] = group
            df_eval.sort_values(by="timesteps_total", inplace=True)
            df_train.sort_values(by="timesteps_total", inplace=True)
            df_all_eval = pd.concat([df_all_eval, df_eval])
            df_all_train = pd.concat([df_all_train, df_train])

    summary_df = pd.DataFrame.from_records(summary_list)
    config_df = pd.DataFrame.from_records(config_list)
    name_df = pd.DataFrame({'name': name_list})
    all_df = pd.concat([name_df, config_df, summary_df], axis=1)
    all_df.to_csv("project.csv")
    df_all_eval.to_csv("all_eval_runs.csv", index=False)
    df_all_train.to_csv("all_train_runs.csv", index=False)
df_all_eval = pd.read_csv("all_eval_runs.csv")
df_all_train = pd.read_csv("all_train_runs.csv")
df_all_eval.sort_values(by="timesteps_total", inplace=True, ascending=True)
df_all_train.sort_values(by="timesteps_total", inplace=True, ascending=True)

min_steps = 1000000  # only consider rows beyond 1M environment steps
def get_smooth_results(df_all_final):
    # Note: helper to rename the eval metric columns in place; not called below.
    if 'evaluation/custom_metrics/percentage_complete_mean' in df_all_final.columns:
        df_all_final['perc_completion_mean'] = df_all_final['evaluation/custom_metrics/percentage_complete_mean']
        df_all_final['normalized_mean'] = df_all_final['evaluation/custom_metrics/episode_score_normalized_mean']
    return df_all_final


def get_final_results(df_all_final, min_steps=min_steps):
    df_all_final = df_all_final[df_all_final['timesteps_total'] > min_steps]
    if 'evaluation/custom_metrics/percentage_complete_mean' in df_all_final.columns:
        # Evaluation runs: take the best (max) value per run, then aggregate
        # mean/std across the runs in each experiment group.
        df_all_final['perc_completion_mean'] = df_all_final['evaluation/custom_metrics/percentage_complete_mean']
        df_all_final['normalized_mean'] = df_all_final['evaluation/custom_metrics/episode_score_normalized_mean']
        df_all_final_results = (df_all_final[["run", "group", "perc_completion_mean", "normalized_mean"]]
                                .groupby("run").max()
                                .groupby("group").aggregate([np.mean, np.std])
                                .reset_index())
    elif 'custom_metrics/percentage_complete_mean' in df_all_final.columns:
        # Training runs: smooth each run's curve first, take its max, then
        # aggregate mean/std per group.
        df_all_final_results = df_all_final[["run", "group",
                                             "custom_metrics/percentage_complete_mean",
                                             "custom_metrics/episode_score_normalized_mean"]].groupby(by=["run", "group"])
        norm_mean = df_all_final_results.apply(
            lambda x: max(smooth(x["custom_metrics/episode_score_normalized_mean"].to_list())))
        perc_completion_mean = df_all_final_results.apply(
            lambda x: max(smooth(x["custom_metrics/percentage_complete_mean"].to_list())))
        df_all_final = pd.concat([perc_completion_mean, norm_mean], axis=1)
        df_all_final_results = df_all_final.groupby("group").aggregate([np.mean, np.std]).reset_index()
    return df_all_final, df_all_final_results
# get_final_results returns (filtered per-run dataframe, per-group mean/std table)
df_all_eval_final_results = get_final_results(df_all_eval)
df_all_train_final_results = get_final_results(df_all_train)

df_all_eval_final_results[0].to_csv('evaluation_results.csv')
df_all_train_final_results[0].to_csv('training_results.csv')
df_all_eval_final_results[1].to_csv('eval_results_group.csv', index=False)
df_all_train_final_results[1].to_csv('train_results_group.csv', index=False)
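The per-group CSVs carry a two-row header because aggregate([np.mean, np.std]) produces a (metric, statistic) MultiIndex on the columns. A minimal sketch of reloading them downstream, assuming only pandas:

# Hypothetical consumer of the group summaries written above; header=[0, 1]
# restores the (metric, statistic) MultiIndex that aggregate() created.
import pandas as pd

eval_groups = pd.read_csv("eval_results_group.csv", header=[0, 1])
train_groups = pd.read_csv("train_results_group.csv", header=[0, 1])
print(eval_groups)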