Commit 817e5040 authored by nilabha

Rollout scripts updates

parent ed021c4d
@@ -15,9 +15,9 @@ import ray
 from ray.rllib.agents.registry import get_agent_class
 from ray.rllib.env import MultiAgentEnv
 from ray.rllib.env.base_env import _DUMMY_AGENT_ID
-from ray.rllib.evaluation.episode import _flatten_action
+# from ray.rllib.evaluation.episode import _flatten_action # ray 0.8.4
 from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID
-#from ray.rllib.utils.space_utils import flatten_to_single_ndarray
+from ray.rllib.utils.space_utils import flatten_to_single_ndarray # ray 0.8.5
 from ray.tune.utils import merge_dicts
 from utils.loader import load_envs, load_models
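
Note: the import change above moves the script from ray 0.8.4's _flatten_action to ray 0.8.5's flatten_to_single_ndarray. A minimal compatibility sketch (an assumption, not part of this commit) that would keep rollout.py importable under either version:

# Hypothetical compatibility shim, not from the commit: alias whichever
# flatten helper the installed ray version provides.
try:
    from ray.rllib.utils.space_utils import flatten_to_single_ndarray  # ray >= 0.8.5
except ImportError:
    from ray.rllib.evaluation.episode import _flatten_action as flatten_to_single_ndarray  # ray 0.8.4
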
@@ -324,8 +324,8 @@ def rollout(agent,
 state_init = {p: m.get_initial_state() for p, m in policy_map.items()}
 use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
 action_init = {
-#p: flatten_to_single_ndarray(m.action_space.sample()) # ray 0.8.5
-p: _flatten_action(m.action_space.sample()) # ray 0.8.4
+p: flatten_to_single_ndarray(m.action_space.sample()) # ray 0.8.5
+# p: _flatten_action(m.action_space.sample()) # ray 0.8.4
 for p, m in policy_map.items()
 }
 else:
@@ -389,8 +389,8 @@ def rollout(agent,
 prev_action=prev_actions[agent_id],
 prev_reward=prev_rewards[agent_id],
 policy_id=policy_id)
-#a_action = flatten_to_single_ndarray(a_action) # ray 0.8.5
-a_action = _flatten_action(a_action) # tuple actions # ray 0.8.4
+a_action = flatten_to_single_ndarray(a_action) # ray 0.8.5
+# a_action = _flatten_action(a_action) # tuple actions # ray 0.8.4
 action_dict[agent_id] = a_action
 prev_actions[agent_id] = a_action
 action = action_dict
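
For reference, a short sketch of what flatten_to_single_ndarray does to the sampled and computed actions above (assumes ray >= 0.8.5 and gym are installed; the Tuple space is illustrative, not the flatland action space): it concatenates the components of a structured action into one 1-D ndarray, which is the form the prev_actions bookkeeping expects.

import gym
import numpy as np
from ray.rllib.utils.space_utils import flatten_to_single_ndarray

# Illustrative composite action space (assumed for the example only).
space = gym.spaces.Tuple((gym.spaces.Discrete(4),
                          gym.spaces.Box(-1.0, 1.0, (2,), dtype=np.float32)))
sample = space.sample()                    # e.g. (3, array([ 0.2, -0.8], dtype=float32))
flat = flatten_to_single_ndarray(sample)   # single 1-D ndarray, here of length 3
print(flat.shape)                          # (3,)
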
@@ -425,7 +425,7 @@ def rollout(agent,
 episode_score = sum(agents_score.values())
 simulation_rewards.append(episode_score)
-simulation_rewards_normalized.append(episode_score / (episode_max_steps + episode_num_agents))
+simulation_rewards_normalized.append(episode_score / (episode_max_steps * episode_num_agents))
 simulation_percentage_complete.append(float(len(agents_done)) / episode_num_agents)
 simulation_steps.append(episode_steps)
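
The denominator fix above replaces a sum with a product. A hedged numeric sketch (the numbers are assumed, not from the commit): with roughly one reward unit per agent per step, the worst-case episode score scales with episode_max_steps * episode_num_agents, so dividing by the product keeps the normalized score in a fixed range, while the old sum did not.

# Illustrative values only (assumed, not from the commit).
episode_max_steps = 480
episode_num_agents = 5
episode_score = -1200.0

old_norm = episode_score / (episode_max_steps + episode_num_agents)  # ~ -2.47, outside [-1, 0]
new_norm = episode_score / (episode_max_steps * episode_num_agents)  # -0.5, inside [-1, 0]
print(old_norm, new_norm)
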
......
echo "===================="
echo "APEX TREE OBS"
echo "===================="
# python rollout.py baselines/checkpoints/apex-tree-obs-small-v0-0/checkpoint_400/checkpoint-400 --run APEX --no-render --episodes 50 --env 'flatland_sparse' --config '{"env_config": {"seed":1000000000,"generator": "sparse_rail_generator", "generator_config": "small_v0", "observation": "tree", "observation_config": {"max_depth": 2, "shortest_path_max_depth": 30}}, "model": {"fcnet_activation": "relu", "fcnet_hiddens": [256, 256], "vf_share_layers": "True"}}'
# python rollout.py baselines/checkpoints/apex_tree_obs_small_v0-1/checkpoint_400/checkpoint-400 --run APEX --no-render --episodes 50 --env 'flatland_sparse' --config '{"env_config": {"seed":1000000000,"generator": "sparse_rail_generator", "generator_config": "small_v0", "observation": "tree", "observation_config": {"max_depth": 2, "shortest_path_max_depth": 30}}, "model": {"fcnet_activation": "relu", "fcnet_hiddens": [256, 256], "vf_share_layers": "True"}}'
# python rollout.py baselines/checkpoints/apex_tree_obs_small_v0-2/checkpoint_250/checkpoint-250 --run APEX --no-render --episodes 50 --env 'flatland_sparse' --config '{"env_config": {"seed":1000000000,"generator": "sparse_rail_generator", "generator_config": "small_v0", "observation": "tree", "observation_config": {"max_depth": 2, "shortest_path_max_depth": 30}}, "model": {"fcnet_activation": "relu", "fcnet_hiddens": [256, 256], "vf_share_layers": "True"}}'
\ No newline at end of file