diff --git a/reinforcement_learning/multi_agent_training.py b/reinforcement_learning/multi_agent_training.py index 2e74d689f2d19b05b75946388216908d1dfcfda1..68bf90bf6865726d05351d3e217f17e7ebe3a05e 100755 --- a/reinforcement_learning/multi_agent_training.py +++ b/reinforcement_learning/multi_agent_training.py @@ -181,7 +181,7 @@ def train_agent(train_params, train_env_params, eval_env_params, obs_params): elif train_params.policy == "PPO": policy = PPOPolicy(state_size, get_action_size(), use_replay_buffer=False, in_parameters=train_params) elif train_params.policy == "DeadLockAvoidance": - policy = DeadLockAvoidanceAgent(train_env, get_action_size()) + policy = DeadLockAvoidanceAgent(train_env, get_action_size(), enable_eps=False) elif train_params.policy == "DeadLockAvoidanceWithDecision": # inter_policy = PPOPolicy(state_size, get_action_size(), use_replay_buffer=False, in_parameters=train_params) inter_policy = DDDQNPolicy(state_size, get_action_size(), train_params)