From 3bbf1cea6fa3e43268efeda9da4b6c75cde77936 Mon Sep 17 00:00:00 2001 From: Erik Nygren <erik.nygren@sbb.ch> Date: Tue, 23 Apr 2019 17:09:30 +0200 Subject: [PATCH] updated level parameters for training --- examples/training_navigation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/training_navigation.py b/examples/training_navigation.py index 18bb6356..681e8b78 100644 --- a/examples/training_navigation.py +++ b/examples/training_navigation.py @@ -74,7 +74,7 @@ for trials in range(1, n_trials + 1): #print(step) # Action for a in range(env.number_of_agents): - action = agent.act(np.array(obs[a]), eps=0) + action = agent.act(np.array(obs[a]), eps=eps) action_prob[action] += 1 action_dict.update({a: action}) @@ -110,7 +110,7 @@ for trials in range(1, n_trials + 1): eps, action_prob/np.sum(action_prob)), end=" ") if trials % 100 == 0: - action_prob = [1]*4 + print( '\rTraining {} Agents.\tEpisode {}\tAverage Score: {:.0f}\tDones: {:.2f}%\tEpsilon: {:.2f} \t Action Probabilities: \t {}'.format( env.number_of_agents, @@ -122,4 +122,4 @@ for trials in range(1, n_trials + 1): eps, action_prob / np.sum(action_prob))) torch.save(agent.qnetwork_local.state_dict(), '../flatland/baselines/Nets/avoid_checkpoint' + str(trials) + '.pth') - + action_prob = [1]*4 -- GitLab