From a96ddb70a7114693b81657902a0bdf8df95dbe10 Mon Sep 17 00:00:00 2001
From: Erik Nygren <erik.nygren@sbb.ch>
Date: Thu, 23 May 2019 22:16:26 +0200
Subject: [PATCH] updated documentation

---
 docs/gettingstarted.rst      |  2 ++
 examples/training_example.py | 10 ++++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst
index e23af100..2d545b24 100644
--- a/docs/gettingstarted.rst
+++ b/docs/gettingstarted.rst
@@ -139,6 +139,8 @@ Next we configure the difficulty of our task by modifying the complex_rail_gener
 The difficulty of a railway network depends on the dimensions (width x height) and the number of agents in the network.
 By varying the number of start and goal connections (nr_start_goal) and the number of extra railway elements added (nr_extra)
 the number of alternative paths of each agents can be modified. The more possible paths an agent has to reach its target the easier the task becomes.
+Here we don't specify any observation builder but rather use the standard tree observation. If you would like to use a custom observation please follow
+the instructions in the next tutorial.
 Feel free to vary these parameters to see how your own agent holds up on different setting. The evalutation set of railway configurations will
 cover the whole spectrum from easy to complex tasks.
 
diff --git a/examples/training_example.py b/examples/training_example.py
index e9359ccc..5a8c7c00 100644
--- a/examples/training_example.py
+++ b/examples/training_example.py
@@ -45,13 +45,13 @@ class RandomAgent:
         return
 
 
-# Load the agent with the parameters corresponding to the environment and observation_builder
-agent = RandomAgent(env.get_observation_size(), env.get_action_size())
+# Initialize the agent with the parameters corresponding to the environment and observation_builder
+agent = RandomAgent(218, 4)
 n_trials = 1000
 
 # Empty dictionary for all agent action
 action_dict = dict()
-
+print("Starting Training...")
 for trials in range(1, n_trials + 1):
 
     # Reset environment and get initial observations for all agents
@@ -72,9 +72,11 @@ for trials in range(1, n_trials + 1):
         next_obs, all_rewards, done, _ = env.step(action_dict)
 
         # Update replay buffer and train agent
-        agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
+        agent.step((obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a]))
         score += all_rewards[a]
 
     obs = next_obs.copy()
     if done['__all__']:
         break
+    print('Episode Nr. {}'.format(trials))
+
-- 
GitLab
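
Note on the changed call: after this patch the training loop hands the whole experience to the agent as a single
(state, action, reward, next_state, done) tuple instead of five separate arguments. The sketch below is illustrative
only and is not part of the patch; the class body is an assumption modelled on the example's RandomAgent, and the
hard-coded sizes 218 and 4 are simply the values used in the patched call, presumably the tree-observation vector
size and the action-space size of the example environment.

    import numpy as np

    class RandomAgent:
        """Minimal stand-in for the RandomAgent in examples/training_example.py (assumed shape, not the patch's code)."""

        def __init__(self, state_size, action_size):
            self.state_size = state_size
            self.action_size = action_size

        def act(self, state):
            # Random policy: pick one of the available actions uniformly.
            return np.random.choice(np.arange(self.action_size))

        def step(self, memories):
            # The experience arrives as one (state, action, reward, next_state, done) tuple,
            # matching agent.step((obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])).
            state, action, reward, next_state, done = memories
            # A learning agent would push this tuple into a replay buffer and update its policy here.
            return

    # Matches the constructor call in the patch.
    agent = RandomAgent(218, 4)

Passing the experience as one tuple keeps the step() interface aligned with how such transitions are usually stored
in a replay buffer, which is presumably why the call was changed.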