diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst
index e23af1005eba53b638ba3946f10c4291d23f8c44..2d545b24f1703c153fc3f66739fb9c9d7538d1b6 100644
--- a/docs/gettingstarted.rst
+++ b/docs/gettingstarted.rst
@@ -139,6 +139,8 @@ Next we configure the difficulty of our task by modifying the complex_rail_gener
 
 The difficulty of a railway network depends on the dimensions (width x height) and the number of agents in the network. By varying the number of start and goal connections (nr_start_goal) and the number of extra railway elements added (nr_extra) the number of alternative paths of each agents can be modified. The more possible paths an agent has to reach its target the easier the task becomes.
 
+Here we don't specify any observation builder but rather use the standard tree observation. If you would like to use a custom observation, please follow
+the instructions in the next tutorial.
 
 Feel free to vary these parameters to see how your own agent holds up on different setting. The evalutation set of railway configurations will cover the whole spectrum from easy to complex tasks.
 
diff --git a/examples/training_example.py b/examples/training_example.py
index e9359ccc179b41bcf11bff2d6f196d63a69bebe7..5a8c7c00db6fafa34323b5177b5d1cff2880c2d0 100644
--- a/examples/training_example.py
+++ b/examples/training_example.py
@@ -45,13 +45,13 @@ class RandomAgent:
         return
 
 
-# Load the agent with the parameters corresponding to the environment and observation_builder
-agent = RandomAgent(env.get_observation_size(), env.get_action_size())
+# Initialize the agent with the parameters corresponding to the environment and observation_builder
+agent = RandomAgent(218, 4)
 n_trials = 1000
 
 # Empty dictionary for all agent action
 action_dict = dict()
-
+print("Starting Training...")
 for trials in range(1, n_trials + 1):
 
     # Reset environment and get initial observations for all agents
@@ -72,9 +72,11 @@ for trials in range(1, n_trials + 1):
         next_obs, all_rewards, done, _ = env.step(action_dict)
 
         # Update replay buffer and train agent
-        agent.step(obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
+        agent.step((obs[a], action_dict[a], all_rewards[a], next_obs[a], done[a]))
         score += all_rewards[a]
 
         obs = next_obs.copy()
         if done['__all__']:
             break
+    print('Episode Nr. {}'.format(trials))
+
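
Note for reviewers: both hunks in examples/training_example.py rely on an `env` object built earlier in that file with complex_rail_generator, which is what the docs/gettingstarted.rst text above refers to. A minimal sketch of that setup follows, assuming the import paths and keyword names of early flatland-rl releases; the module path, the min_dist/max_dist/seed arguments, and the concrete values are assumptions for illustration, not part of this diff. No observation builder is passed, matching the added documentation lines about using the standard tree observation by default.

# Sketch only: module paths and parameter values below are assumptions, not taken from this diff.
from flatland.envs.generators import complex_rail_generator  # assumed import path (early flatland-rl)
from flatland.envs.rail_env import RailEnv

# Difficulty is governed by the grid size, the number of agents, and the
# nr_start_goal / nr_extra arguments discussed in docs/gettingstarted.rst.
env = RailEnv(width=20,
              height=20,
              rail_generator=complex_rail_generator(nr_start_goal=10,  # start/goal connections
                                                    nr_extra=2,        # extra rail elements
                                                    min_dist=8,        # assumed additional arguments
                                                    max_dist=99999,
                                                    seed=0),
              number_of_agents=3)

obs = env.reset()  # per-agent observations, indexed by agent handle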