1#!/usr/bin/env python
2#########################################################################
3# Reinforcement Learning with PGPE on the CartPoleEnvironment
4#
5# Requirements: pylab (for plotting only). If not available, comment the
6# last 3 lines out
7#########################################################################
8__author__ = "Thomas Rueckstiess, Frank Sehnke"
9__version__ = '$Id$'
10
11from pybrain.tools.example_tools import ExTools
12from pybrain.tools.shortcuts import buildNetwork
13from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
14from pybrain.rl.agents import OptimizationAgent
15from pybrain.optimization import PGPE
16from pybrain.rl.experiments import EpisodicExperiment
17
# --- Experiment configuration -------------------------------------------
batch = 1  # number of samples per learning step
prnts = 100  # number of learning steps after which results are printed
# Number of print intervals per experiment, chosen so that
# epis * prnts * batch is at most 4000 episodes. Floor division is required:
# true division yields a float on Python 3 and range(epis) below would
# raise TypeError.
epis = 4000 // batch // prnts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts)  # tool for printing and plotting

# Each experiment starts from a fresh environment, task, network and learner.
for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task
    # NOTE(review): 200 is presumably the maximum number of steps per
    # episode -- confirm against the BalanceTask signature.
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network (layer sizes 4 and 1, bias disabled)
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options);
    # storeAllEvaluations is enabled because _allEvaluations is read below
    agent = OptimizationAgent(net, PGPE(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    # Do the experiment: run `prnts` learning steps of `batch` episodes
    # each, then report, repeated `epis` times.
    for updates in range(epis):
        for _ in range(prnts):
            experiment.doEpisodes(batch)
        # NOTE(review): [-50:-1] selects up to 49 evaluations and excludes
        # the most recent one; kept as in the original -- confirm whether
        # [-50:] was intended.
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
44