#!/usr/bin/env python
#########################################################################
# Reinforcement Learning with several optimization algorithms
# on the CartPoleEnvironment
#
# Requirements: pylab (for plotting only). If it is not available,
# comment out the last three lines.
#########################################################################

__author__ = "Thomas Rueckstiess, Frank Sehnke"


from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import PGPE, ExactNES, FEM, CMAES
from pybrain.rl.experiments import EpisodicExperiment

batch = 2     # number of episodes per learning step
prnts = 100   # number of learning steps after which results are printed
epis = 4000 // batch // prnts  # number of outer learning cycles (4000 rollouts in total)
numbExp = 40  # number of independent experiment repetitions per algorithm
et = ExTools(batch, prnts)  # tool for printing and plotting
expList = [PGPE, ExactNES, FEM, CMAES]  # the optimizers to compare
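# PGPE:     Policy Gradients with Parameter-based Exploration
# ExactNES: exact Natural Evolution Strategies
# FEM:      Fitness Expectation Maximization
# CMAES:    Covariance Matrix Adaptation Evolution Strategy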
for e in expList:
    for runs in range(numbExp):
        # create environment
        env = CartPoleEnvironment()
        # create task: balance the pole for up to 200 steps per episode;
        # desiredValue=None means no target fitness is set
        task = BalanceTask(env, 200, desiredValue=None)
        # create controller network: a single linear layer mapping the four
        # state observations to one output (the force applied to the cart)
        net = buildNetwork(4, 1, bias=False)
        # create agent with controller and learner (and its options)
        agent = OptimizationAgent(net, e(storeAllEvaluations=True))
        et.agent = agent
        # create the experiment
        experiment = EpisodicExperiment(task, agent)

        # run the experiment
        for updates in range(epis):
            for i in range(prnts):
                experiment.doEpisodes(batch)
            # print statistics over the most recent episode evaluations
            et.printResults(agent.learner._allEvaluations[-50:-1], runs, updates)
        et.addExps()   # store the results of this run
    et.nextExps()      # advance to the next algorithm
et.showExps()          # plot all learning curves (requires pylab)