1#!/usr/bin/env python
2#########################################################################
3# Reinforcement Learning with CMA-ES on the CartPoleEnvironment
4#
5# Requirements: pylab (for plotting only). If not available, comment the
6# last 3 lines out
7#########################################################################
8
9__author__ = "Thomas Rueckstiess, Frank Sehnke"
10
11
12from pybrain.tools.example_tools import ExTools
13from pybrain.tools.shortcuts import buildNetwork
14from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
15from pybrain.rl.agents import OptimizationAgent
16from pybrain.optimization import CMAES
17from pybrain.rl.experiments import EpisodicExperiment
18
# --- Experiment configuration -------------------------------------------
batch = 2    # number of samples (episodes) per learning step
prnts = 100  # number of learning steps between printed results
# Use floor division: `range()` below requires an int, and plain `/`
# yields a float on Python 3 (the original `4000/batch/prnts` relied on
# Python 2 integer division).
epis = 4000 // batch // prnts  # number of rollout batches per experiment
numbExp = 10  # number of independent experiment repetitions
et = ExTools(batch, prnts)  # helper tool for printing and plotting
24
for run in range(numbExp):
    # Build a fresh environment and balancing task for each run.
    environment = CartPoleEnvironment()
    task = BalanceTask(environment, 200, desiredValue=None)
    # Linear controller network: 4 state inputs -> 1 action, no bias unit.
    controller = buildNetwork(4, 1, bias=False)
    # Agent couples the controller with a CMA-ES black-box optimizer.
    agent = OptimizationAgent(controller, CMAES(storeAllEvaluations=True))
    et.agent = agent
    experiment = EpisodicExperiment(task, agent)

    # Run the experiment, reporting intermediate results every `prnts`
    # learning steps.
    for update in range(epis):
        for _ in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults(agent.learner._allEvaluations[-50:-1], run, update)
    et.addExps()
et.showExps()
45