#!/usr/bin/env python
#########################################################################
# Reinforcement Learning with CMA-ES on the CartPoleEnvironment
#
# Trains a linear controller (4 inputs -> 1 output, no bias) to balance
# the cart-pole by black-box optimization of episode return with CMA-ES.
# Runs `numbExp` independent experiments and prints/plots intermediate
# results via ExTools.
#
# Requirements: pylab (for plotting only). If not available, comment the
# last 3 lines out
#########################################################################

__author__ = "Thomas Rueckstiess, Frank Sehnke"


from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import CMAES
from pybrain.rl.experiments import EpisodicExperiment

batch = 2      # number of samples per learning step
prnts = 100    # number of learning steps after which results are printed
# Floor division is required here: under Python 3, `4000/batch/prnts`
# yields a float (20.0) and `range(epis)` below would raise TypeError.
epis = 4000 // batch // prnts  # number of rollout rounds per experiment
numbExp = 10   # number of independent experiments
et = ExTools(batch, prnts)  # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task: balance for up to 200 steps, no early-stop target value
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network: direct linear mapping, 4 state inputs -> 1 action
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options);
    # storeAllEvaluations keeps every episode return for reporting below
    agent = OptimizationAgent(net, CMAES(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    # Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        # report over the last ~50 evaluations (NOTE: the -1 upper bound
        # excludes the most recent one — kept as in the original)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()