#!/usr/bin/env python
#########################################################################
# Reinforcement Learning with PGPE on the CartPoleEnvironment
#
# Runs `numbExp` independent experiments. In each one a small network
# controller is optimized with PGPE on the cart-pole balance task, and
# intermediate results are reported through ExTools.
#
# Requirements: pylab (for plotting only). If not available, comment the
# last 3 lines out
#########################################################################
__author__ = "Thomas Rueckstiess, Frank Sehnke"
__version__ = '$Id$'

from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import PGPE
from pybrain.rl.experiments import EpisodicExperiment

batch = 1  # number of samples per learning step
prnts = 100  # number of learning steps after which results are printed
# Number of print intervals per experiment, chosen so that each experiment
# runs epis * prnts * batch = 4000 episodes in total. Floor division keeps
# this an int on Python 3 as well; true division would yield a float and
# make range(epis) below raise TypeError.
epis = 4000 // batch // prnts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts)  # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options);
    # storeAllEvaluations=True is needed because the reporting below reads
    # agent.learner._allEvaluations
    agent = OptimizationAgent(net, PGPE(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    # Do the experiment
    for updates in range(epis):
        # prnts learning steps of `batch` episodes each between two reports
        for i in range(prnts):
            experiment.doEpisodes(batch)
        # NOTE(review): the slice [-50:-1] leaves out the most recent
        # evaluation; confirm whether [-50:] was intended.
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()