#!/usr/bin/env python
#########################################################################
# Reinforcement Learning with several optimization algorithms
# on the CartPoleEnvironment
#
# Requirements: pylab (for plotting only). If not available, comment
# out the last 3 lines
#########################################################################

__author__ = "Thomas Rueckstiess, Frank Sehnke"


from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import PGPE #@UnusedImport
from pybrain.optimization import ExactNES #@UnusedImport
from pybrain.optimization import FEM #@UnusedImport
from pybrain.optimization import CMAES #@UnusedImport

from pybrain.rl.experiments import EpisodicExperiment

batch = 2                       # number of samples per learning step
prnts = 100                     # number of learning steps between printed results
epis = 4000 // batch // prnts   # number of rollouts
numbExp = 40                    # number of experiments
et = ExTools(batch, prnts)      # tool for printing and plotting

# learners to compare; each entry is instantiated via eval() inside the loop
# so every run starts with a fresh learner
expList = ["PGPE(storeAllEvaluations = True)",
           "ExactNES(storeAllEvaluations = True)",
           "FEM(storeAllEvaluations = True)",
           "CMAES(storeAllEvaluations = True)"]

for e in expList:
    for runs in range(numbExp):
        # create environment
        env = CartPoleEnvironment()
        # create task
        task = BalanceTask(env, 200, desiredValue=None)
        # create controller network
        net = buildNetwork(4, 1, bias=False)
        # create agent with controller and learner (and its options)
        agent = OptimizationAgent(net, eval(e))
        et.agent = agent
        # create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Do the experiment
        for updates in range(epis):
            for i in range(prnts):
                experiment.doEpisodes(batch)
            # hand the most recent evaluations to the printing/plotting helper
            et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
        et.addExps()
    et.nextExps()
et.showExps()