#!/usr/bin/env python
#########################################################################
# Reinforcement Learning with REINFORCE on the CartPoleEnvironment
#
# Requirements: pylab (for plotting only). If not available, comment the
# last 3 lines out
#########################################################################
"""Train a linear controller on the cart-pole balancing task using the
REINFORCE policy-gradient learner, repeated over several independent
experiments; intermediate results are printed/plotted via ExTools."""
__author__ = "Thomas Rueckstiess, Frank Sehnke"
__version__ = '$Id$'

from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Reinforce
from pybrain.rl.experiments import EpisodicExperiment

batch = 50    # number of samples per learning step
prnts = 4     # number of learning steps after results are printed
# Floor division keeps `epis` an int (4000 // 50 // 4 == 20). Plain `/`
# yields a float under Python 3 and breaks range(epis) below.
epis = 4000 // batch // prnts   # number of rollouts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts, kind="learner")  # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task: balance the pole, up to 200 steps per episode
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network: linear map from 4 state inputs to 1 action
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = LearningAgent(net, Reinforce())
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    # Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        # Report the summed reward of the most recent episode in the
        # learner's dataset as this update's score.
        num_seqs = agent.learner.dataset.getNumSequences()
        state, action, reward = agent.learner.dataset.getSequence(num_seqs - 1)
        et.printResults(reward.sum(), runs, updates)
    et.addExps()
et.showExps()