1#!/usr/bin/env python
2#########################################################################
3# Reinforcement Learning with REINFORCE on the CartPoleEnvironment
4#
5# Requirements: pylab (for plotting only). If not available, comment the
6# last 3 lines out
7#########################################################################
8__author__ = "Thomas Rueckstiess, Frank Sehnke"
9__version__ = '$Id$'
10
11from pybrain.tools.example_tools import ExTools
12from pybrain.tools.shortcuts import buildNetwork
13from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
14from pybrain.rl.agents import LearningAgent
15from pybrain.rl.learners import Reinforce
16from pybrain.rl.experiments import EpisodicExperiment
17
# Experiment hyper-parameters.
batch = 50    # number of episodes gathered per learning step
prnts = 4     # number of learning steps between printed result lines
# Floor division: plain `/` yields a float under Python 3, and the
# later `range(epis)` requires an int. `//` gives the same value (20)
# under Python 2 as well, so this is backward-compatible.
epis = 4000 // batch // prnts  # number of rollout/print cycles per experiment
numbExp = 10  # number of independent experiments to average over
22et = ExTools(batch, prnts, kind = "learner") #tool for printing and plotting
23
# Run `numbExp` independent REINFORCE experiments on the cart-pole
# balancing task, printing the return of the last rollout batch after
# every `prnts` learning steps, then plot/summarize via ExTools.
for run in range(numbExp):
    # Fresh environment and balancing task (episodes capped at 200 steps).
    environment = CartPoleEnvironment()
    task = BalanceTask(environment, 200, desiredValue=None)
    # Linear controller network: 4 state inputs -> 1 action output, no bias.
    controller = buildNetwork(4, 1, bias=False)
    # Couple the controller with the REINFORCE policy-gradient learner.
    agent = LearningAgent(controller, Reinforce())
    et.agent = agent
    experiment = EpisodicExperiment(task, agent)

    # Learning loop: each printed line summarizes `prnts` learning steps.
    for update in range(epis):
        for _ in range(prnts):
            experiment.doEpisodes(batch)
        # Pull the most recently collected sequence to report its return.
        dataset = agent.learner.dataset
        last = dataset.getNumSequences() - 1
        state, action, reward = dataset.getSequence(last)
        et.printResults(reward.sum(), run, update)
    et.addExps()
et.showExps()
45