#!/usr/bin/env python
#########################################################################
# Reinforcement Learning with PGPE on the CCRL ODE Environment
#
# The CCRL robot is a body with two 7-DoF arms.
# Complex grasping tasks can be learned with this environment.
#
# Control/Actions:
# The agent can control all 14 DoF of the robot arms plus the 2 hands.
#
# A wide variety of sensors are available for observation and reward:
# - the 16 joint angles
# - the 16 joint angle velocities
# - the number of hand parts in contact with the target object
# - collision with the table
# - the distance of the hand to the target
# - the angle of the hand to the horizontal and vertical planes
#
# Available tasks:
# - Grasp task: the agent has to get hold of the object while avoiding
#   collisions with the table
#
# Requirements: pylab (for plotting only). If it is not available, comment
# out the plotting calls at the end of the script.
# Author: Frank Sehnke, sehnke@in.tum.de
#########################################################################
__author__ = "Frank Sehnke"
__version__ = '$Id$'

from pybrain.tools.example_tools import ExTools
from pybrain.rl.environments.ode import CCRLEnvironment
from pybrain.rl.environments.ode.tasks import CCRLGlasTask
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import PGPE
from pybrain.rl.experiments import EpisodicExperiment

hiddenUnits = 4
batch = 1     # number of episodes (rollouts) per learning step
prnts = 1     # number of learning steps between printed results
epis = 2000 // batch // prnts  # number of print cycles, so batch * prnts * epis = 2000 episodes in total
numbExp = 10  # number of independent experiment runs
et = ExTools(batch, prnts)  # tool for printing and plotting

env = None
for runs in range(numbExp):
    # create environment
    # options: XODE model file, OpenGL rendering on/off, realtime simulation
    # while a client is connected, server IP (default: localhost), port (default: 21560)
    if env is not None:
        env.closeSocket()
    env = CCRLEnvironment()
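    # For headless batch training, the constructor options listed above can
    # be passed instead; a commented-out sketch, assuming the renderer
    # keyword of PyBrain's ODE environments:
    #env = CCRLEnvironment(renderer=False)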
    # create task
    task = CCRLGlasTask(env)
    # create controller network: observations -> tanh hidden layer -> actions
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen, outclass=TanhLayer)
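    # The network maps the observation vector through one tanh hidden layer
    # to one output per actuated DoF; a quick, commented-out sanity check of
    # the dimensions (uses only names defined above):
    #print("%d observations -> %d hidden -> %d actions" % (len(task.getObservation()), hiddenUnits, env.actLen))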
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, PGPE(storeAllEvaluations=True))
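    # Further PGPE hyperparameters can be passed the same way; a commented-out
    # sketch with illustrative, untuned values (learningRate and
    # sigmaLearningRate are assumed to match this PyBrain version):
    #agent = OptimizationAgent(net, PGPE(storeAllEvaluations=True,
    #                                    learningRate=0.2, sigmaLearningRate=0.1))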
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    # do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults(agent.learner._allEvaluations[-50:-1], runs, updates)
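        # With storeAllEvaluations=True the learner keeps every episode
        # return, so e.g. a mean over the same window could be logged as well
        # (commented-out sketch, illustrative only):
        #recent = agent.learner._allEvaluations[-50:]
        #print("mean return over last %d episodes: %.2f" % (len(recent), sum(recent) / len(recent)))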
    et.addExps()
et.showExps()
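# After all runs, the best parameter vector found in the last run can be read
# back from the learner; a commented-out sketch, assuming the _bestFound()
# accessor of PyBrain's black-box optimizers:
#bestParams, bestReward = agent.learner._bestFound()
#print("best episode return of last run:", bestReward)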
# To view what the simulation is doing at the moment, go to
# pybrain/rl/environments/ode/ and start viewer.py (PyOpenGL must be
# installed; see the PyBrain documentation)
