#!/usr/bin/env python
#########################################################################
# Reinforcement Learning with PGPE on the CCRL ODE Environment
#
# The CCRL robot is a body structure with 2x 7 DoF Arms.
# Complex grasping tasks can be learned with this environment.
#
# Control/Actions:
# The agent can control all 14 DOF of the robot arms plus the 2 hands.
#
# A wide variety of sensors are available for observation and reward:
# - 16 angles of joints
# - 16 angle velocities of joints
# - Number of hand parts that have contact to target object
# - collision with table
# - distance of hand to target
# - angle of hand to horizontal and vertical plane
#
# Tasks available are:
# - Grasp Task, agent has to get hold of the object while avoiding
#   collision with the table
#
# Requirements: pylab (for plotting only). If not available, comment the
# last 3 lines out
# Author: Frank Sehnke, sehnke@in.tum.de
#########################################################################
__author__ = "Frank Sehnke"
__version__ = '$Id$'

from pybrain.tools.example_tools import ExTools
from pybrain.rl.environments.ode import CCRLEnvironment
from pybrain.rl.environments.ode.tasks import CCRLGlasTask
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import PGPE
from pybrain.rl.experiments import EpisodicExperiment

# Experiment hyper-parameters.
hiddenUnits = 4
batch = 1   # number of samples per learning step
prnts = 1   # number of learning steps after results are printed
# NOTE: floor division keeps epis an int so range(epis) also works under
# Python 3's true division (plain "/" would yield a float and raise).
epis = 2000 // batch // prnts   # number of roleouts
numbExp = 10                    # number of experiments
et = ExTools(batch, prnts)      # tool for printing and plotting

env = None
for runs in range(numbExp):
    # create environment
    # Options: XML-Model, Bool(OpenGL), Bool(Realtime simu. while client
    # is connected), ServerIP(default:localhost), Port(default:21560)
    if env is not None:
        # release the previous run's simulation socket before reconnecting
        env.closeSocket()
    env = CCRLEnvironment()
    # create task
    task = CCRLGlasTask(env)
    # create controller network: observations -> hidden tanh layer -> actuators
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, PGPE(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    # Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        # report the most recent stored episode evaluations for this update
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
# To view what the simulation is doing at the moment, go to
# pybrain/rl/environments/ode/ and start viewer.py (python-openGL must be
# installed, see PyBrain documentation)