#!/usr/bin/env python
#########################################################################
# Reinforcement Learning with PGPE on the CCRL ODE Environment
#
# The CCRL robot is a body structure with 2x 7 DoF arms.
# Complex grasping tasks can be learned with this environment.
#
# Control/Actions:
# The agent can control all 14 DoF of the robot arms plus the 2 hands.
#
# A wide variety of sensors are available for observation and reward:
# - 16 joint angles
# - 16 joint angle velocities
# - number of hand parts in contact with the target object
# - collision with the table
# - distance of the hand to the target
# - angle of the hand to the horizontal and vertical planes
#
# Tasks available:
# - Grasp task: the agent has to get hold of the object while avoiding
#   collisions with the table
#
# Requirements: pylab (for plotting only). If not available, comment
# out the last three lines.
# Author: Frank Sehnke, sehnke@in.tum.de
#########################################################################

__author__ = "Frank Sehnke"
__version__ = '$Id$'

from pybrain.tools.example_tools import ExTools
from pybrain.rl.environments.ode import CCRLEnvironment
from pybrain.rl.environments.ode.tasks import CCRLGlasTask
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import PGPE
from pybrain.rl.experiments import EpisodicExperiment

hiddenUnits = 4
batch = 1                      # number of samples per learning step
prnts = 1                      # number of learning steps between printed results
epis = 2000 // batch // prnts  # number of rollouts (integer division, so range() gets an int)
numbExp = 10                   # number of experiments
et = ExTools(batch, prnts)     # tool for printing and plotting

env = None
for runs in range(numbExp):
    # create environment
    # Options: XML model, bool(OpenGL), bool(realtime simulation while a client
    # is connected), server IP (default: localhost), port (default: 21560)
    if env is not None:
        env.closeSocket()
    env = CCRLEnvironment()
    # create task
    task = CCRLGlasTask(env)
    # create controller network (optionally add a second hidden layer of size hiddenUnits)
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, PGPE(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    # run the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults(agent.learner._allEvaluations[-50:-1], runs, updates)
    et.addExps()
et.showExps()

# To view what the simulation is doing at the moment, go to
# pybrain/rl/environments/ode/ and start viewer.py (PyOpenGL must be
# installed, see the PyBrain documentation).
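
# --- Optional plotting fallback ----------------------------------------
# A minimal sketch, not part of the original example: if pylab/ExTools
# plotting is unavailable, the per-rollout fitness values collected by
# PGPE(storeAllEvaluations=True) can be plotted directly with matplotlib
# (assumed to be installed). `agent` here is simply the agent left over
# from the final run of the loop above.
try:
    import matplotlib.pyplot as plt
    plt.plot(list(agent.learner._allEvaluations))  # one fitness value per rollout
    plt.xlabel('rollout')
    plt.ylabel('episode return')
    plt.title('PGPE on CCRLGlasTask (final run)')
    plt.show()
except ImportError:
    pass  # matplotlib not installed; skip the fallback plot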