#!/usr/bin/env python
#########################################################################
# Reinforcement Learning with PGPE on the FlexCube Environment
#
# The FlexCube Environment is a Mass-Spring-System composed of 8 mass points.
# These resemble a cube with flexible edges.
#
# Control/Actions:
# The agent can control the 12 equilibrium edge lengths.
#
# A wide variety of sensors are available for observation and reward:
# - 12 edge lengths
# - 12 wanted edge lengths (the last action)
# - vertexes contact with floor
# - vertexes min height (distance of closest vertex to the floor)
# - distance to origin
# - distance and angle to target
#
# Tasks available are:
# - GrowTask, agent has to maximize the volume of the cube
# - JumpTask, agent has to maximize the distance of the lowest mass point
#   during the episode
# - WalkTask, agent has to maximize the distance to the starting point
# - WalkDirectionTask, agent has to minimize the distance to a target point.
# - TargetTask, like the previous task but with several target points
#
# NOTE(review): despite the "PGPE" title above, the learner instantiated
# below is SimpleSPSA — confirm which optimizer this example is meant to
# demonstrate.
#
# Requirements: pylab (for plotting only).
# If not available, comment the last 3 lines out.
# Author: Frank Sehnke, sehnke@in.tum.de
#########################################################################
__author__ = "Frank Sehnke"
__version__ = '$Id$'

from pybrain.tools.example_tools import ExTools
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.flexcube import FlexCubeEnvironment, WalkTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import SimpleSPSA
from pybrain.rl.experiments import EpisodicExperiment

# Experiment hyper-parameters.
hiddenUnits = 4
batch = 2      # number of samples per learning step
prnts = 1      # number of learning steps after which results are printed
# Floor division is required: range() needs an int. The original code used
# Python 2's integer '/', which becomes float division under Python 3 and
# makes range(epis) raise TypeError.
epis = 5000000 // batch // prnts  # number of rollouts
numbExp = 10   # number of independent experiment repetitions
et = ExTools(batch, prnts)  # tool for printing and plotting

for runs in range(numbExp):
    # Create the environment.
    # Options: Bool(OpenGL), Bool(Realtime simu. while client is connected),
    # ServerIP(default:localhost), Port(default:21560)
    env = FlexCubeEnvironment()
    # Create the task: maximize distance from the starting point.
    task = WalkTask(env)
    # Create the controller network: one hidden layer, tanh output squashing
    # so actions stay bounded.
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen, outclass=TanhLayer)
    # Create the agent with controller and learner (and its options).
    # storeAllEvaluations lets us report recent fitness values below.
    agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations=True))
    et.agent = agent
    # Create the experiment.
    experiment = EpisodicExperiment(task, agent)

    # Run the experiment: print after every `prnts` learning steps.
    for updates in range(epis):
        for _ in range(prnts):
            experiment.doEpisodes(batch)
        # Report the most recent evaluations (the learner's private
        # evaluation log; [-50:-1] keeps the last ~50, dropping the newest).
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
# To view what the simulation is doing at the moment, go to
# pybrain/rl/environments/flexcube/ and start renderer.py
# (python-openGL must be installed)