1#!/usr/bin/env python
2#########################################################################
3# Reinforcement Learning with PGPE on the FlexCube Environment
4#
5# The FlexCube Environment is a Mass-Spring-System composed of 8 mass points.
6# These resemble a cube with flexible edges.
7#
8# Control/Actions:
9# The agent can control the 12 equilibrium edge lengths.
10#
11# A wide variety of sensors are available for observation and reward:
12# - 12 edge lengths
13# - 12 wanted edge lengths (the last action)
# - vertices in contact with the floor
# - minimum vertex height (distance of the closest vertex to the floor)
16# - distance to origin
17# - distance and angle to target
18#
# Available tasks are:
20# - GrowTask, agent has to maximize the volume of the cube
21# - JumpTask, agent has to maximize the distance of the lowest mass point during the episode
22# - WalkTask, agent has to maximize the distance to the starting point
23# - WalkDirectionTask, agent has to minimize the distance to a target point.
24# - TargetTask, like the previous task but with several target points
25#
# Requirements: pylab (for plotting only). If it is not available, comment
# out the last 3 lines of this script.
28# Author: Frank Sehnke, sehnke@in.tum.de
29#########################################################################
30__author__ = "Frank Sehnke"
31__version__ = '$Id$'
32
33from pybrain.tools.example_tools import ExTools
34from pybrain.structure.modules.tanhlayer import TanhLayer
35from pybrain.tools.shortcuts import buildNetwork
36from pybrain.rl.environments.flexcube import FlexCubeEnvironment, WalkTask
37from pybrain.rl.agents import OptimizationAgent
38from pybrain.optimization import SimpleSPSA
39from pybrain.rl.experiments import EpisodicExperiment
40
hiddenUnits = 4    # number of hidden units in the controller network
batch = 2          # number of samples per learning step
prnts = 1          # number of learning steps after which results are printed
# Use floor division so `epis` stays an int: with the original `/`, Python 3
# would produce a float and `range(epis)` below would raise TypeError.
epis = 5000000 // batch // prnts  # number of rollouts
numbExp = 10       # number of independent experiment repetitions
et = ExTools(batch, prnts)  # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(Realtime simu. while client is connected), ServerIP(default:localhost), Port(default:21560)
    env = FlexCubeEnvironment()
    # create task
    task = WalkTask(env)
    # create controller network: one input per observation component, one
    # tanh output per controllable edge actuator (env.actLen)
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    # Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        # NOTE(review): the [-50:-1] slice averages over a recent window but
        # drops the very latest evaluation -- presumably intentional; confirm
        # before changing.
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
#To view what the simulation is doing at the moment, go to pybrain/rl/environments/flexcube/ and start renderer.py (the Python OpenGL bindings must be installed)
70