1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <iostream>
4 #include <math.h>
5 
6 using namespace std;
7 
8 double noise;
9 int numPoles;
10 double longPoleAngle;
11 double poleangle;
12 bool markov;
13 double polelength;
14 double dydx[6];
15 double state[6];
16 int episodes;
17 int totalIterations;
18 int steps;
19 int maxsteps;
20 
21 
22 //////////////////////////////////////////////////////////////////////
23 //
24 // Double Pole physics
25 //
26 //////////////////////////////////////////////////////////////////////
27 
28 static const double TRACK_SIZE  = 2.4;
29 static const double MUP         = 0.000002;
30 static const double MUC         = 0.0005;
31 static const double GRAVITY     = -9.8;
32 static const double MASSCART    = 1.0;
33 static const double MASSPOLE_1  = 0.1;
34 double MASSPOLE_2        = 0.01;
35 static const double LENGTH_1    = 0.5;	 // actually half the pole's length
36 double LENGTH_2          = 0.05;
37 static const double FORCE_MAG   = 10.0;  //magnitude of max force
38 static const double FORCE_MAG2   = 20.0;  //magnitude of max force times 2
39 static const double TAU         = 0.01;	  //seconds between state updates
40 
41 #define BIAS 0.01
42 #define one_degree 0.0174532	/* 2pi/360 */
43 #define six_degrees 0.1047192
44 #define twelve_degrees 0.2094384
45 #define fifteen_degrees 0.2617993
46 #define thirty_six_degrees 0.628329
47 #define degrees64    1.2566580
48 #define fifty_degrees 0.87266
49 
50 #define BANG_BANG false
51 
52 #include "cartpole.h"
53 
54 
55 
56 char modelfile[100];
57 
initCartPole(int markov_,int numPoles_,int maxsteps_)58 extern void initCartPole(int markov_, int numPoles_, int maxsteps_)
59 {
60   poleangle = 4.0156035;
61   longPoleAngle = poleangle * one_degree;
62   polelength = 0.1;
63   LENGTH_2 = polelength/2;
64   MASSPOLE_2 = polelength/10;
65   noise = 0.0;
66   markov = markov_;
67   numPoles = numPoles_;
68   maxsteps = maxsteps_;
69 
70   reset();
71 
72   episodes = 0;
73   totalIterations = 0;
74 
75 
76   //echoParams();
77 }
78 
79 
80 
getObservationDimension()81 extern unsigned int getObservationDimension()
82 {
83   unsigned int inputDimension = 0;
84 
85   switch (numPoles){
86   case 1:
87     if(!markov) {         //**********************
88       inputDimension = 3; // BIAS
89                           //**********************
90     }
91     else inputDimension = 5;
92     break;
93   case 2:
94     if(!markov) {
95       inputDimension = 4;
96     }
97     else inputDimension = 7;
98     break;
99   };
100 
101   return inputDimension;
102 }
103 
104 
105 /*
106 void echoParams(){
107   cout << "\nCart-pole environment settings: \n";
108   cout << "------------------------------\n";
109   cout << "Number of poles            : " << numPoles << "\n";
110   cout << "Length of short pole       : " << LENGTH_2 * 2 << " meters\n";
111   cout << "Initial angle of long pole : " << longPoleAngle/one_degree << " degrees\n";
112   cout << "Number of inputs           : " << getObservationDimension() << "\n";
113   if(markov)
114     cout << "Markov -- full state information." << endl;
115   else
116     cout << "Non-Markov -- no velocity information." << endl;
117   if(noise)
118     cout << "Percent sensor noise    : " << noise * 50 << "\n";
119   if(BANG_BANG)
120     cout << "BANG BANG control" << endl;
121 }
122 */
123 
reset()124 extern void reset()
125 {
126   dydx[0] = dydx[1] = dydx[2] = dydx[3] =  dydx[4] = dydx[5] = 0.0;
127   state[0] = state[1] = state[3] = state[4] = state[5] = 0;
128   state[2] = longPoleAngle;
129   steps = 0;
130   episodes++;
131 }
132 
133 
134 
135 
136 
137 
138 
139 #define one_over_256  0.0390625
step(double action,double * st,double * derivs)140 void step(double action, double *st, double *derivs)
141 {
142   double force;
143   double costheta_1, costheta_2=0.0;
144   double sintheta_1, sintheta_2=0.0;
145   double gsintheta_1,gsintheta_2=0.0;
146   double temp_1,temp_2=0.0;
147   double ml_1, ml_2;
148   double fi_1,fi_2=0.0;
149   double mi_1,mi_2=0.0;
150 
151   if(BANG_BANG){
152     if(action > 0.5) force = FORCE_MAG;
153     else force = -FORCE_MAG;
154   }
155   else
156     force =  (action /*- 0.5*/) * FORCE_MAG;
157     //force =  (action - 0.5) * FORCE_MAG2;
158   if(force > FORCE_MAG)
159     force = FORCE_MAG;
160   if(force < -FORCE_MAG)
161     force = -FORCE_MAG;
162 
163 
164   if((force >= 0) && (force < one_over_256))
165     force = one_over_256;
166   if((force < 0) && (force > -one_over_256))
167     force = -one_over_256;
168 
169 
170   costheta_1 = cos(st[2]);
171   sintheta_1 = sin(st[2]);
172   gsintheta_1 = GRAVITY * sintheta_1;
173   ml_1 = LENGTH_1 * MASSPOLE_1;
174   temp_1 = MUP * st[3] / ml_1;
175   fi_1 = (ml_1 * st[3] * st[3] * sintheta_1) +
176     (0.75 * MASSPOLE_1 * costheta_1 * (temp_1 + gsintheta_1));
177   mi_1 = MASSPOLE_1 * (1 - (0.75 * costheta_1 * costheta_1));
178 
179   if(numPoles > 1){
180     costheta_2 = cos(st[4]);
181     sintheta_2 = sin(st[4]);
182     gsintheta_2 = GRAVITY * sintheta_2;
183     ml_2 = LENGTH_2 * MASSPOLE_2;
184     temp_2 = MUP * st[5] / ml_2;
185     fi_2 = (ml_2 * st[5] * st[5] * sintheta_2) +
186       (0.75 * MASSPOLE_2 * costheta_2 * (temp_2 + gsintheta_2));
187     mi_2 = MASSPOLE_2 * (1 - (0.75 * costheta_2 * costheta_2));
188   }
189 
190   derivs[1] = (force + fi_1 + fi_2)
191 	/ (mi_1 + mi_2 + MASSCART);
192 
193   derivs[3] = -0.75 * (derivs[1] * costheta_1 + gsintheta_1 + temp_1)
194     / LENGTH_1;
195   if(numPoles > 1)
196     derivs[5] = -0.75 * (derivs[1] * costheta_2 + gsintheta_2 + temp_2)
197       / LENGTH_2;
198 
199 }
200 
rk4(double f,double y[],double dydx[],double yout[])201 void rk4(double f, double y[], double dydx[], double yout[])
202 {
203 
204 	int i;
205 
206 	double hh,h6,dym[6],dyt[6],yt[6];
207 	int vars = 3;
208 
209 	if(numPoles > 1)
210 	  vars = 5;
211 
212 	hh=TAU*0.5;
213 	h6=TAU/6.0;
214 	for (i=0;i<=vars;i++) yt[i]=y[i]+hh*dydx[i];
215 	step(f,yt,dyt);
216 	dyt[0] = yt[1];
217 	dyt[2] = yt[3];
218 	dyt[4] = yt[5];
219 	for (i=0;i<=vars;i++) yt[i]=y[i]+hh*dyt[i];
220 	step(f,yt,dym);
221 	dym[0] = yt[1];
222 	dym[2] = yt[3];
223 	dym[4] = yt[5];
224 	for (i=0;i<=vars;i++) {
225 	  yt[i]=y[i]+TAU*dym[i];
226 	  dym[i] += dyt[i];
227 	}
228 	step(f,yt,dyt);
229 	dyt[0] = yt[1];
230 	dyt[2] = yt[3];
231 	dyt[4] = yt[5];
232 	for (i=0;i<=vars;i++)
233 	  yout[i]=y[i]+h6*(dydx[i]+dyt[i]+2.0*dym[i]);
234 }
235 
236 
237 
getObservation(double * input)238 extern void getObservation(double * input)
239 {
240   // first is always bias, last is always cart position (state[0])
241   // pole angles are preceded by their dreivative in the markov case
242   switch(numPoles){
243     case 1:
244       if(markov){
245 	input[4] = (state[0] / 2.4-0.01)*10                + (rand() * noise - (noise/2));
246 	input[1] = (state[1] / 10.0-0.01)*10               + (rand() * noise - (noise/2));
247 	input[2] = state[2] / twelve_degrees     + (rand() * noise - (noise/2));
248 	input[3] = 10*state[3] / 5.0                + (rand() * noise - (noise/2));
249 	input[0] = BIAS;
250       }
251       else{
252 	input[2] = 10* (state[0] / 2.4-0.01)                + (rand() * noise - (noise/2));
253 	input[1] = state[2] / twelve_degrees     + (rand() * noise - (noise/2));
254 	input[0] = BIAS;
255       }
256       break;
257     case 2:
258       if(markov){
259 	input[6] = 10*(state[0] / 2.4-0.01)                + (rand() * noise - (noise/2));
260 	input[1] = 10*(state[1] / 10.0-0.01)               + (rand() * noise - (noise/2));
261 	input[2] = state[2] / twelve_degrees;//thirty_six_degrees + (rand() * noise - (noise/2));
262 	input[3] = 10*state[3] / 5.0                + (rand() * noise - (noise/2));
263 	input[4] = state[4] / thirty_six_degrees + (rand() * noise - (noise/2));
264 	input[5] = state[5] / 16.0               + (rand() * noise - (noise/2));
265 	input[0] = BIAS;
266       }
267       else{
268 
269 	input[3] = 10*(state[0] / 2.4-0.01)                + (rand() * noise - (noise/2));
270 	input[1] = state[2] / twelve_degrees               + (rand() * noise - (noise/2));
271 	input[2] = state[4] / 0.52               + (rand() * noise - (noise/2));
272 	input[0] = BIAS;
273 
274 
275       }
276       break;
277     };
278 }
279 
280 #define RK4 1
281 #define EULER_TAU (TAU/8)
doAction(double * output)282 extern void doAction(double * output)
283 {
284 
285   int i;
286   double tmpState[6];
287   double force;
288 
289   force = output[0];
290   /*random start state for long pole*/
291   /*state[2]= rand();   */
292 
293 
294   /*--- Apply action to the simulated cart-pole ---*/
295 
296   if(RK4){
297     dydx[0] = state[1];
298     dydx[2] = state[3];
299     dydx[4] = state[5];
300     step(force,state,dydx);
301     rk4(force,state,dydx,state);
302     for(i=0;i<6;++i)
303       tmpState[i] = state[i];
304     dydx[0] = state[1];
305     dydx[2] = state[3];
306     dydx[4] = state[5];
307     step(force,state,dydx);
308     rk4(force,state,dydx,state);
309   }
310   else{
311     for(i=0;i<16;++i){
312       step(output[0],state,dydx);
313       state[0] += EULER_TAU * state[1];
314       state[1] += EULER_TAU * dydx[1];
315       state[2] += EULER_TAU * state[3];
316       state[3] += EULER_TAU * dydx[3];
317       state[4] += EULER_TAU * state[5];
318       state[5] += EULER_TAU * dydx[5];
319     }
320   }
321 
322   steps++;
323   totalIterations++;
324 }
325 
326 
outsideBounds()327 bool outsideBounds()
328 {
329   double failureAngle;
330 
331   if(numPoles > 1){
332     failureAngle = thirty_six_degrees;
333     return
334       fabs(state[0]) > TRACK_SIZE       ||
335       fabs(state[2]) > failureAngle     ||
336       fabs(state[4]) > failureAngle;
337   }
338   else{
339     failureAngle = twelve_degrees;
340     return
341       fabs(state[0]) > TRACK_SIZE       ||
342       fabs(state[2]) > failureAngle;
343   }
344 }
345 
346 
347 
trialFinished()348 extern int trialFinished()
349 {
350   if(outsideBounds())
351     return 1;
352   if(steps > maxsteps) {
353     //cout << "SUCCESS in episode " << episodes << endl;
354     return 1;
355   }
356   return 0;
357 }
358 
359 
360 
getReward()361 double getReward()
362 {
363   if(outsideBounds())
364     return -1.0;
365   return 0.0;
366 }
367 
368 
369