1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <iostream>
4 #include <math.h>
5
6 using namespace std;
7
8 double noise;
9 int numPoles;
10 double longPoleAngle;
11 double poleangle;
12 bool markov;
13 double polelength;
14 double dydx[6];
15 double state[6];
16 int episodes;
17 int totalIterations;
18 int steps;
19 int maxsteps;
20
21
22 //////////////////////////////////////////////////////////////////////
23 //
24 // Double Pole physics
25 //
26 //////////////////////////////////////////////////////////////////////
27
28 static const double TRACK_SIZE = 2.4;
29 static const double MUP = 0.000002;
30 static const double MUC = 0.0005;
31 static const double GRAVITY = -9.8;
32 static const double MASSCART = 1.0;
33 static const double MASSPOLE_1 = 0.1;
34 double MASSPOLE_2 = 0.01;
35 static const double LENGTH_1 = 0.5; // actually half the pole's length
36 double LENGTH_2 = 0.05;
37 static const double FORCE_MAG = 10.0; //magnitude of max force
38 static const double FORCE_MAG2 = 20.0; //magnitude of max force times 2
39 static const double TAU = 0.01; //seconds between state updates
40
41 #define BIAS 0.01
42 #define one_degree 0.0174532 /* 2pi/360 */
43 #define six_degrees 0.1047192
44 #define twelve_degrees 0.2094384
45 #define fifteen_degrees 0.2617993
46 #define thirty_six_degrees 0.628329
47 #define degrees64 1.2566580
48 #define fifty_degrees 0.87266
49
50 #define BANG_BANG false
51
52 #include "cartpole.h"
53
54
55
56 char modelfile[100];
57
initCartPole(int markov_,int numPoles_,int maxsteps_)58 extern void initCartPole(int markov_, int numPoles_, int maxsteps_)
59 {
60 poleangle = 4.0156035;
61 longPoleAngle = poleangle * one_degree;
62 polelength = 0.1;
63 LENGTH_2 = polelength/2;
64 MASSPOLE_2 = polelength/10;
65 noise = 0.0;
66 markov = markov_;
67 numPoles = numPoles_;
68 maxsteps = maxsteps_;
69
70 reset();
71
72 episodes = 0;
73 totalIterations = 0;
74
75
76 //echoParams();
77 }
78
79
80
getObservationDimension()81 extern unsigned int getObservationDimension()
82 {
83 unsigned int inputDimension = 0;
84
85 switch (numPoles){
86 case 1:
87 if(!markov) { //**********************
88 inputDimension = 3; // BIAS
89 //**********************
90 }
91 else inputDimension = 5;
92 break;
93 case 2:
94 if(!markov) {
95 inputDimension = 4;
96 }
97 else inputDimension = 7;
98 break;
99 };
100
101 return inputDimension;
102 }
103
104
105 /*
106 void echoParams(){
107 cout << "\nCart-pole environment settings: \n";
108 cout << "------------------------------\n";
109 cout << "Number of poles : " << numPoles << "\n";
110 cout << "Length of short pole : " << LENGTH_2 * 2 << " meters\n";
111 cout << "Initial angle of long pole : " << longPoleAngle/one_degree << " degrees\n";
112 cout << "Number of inputs : " << getObservationDimension() << "\n";
113 if(markov)
114 cout << "Markov -- full state information." << endl;
115 else
116 cout << "Non-Markov -- no velocity information." << endl;
117 if(noise)
118 cout << "Percent sensor noise : " << noise * 50 << "\n";
119 if(BANG_BANG)
120 cout << "BANG BANG control" << endl;
121 }
122 */
123
reset()124 extern void reset()
125 {
126 dydx[0] = dydx[1] = dydx[2] = dydx[3] = dydx[4] = dydx[5] = 0.0;
127 state[0] = state[1] = state[3] = state[4] = state[5] = 0;
128 state[2] = longPoleAngle;
129 steps = 0;
130 episodes++;
131 }
132
133
134
135
136
137
138
139 #define one_over_256 0.0390625
step(double action,double * st,double * derivs)140 void step(double action, double *st, double *derivs)
141 {
142 double force;
143 double costheta_1, costheta_2=0.0;
144 double sintheta_1, sintheta_2=0.0;
145 double gsintheta_1,gsintheta_2=0.0;
146 double temp_1,temp_2=0.0;
147 double ml_1, ml_2;
148 double fi_1,fi_2=0.0;
149 double mi_1,mi_2=0.0;
150
151 if(BANG_BANG){
152 if(action > 0.5) force = FORCE_MAG;
153 else force = -FORCE_MAG;
154 }
155 else
156 force = (action /*- 0.5*/) * FORCE_MAG;
157 //force = (action - 0.5) * FORCE_MAG2;
158 if(force > FORCE_MAG)
159 force = FORCE_MAG;
160 if(force < -FORCE_MAG)
161 force = -FORCE_MAG;
162
163
164 if((force >= 0) && (force < one_over_256))
165 force = one_over_256;
166 if((force < 0) && (force > -one_over_256))
167 force = -one_over_256;
168
169
170 costheta_1 = cos(st[2]);
171 sintheta_1 = sin(st[2]);
172 gsintheta_1 = GRAVITY * sintheta_1;
173 ml_1 = LENGTH_1 * MASSPOLE_1;
174 temp_1 = MUP * st[3] / ml_1;
175 fi_1 = (ml_1 * st[3] * st[3] * sintheta_1) +
176 (0.75 * MASSPOLE_1 * costheta_1 * (temp_1 + gsintheta_1));
177 mi_1 = MASSPOLE_1 * (1 - (0.75 * costheta_1 * costheta_1));
178
179 if(numPoles > 1){
180 costheta_2 = cos(st[4]);
181 sintheta_2 = sin(st[4]);
182 gsintheta_2 = GRAVITY * sintheta_2;
183 ml_2 = LENGTH_2 * MASSPOLE_2;
184 temp_2 = MUP * st[5] / ml_2;
185 fi_2 = (ml_2 * st[5] * st[5] * sintheta_2) +
186 (0.75 * MASSPOLE_2 * costheta_2 * (temp_2 + gsintheta_2));
187 mi_2 = MASSPOLE_2 * (1 - (0.75 * costheta_2 * costheta_2));
188 }
189
190 derivs[1] = (force + fi_1 + fi_2)
191 / (mi_1 + mi_2 + MASSCART);
192
193 derivs[3] = -0.75 * (derivs[1] * costheta_1 + gsintheta_1 + temp_1)
194 / LENGTH_1;
195 if(numPoles > 1)
196 derivs[5] = -0.75 * (derivs[1] * costheta_2 + gsintheta_2 + temp_2)
197 / LENGTH_2;
198
199 }
200
rk4(double f,double y[],double dydx[],double yout[])201 void rk4(double f, double y[], double dydx[], double yout[])
202 {
203
204 int i;
205
206 double hh,h6,dym[6],dyt[6],yt[6];
207 int vars = 3;
208
209 if(numPoles > 1)
210 vars = 5;
211
212 hh=TAU*0.5;
213 h6=TAU/6.0;
214 for (i=0;i<=vars;i++) yt[i]=y[i]+hh*dydx[i];
215 step(f,yt,dyt);
216 dyt[0] = yt[1];
217 dyt[2] = yt[3];
218 dyt[4] = yt[5];
219 for (i=0;i<=vars;i++) yt[i]=y[i]+hh*dyt[i];
220 step(f,yt,dym);
221 dym[0] = yt[1];
222 dym[2] = yt[3];
223 dym[4] = yt[5];
224 for (i=0;i<=vars;i++) {
225 yt[i]=y[i]+TAU*dym[i];
226 dym[i] += dyt[i];
227 }
228 step(f,yt,dyt);
229 dyt[0] = yt[1];
230 dyt[2] = yt[3];
231 dyt[4] = yt[5];
232 for (i=0;i<=vars;i++)
233 yout[i]=y[i]+h6*(dydx[i]+dyt[i]+2.0*dym[i]);
234 }
235
236
237
getObservation(double * input)238 extern void getObservation(double * input)
239 {
240 // first is always bias, last is always cart position (state[0])
241 // pole angles are preceded by their dreivative in the markov case
242 switch(numPoles){
243 case 1:
244 if(markov){
245 input[4] = (state[0] / 2.4-0.01)*10 + (rand() * noise - (noise/2));
246 input[1] = (state[1] / 10.0-0.01)*10 + (rand() * noise - (noise/2));
247 input[2] = state[2] / twelve_degrees + (rand() * noise - (noise/2));
248 input[3] = 10*state[3] / 5.0 + (rand() * noise - (noise/2));
249 input[0] = BIAS;
250 }
251 else{
252 input[2] = 10* (state[0] / 2.4-0.01) + (rand() * noise - (noise/2));
253 input[1] = state[2] / twelve_degrees + (rand() * noise - (noise/2));
254 input[0] = BIAS;
255 }
256 break;
257 case 2:
258 if(markov){
259 input[6] = 10*(state[0] / 2.4-0.01) + (rand() * noise - (noise/2));
260 input[1] = 10*(state[1] / 10.0-0.01) + (rand() * noise - (noise/2));
261 input[2] = state[2] / twelve_degrees;//thirty_six_degrees + (rand() * noise - (noise/2));
262 input[3] = 10*state[3] / 5.0 + (rand() * noise - (noise/2));
263 input[4] = state[4] / thirty_six_degrees + (rand() * noise - (noise/2));
264 input[5] = state[5] / 16.0 + (rand() * noise - (noise/2));
265 input[0] = BIAS;
266 }
267 else{
268
269 input[3] = 10*(state[0] / 2.4-0.01) + (rand() * noise - (noise/2));
270 input[1] = state[2] / twelve_degrees + (rand() * noise - (noise/2));
271 input[2] = state[4] / 0.52 + (rand() * noise - (noise/2));
272 input[0] = BIAS;
273
274
275 }
276 break;
277 };
278 }
279
280 #define RK4 1
281 #define EULER_TAU (TAU/8)
doAction(double * output)282 extern void doAction(double * output)
283 {
284
285 int i;
286 double tmpState[6];
287 double force;
288
289 force = output[0];
290 /*random start state for long pole*/
291 /*state[2]= rand(); */
292
293
294 /*--- Apply action to the simulated cart-pole ---*/
295
296 if(RK4){
297 dydx[0] = state[1];
298 dydx[2] = state[3];
299 dydx[4] = state[5];
300 step(force,state,dydx);
301 rk4(force,state,dydx,state);
302 for(i=0;i<6;++i)
303 tmpState[i] = state[i];
304 dydx[0] = state[1];
305 dydx[2] = state[3];
306 dydx[4] = state[5];
307 step(force,state,dydx);
308 rk4(force,state,dydx,state);
309 }
310 else{
311 for(i=0;i<16;++i){
312 step(output[0],state,dydx);
313 state[0] += EULER_TAU * state[1];
314 state[1] += EULER_TAU * dydx[1];
315 state[2] += EULER_TAU * state[3];
316 state[3] += EULER_TAU * dydx[3];
317 state[4] += EULER_TAU * state[5];
318 state[5] += EULER_TAU * dydx[5];
319 }
320 }
321
322 steps++;
323 totalIterations++;
324 }
325
326
outsideBounds()327 bool outsideBounds()
328 {
329 double failureAngle;
330
331 if(numPoles > 1){
332 failureAngle = thirty_six_degrees;
333 return
334 fabs(state[0]) > TRACK_SIZE ||
335 fabs(state[2]) > failureAngle ||
336 fabs(state[4]) > failureAngle;
337 }
338 else{
339 failureAngle = twelve_degrees;
340 return
341 fabs(state[0]) > TRACK_SIZE ||
342 fabs(state[2]) > failureAngle;
343 }
344 }
345
346
347
trialFinished()348 extern int trialFinished()
349 {
350 if(outsideBounds())
351 return 1;
352 if(steps > maxsteps) {
353 //cout << "SUCCESS in episode " << episodes << endl;
354 return 1;
355 }
356 return 0;
357 }
358
359
360
getReward()361 double getReward()
362 {
363 if(outsideBounds())
364 return -1.0;
365 return 0.0;
366 }
367
368
369