1 /* -------------------------------------------------------------------------- *
2 * Simbody(tm) *
3 * -------------------------------------------------------------------------- *
4 * This is part of the SimTK biosimulation toolkit originating from *
5 * Simbios, the NIH National Center for Physics-Based Simulation of *
6 * Biological Structures at Stanford, funded under the NIH Roadmap for *
7 * Medical Research, grant U54 GM072970. See https://simtk.org/home/simbody. *
8 * *
9 * Portions copyright (c) 2011-15 Stanford University and the Authors. *
10 * Authors: Peter Eastman *
11 * Contributors: Michael Sherman *
12 * *
13 * Licensed under the Apache License, Version 2.0 (the "License"); you may *
14 * not use this file except in compliance with the License. You may obtain a *
15 * copy of the License at http://www.apache.org/licenses/LICENSE-2.0. *
16 * *
17 * Unless required by applicable law or agreed to in writing, software *
18 * distributed under the License is distributed on an "AS IS" BASIS, *
19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
20 * See the License for the specific language governing permissions and *
21 * limitations under the License. *
22 * -------------------------------------------------------------------------- */
23
24 #include "SimTKsimbody.h"
25 #include <string>
26 #include <ctime>
27
28 using std::cout;
29 using std::endl;
30 using std::string;
31
32 #ifdef _MSC_VER
33 #pragma warning(disable:4996) // don't warn about strerror, sprintf, etc.
34 #endif
35
36 using namespace SimTK;
37
38 /**
39 * This test measures the speed of various multibody calculations. It executes a collection of operations
40 * on various systems. The CPU time required to perform each operation 1000 times is measured and
41 * printed to the console.
42 *
43 * Each test system contains 256 identical bodies (plus ground), but they differ in the type of
44 * bodies and their arrangement into a multibody tree. The arrangements include 1) all bodies attached
45 * directly to ground, 2) the bodies linked in a single chain, and 3) the bodies arranged to form
46 * a binary tree.
47 */
48
49 // The following routines define the operations to be profiled.
50
doRealizeTime(MultibodySystem & system,State & state)51 void doRealizeTime(MultibodySystem& system, State& state) {
52 state.invalidateAllCacheAtOrAbove(Stage::Time);
53 system.realize(state, Stage::Time);
54 }
55
doRealizePosition(MultibodySystem & system,State & state)56 void doRealizePosition(MultibodySystem& system, State& state) {
57 state.invalidateAllCacheAtOrAbove(Stage::Position);
58 system.realize(state, Stage::Position);
59 }
60
doRealizeVelocity(MultibodySystem & system,State & state)61 void doRealizeVelocity(MultibodySystem& system, State& state) {
62 state.invalidateAllCacheAtOrAbove(Stage::Velocity);
63 system.realize(state, Stage::Velocity);
64 }
65
doRealizePositionKinematics(MultibodySystem & system,State & state)66 void doRealizePositionKinematics(MultibodySystem& system, State& state) {
67 const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
68 matter.invalidatePositionKinematics(state);
69 matter.realizePositionKinematics(state);
70 }
71
doRealizeVelocityKinematics(MultibodySystem & system,State & state)72 void doRealizeVelocityKinematics(MultibodySystem& system, State& state) {
73 const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
74 matter.invalidateVelocityKinematics(state);
75 matter.realizeVelocityKinematics(state);
76 }
77
doRealizeArticulatedBodyInertias(MultibodySystem & system,State & state)78 void doRealizeArticulatedBodyInertias(MultibodySystem& system, State& state) {
79 const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
80 matter.invalidateArticulatedBodyInertias(state);
81 matter.realizeArticulatedBodyInertias(state);
82 }
83
doRealizeArticulatedBodyVelocity(MultibodySystem & system,State & state)84 void doRealizeArticulatedBodyVelocity(MultibodySystem& system, State& state) {
85 const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
86 matter.invalidateArticulatedBodyVelocity(state);
87 matter.realizeArticulatedBodyVelocity(state);
88 }
89
doRealizeDynamics(MultibodySystem & system,State & state)90 void doRealizeDynamics(MultibodySystem& system, State& state) {
91 state.invalidateAllCacheAtOrAbove(Stage::Dynamics);
92 system.realize(state, Stage::Dynamics);
93 }
94
doRealizeAcceleration(MultibodySystem & system,State & state)95 void doRealizeAcceleration(MultibodySystem& system, State& state) {
96 state.invalidateAllCacheAtOrAbove(Stage::Acceleration);
97 system.realize(state, Stage::Acceleration);
98 }
99
100 // Cost to re-evaluate accelerations after applying some new forces, but leaving
101 // the state variables unchanged.
doRealizeDynamics2Acceleration(MultibodySystem & system,State & state)102 void doRealizeDynamics2Acceleration(MultibodySystem& system, State& state) {
103 state.invalidateAllCacheAtOrAbove(Stage::Dynamics);
104 system.realize(state, Stage::Acceleration);
105 }
106
107 // Cost to re-evaluate accelerations after updating velocities, but leaving
108 // the positions unchanged (e.g. semi-implicit Euler iterating velocities).
doRealizeVelocity2Acceleration(MultibodySystem & system,State & state)109 void doRealizeVelocity2Acceleration(MultibodySystem& system, State& state) {
110 state.updU(); // should invalidate velocity kinematics
111 state.invalidateAllCacheAtOrAbove(Stage::Velocity);
112 system.realize(state, Stage::Acceleration);
113 }
114
115 // Cost for a complete acceleration calculation at a new time and state.
116 // This includes the cost of articulated body inertias.
doRealizeTime2Acceleration(MultibodySystem & system,State & state)117 void doRealizeTime2Acceleration(MultibodySystem& system, State& state) {
118 state.updQ(); // should invalidate position & velocity kinematics
119 state.invalidateAllCacheAtOrAbove(Stage::Time);
120 system.realize(state, Stage::Acceleration);
121 }
122
doMultiplyByM(MultibodySystem & system,State & state)123 void doMultiplyByM(MultibodySystem& system, State& state) {
124 const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
125 Vector v(matter.getNumMobilities(), 1.0);
126 Vector mv;
127 matter.multiplyByM(state, v, mv);
128 }
129
doMultiplyByMInv(MultibodySystem & system,State & state)130 void doMultiplyByMInv(MultibodySystem& system, State& state) {
131 const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
132 Vector v(matter.getNumMobilities(), 1.0);
133 Vector minvv;
134 matter.multiplyByMInv(state, v, minvv);
135 }
136
doCalcResidualForceIgnoringConstraints(MultibodySystem & system,State & state)137 void doCalcResidualForceIgnoringConstraints(MultibodySystem& system, State& state) {
138 const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
139 Vector appliedMobilityForces(matter.getNumMobilities(), 1.0);
140 Vector_<SpatialVec> appliedBodyForces(matter.getNumBodies(), SpatialVec(Vec3(1, 0, 0), Vec3(0, 1, 0)));
141 Vector knownUdot, residualMobilityForces;
142 matter.calcResidualForceIgnoringConstraints(state, appliedMobilityForces, appliedBodyForces, knownUdot, residualMobilityForces);
143 }
144
doCalcMobilizerReactionForces(MultibodySystem & system,State & state)145 void doCalcMobilizerReactionForces(MultibodySystem& system, State& state) {
146 Vector_<SpatialVec> forces;
147 system.getMatterSubsystem().calcMobilizerReactionForces(state, forces);
148 }
149
doMultiplyBySystemJacobianTranspose(MultibodySystem & system,State & state)150 void doMultiplyBySystemJacobianTranspose(MultibodySystem& system, State& state) {
151 const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
152 Vector_<SpatialVec> dEdR(matter.getNumBodies(), SpatialVec(Vec3(1, 0, 0), Vec3(0, 1, 0)));
153 Vector dEdQ;
154 matter.multiplyBySystemJacobianTranspose(state, dEdR, dEdQ);
155 }
156
doCalcCompositeBodyInertias(MultibodySystem & system,State & state)157 void doCalcCompositeBodyInertias(MultibodySystem& system, State& state) {
158 Array_<SpatialInertia, MobilizedBodyIndex> r;
159 system.getMatterSubsystem().calcCompositeBodyInertias(state, r);
160 }
161
162 static double flopTimeInNs;
163
164 /**
165 * Time how long it takes to perform an operation 1000 times. The test is repeated 5 times,
166 * and the average is returned. The return value represents CPU time, *not* clock time.
167 */
timeComputation(MultibodySystem & system,void function (MultibodySystem & system,State & state),const string & name,int iterations,bool useEulerAngles)168 void timeComputation(MultibodySystem& system, void function(MultibodySystem& system, State& state),
169 const string& name, int iterations, bool useEulerAngles) {
170 const int repeats = 3;
171 Vector cpuTimes(repeats);
172 State state = system.getDefaultState();
173 if (useEulerAngles) {
174 system.getMatterSubsystem().setUseEulerAngles(state, true);
175 system.realizeModel(state);
176 }
177 system.realize(state, Stage::Acceleration);
178
179 const int ndof = system.getMatterSubsystem().getNumMobilities();
180 const int nmovbod = system.getMatterSubsystem().getNumBodies()-1; // not Ground
181
182 // Repeatedly measure the CPU time for performing the operation 1000 times.
183
184 for (int i = 0; i < repeats; i++) {
185 double startCpu = threadCpuTime();
186 for (int j = 0; j < iterations; j++)
187 function(system, state);
188 double endCpu = threadCpuTime();
189 cpuTimes[i] = Real(endCpu-startCpu);
190 }
191
192 double timePerIterUs = mean(cpuTimes)*1000000/iterations; // us
193 double flopTimeUs = flopTimeInNs / 1000;
194 double flopTimePerIter = timePerIterUs/flopTimeUs;
195 std::printf("%40s:%6.4gus -> %4d flp/dof, %4d flp/bod\n",
196 name.c_str(), timePerIterUs, (int)(flopTimePerIter/ndof),
197 (int)(flopTimePerIter/nmovbod));
198 }
199
200 /**
201 * Time all the different calculations for one system.
202 */
runAllTests(MultibodySystem & system,bool useEulerAngles=false)203 void runAllTests(MultibodySystem& system, bool useEulerAngles=false) {
204 std::cout << "# dofs=" << system.getMatterSubsystem().getNumMobilities() << "\n";
205 timeComputation(system, doRealizeTime, "realizeTime", 5000, useEulerAngles);
206 timeComputation(system, doRealizePositionKinematics, "realizePositionKinematics", 5000, useEulerAngles);
207 timeComputation(system, doRealizePosition, "realizePosition", 5000, useEulerAngles);
208 timeComputation(system, doRealizeVelocityKinematics, "realizeVelocityKinematics", 5000, useEulerAngles);
209 timeComputation(system, doRealizeVelocity, "realizeVelocity", 5000, useEulerAngles);
210 timeComputation(system, doRealizeArticulatedBodyInertias, "doRealizeArticulatedBodyInertias", 3000, useEulerAngles);
211 timeComputation(system, doRealizeArticulatedBodyVelocity, "doRealizeArticulatedBodyVelocity", 5000, useEulerAngles);
212 timeComputation(system, doRealizeDynamics, "realizeDynamics", 5000, useEulerAngles);
213 timeComputation(system, doRealizeAcceleration, "realizeAcceleration", 5000, useEulerAngles);
214 timeComputation(system, doRealizeDynamics2Acceleration, "doRealizeDynamics2Acceleration", 5000, useEulerAngles);
215 timeComputation(system, doRealizeVelocity2Acceleration, "realizeVelocity2Acceleration", 3000, useEulerAngles);
216 timeComputation(system, doRealizeTime2Acceleration, "realizeTime2Acceleration", 2000, useEulerAngles);
217 timeComputation(system, doMultiplyByM, "multiplyByM", 5000, useEulerAngles);
218 timeComputation(system, doMultiplyByMInv, "multiplyByMInv", 5000, useEulerAngles);
219 timeComputation(system, doCalcResidualForceIgnoringConstraints, "calcResidualForceIgnoringConstraints", 5000, useEulerAngles);
220 timeComputation(system, doCalcMobilizerReactionForces, "calcMobilizerReactionForces", 1000, useEulerAngles);
221 timeComputation(system, doMultiplyBySystemJacobianTranspose, "multiplyBySystemJacobianTranspose", 5000, useEulerAngles);
222 timeComputation(system, doCalcCompositeBodyInertias, "calcCompositeBodyInertias", 5000, useEulerAngles);
223 }
224
225 // The following routines create the systems to be profiled.
226
createParticles(MultibodySystem & system)227 void createParticles(MultibodySystem& system) {
228 SimbodyMatterSubsystem matter(system);
229 Body::Rigid body;
230 for (int i = 0; i < 256; i++)
231 MobilizedBody::Translation next(matter.updGround(), Vec3(1, 0, 0), body, Vec3(0));
232 system.realizeTopology();
233 }
234
createFreeBodies(MultibodySystem & system)235 void createFreeBodies(MultibodySystem& system) {
236 SimbodyMatterSubsystem matter(system);
237 Body::Rigid body;
238 for (int i = 0; i < 256; i++)
239 MobilizedBody::Free next(matter.updGround(), Vec3(1, 0, 0), body, Vec3(0));
240 system.realizeTopology();
241 }
242
createPinChain(MultibodySystem & system)243 void createPinChain(MultibodySystem& system) {
244 SimbodyMatterSubsystem matter(system);
245 Body::Rigid body;
246 MobilizedBody last = matter.updGround();
247 for (int i = 0; i < 256; i++) {
248 MobilizedBody::Pin next(last, Vec3(1, 0, 0), body, Vec3(0));
249 last = next;
250 }
251 system.realizeTopology();
252 }
253
createSliderChain(MultibodySystem & system)254 void createSliderChain(MultibodySystem& system) {
255 SimbodyMatterSubsystem matter(system);
256 Body::Rigid body;
257 MobilizedBody last = matter.updGround();
258 for (int i = 0; i < 256; i++) {
259 MobilizedBody::Slider next(last, Vec3(1, 0, 0), body, Vec3(0));
260 last = next;
261 }
262 system.realizeTopology();
263 }
264
createBallChain(MultibodySystem & system)265 void createBallChain(MultibodySystem& system) {
266 SimbodyMatterSubsystem matter(system);
267 Body::Rigid body;
268 MobilizedBody last = matter.updGround();
269 for (int i = 0; i < 256; i++) {
270 MobilizedBody::Ball next(last, Vec3(1, 0, 0), body, Vec3(0));
271 last = next;
272 }
273 system.realizeTopology();
274 }
275
276
createGimbalChain(MultibodySystem & system)277 void createGimbalChain(MultibodySystem& system) {
278 SimbodyMatterSubsystem matter(system);
279 Body::Rigid body;
280 MobilizedBody last = matter.updGround();
281 for (int i = 0; i < 256; i++) {
282 MobilizedBody::Gimbal next(last, Vec3(1, 0, 0), body, Vec3(0));
283 last = next;
284 }
285 system.realizeTopology();
286 }
287
createPinTree(MultibodySystem & system)288 void createPinTree(MultibodySystem& system) {
289 SimbodyMatterSubsystem matter(system);
290 Body::Rigid body;
291 for (int i = 0; i < 256; i++) {
292 MobilizedBody& parent = matter.updMobilizedBody(MobilizedBodyIndex(i/2));
293 MobilizedBody::Pin next(parent, Vec3(1, 0, 0), body, Vec3(0));
294 }
295 system.realizeTopology();
296 }
297
createBallTree(MultibodySystem & system)298 void createBallTree(MultibodySystem& system) {
299 SimbodyMatterSubsystem matter(system);
300 Body::Rigid body;
301 for (int i = 0; i < 256; i++) {
302 MobilizedBody& parent = matter.updMobilizedBody(MobilizedBodyIndex(i/2));
303 MobilizedBody::Ball next(parent, Vec3(1, 0, 0), body, Vec3(0));
304 }
305 system.realizeTopology();
306 }
307
308 static int tenInts[10];
309 static Real tenReals[10];
310 // These should multiply out to about 1.
311 static Real tenMults[10] =
312 {Real(0.501),Real(0.2501),Real(0.201),Real(0.101),Real(1.000000001),
313 Real(1/1.000000002),Real(1/.101), Real(1/.201), Real(1/.2501), Real(1/.501)};
testFunctions(double & flopTime,bool flopTimeOnly=false)314 void testFunctions(double& flopTime, bool flopTimeOnly=false) {
315 Real addRes=1,subRes=1,mulRes=1,divRes=1,sqrtRes=1,oosqrtRes=1,
316 sinRes=1,cosRes=1,atan2Res=1,logRes=1,expRes=1;
317 int intAddRes=1;
318 Random::Uniform rand; rand.setMin(-5); rand.setMax(5);
319 for (int i=0; i<10; i++) tenInts[i] = rand.getIntValue();
320 for (int i=0; i<10; i++) tenReals[i] = rand.getValue();
321
322 double tprev = threadCpuTime();
323 for (int i = 0; i < 10*100000000; i++) {
324 intAddRes += tenInts[0];
325 intAddRes -= tenInts[1];
326 intAddRes += tenInts[2];
327 intAddRes -= tenInts[3];
328 intAddRes += tenInts[4];
329 intAddRes -= tenInts[5];
330 intAddRes += tenInts[6];
331 intAddRes -= tenInts[7];
332 intAddRes += tenInts[8];
333 intAddRes -= tenInts[9];
334 }
335 double t = threadCpuTime(); double intAddTime = (t-tprev)/10; // time for 1e9 ops
336 printf("intAdd %gs\n", t-tprev);
337 tprev = threadCpuTime();
338 for (int i = 0; i < 5*100000000; i++) {
339 addRes += Real(1.1);
340 addRes += Real(1.2);
341 addRes += Real(1.3);
342 addRes += Real(1.4);
343 addRes += Real(1.501);
344 addRes += Real(1.6);
345 addRes += Real(1.7);
346 addRes += Real(1.8);
347 addRes += Real(1.9);
348 addRes += Real(2.007);
349 }
350 t = threadCpuTime(); double addTime = (t-tprev)/5;
351 printf("add %gs\n", t-tprev);
352 tprev = threadCpuTime();
353 for (int i = 0; i < 5*100000000; i++) {
354 subRes -= Real(1.1);
355 subRes -= Real(1.2);
356 subRes -= Real(1.3);
357 subRes -= Real(1.4);
358 subRes -= Real(1.501);
359 subRes -= Real(1.6);
360 subRes -= Real(1.7);
361 subRes -= Real(1.8);
362 subRes -= Real(1.9);
363 subRes -= Real(2.007);
364 }
365 t = threadCpuTime(); double subTime = (t-tprev)/5;
366 printf("sub %gs\n", t-tprev);
367 tprev = threadCpuTime();
368 for (int i = 0; i < 3*100000000; i++) {
369 mulRes *= Real(0.501);
370 mulRes *= Real(0.2501);
371 mulRes *= Real(0.201);
372 mulRes *= Real(0.101);
373 mulRes *= Real(1.000000001);
374 mulRes *= Real(1/1.000000002); // done at compile time
375 mulRes *= Real(1/.101);
376 mulRes *= Real(1/.201);
377 mulRes *= Real(1/.2501);
378 mulRes *= Real(1/.501);
379 }
380 t = threadCpuTime(); double mulTime=(t-tprev)/3;
381 printf("mul %gs\n", t-tprev);
382 flopTime = (addTime+mulTime)/2;
383 std::cout << "1 flop=avg(add,mul)=" << flopTime << "ns\n";
384 if (flopTimeOnly)
385 return;
386
387 tprev = threadCpuTime();
388 for (int i = 0; i < 100000000; i++) {
389 divRes /= tenMults[7];
390 divRes /= tenMults[9];
391 divRes /= tenMults[8];
392 divRes /= tenMults[6];
393 divRes /= tenMults[2];
394 divRes /= tenMults[3];
395 divRes /= tenMults[1];
396 divRes /= tenMults[0];
397 divRes /= tenMults[4];
398 divRes /= tenMults[5];
399 // prevent clever optimization VC10 did to turn divides
400 // into multiplies.
401 tenMults[i%10] = Real(tenMults[i%10]*1.0000000000001);
402 tenMults[(i+5)%10] = Real(tenMults[(i+5)%10]*0.9999999999999);
403 }
404 t = threadCpuTime(); double divTime=(t-tprev);
405 printf("div %gs\n", t-tprev);
406 tprev = threadCpuTime();
407
408 for (int i = 0; i < 100000000/2; i++) {
409 const Real ir = (Real)i;
410 sqrtRes += std::sqrt(ir+(Real)0.001); // two adds
411 sqrtRes += std::sqrt(ir+(Real)0.1);
412 sqrtRes += std::sqrt(ir+(Real)0.2);
413 sqrtRes += std::sqrt(ir+(Real)0.3);
414 sqrtRes += std::sqrt(ir+(Real)0.4);
415 sqrtRes += std::sqrt(ir+(Real)0.501);
416 sqrtRes += std::sqrt(ir+(Real)0.6);
417 sqrtRes += std::sqrt(ir+(Real)0.7);
418 sqrtRes += std::sqrt(ir+(Real)0.8);
419 sqrtRes += std::sqrt(ir+(Real)0.9);
420 }
421 t = threadCpuTime(); double sqrtTime=2*(t-tprev)-2*addTime;
422 printf("sqrt %gs\n", t-tprev);
423 tprev = threadCpuTime();
424 for (int i = 0; i < 100000000/4; i++) {
425 const Real ir = (Real)i;
426 oosqrtRes += 1/std::sqrt(ir+(Real)0.001); // two adds
427 oosqrtRes += 1/std::sqrt(ir+(Real)0.1);
428 oosqrtRes += 1/std::sqrt(ir+(Real)0.2);
429 oosqrtRes += 1/std::sqrt(ir+(Real)0.3);
430 oosqrtRes += 1/std::sqrt(ir+(Real)0.4);
431 oosqrtRes += 1/std::sqrt(ir+(Real)0.501);
432 oosqrtRes += 1/std::sqrt(ir+(Real)0.6);
433 oosqrtRes += 1/std::sqrt(ir+(Real)0.7);
434 oosqrtRes += 1/std::sqrt(ir+(Real)0.8);
435 oosqrtRes += 1/std::sqrt(ir+(Real)0.9);
436 }
437 t = threadCpuTime(); double oosqrtTime=4*(t-tprev)-2*addTime;
438 printf("1/sqrt %gs\n", t-tprev);
439 tprev = threadCpuTime();
440 for (int i = 0; i < 100000000/5; i++) {
441 const Real ir = (Real)i;
442 logRes += std::log(ir+(Real)0.001); // two adds
443 logRes += std::log(ir+(Real)0.1);
444 logRes += std::log(ir+(Real)0.2);
445 logRes += std::log(ir+(Real)0.3);
446 logRes += std::log(ir+(Real)0.4);
447 logRes += std::log(ir+(Real)0.501);
448 logRes += std::log(ir+(Real)0.6);
449 logRes += std::log(ir+(Real)0.7);
450 logRes += std::log(ir+(Real)0.8);
451 logRes += std::log(ir+(Real)0.9);
452 }
453 t = threadCpuTime(); double logTime=(t-tprev)*5-2*addTime;
454 printf("log %gs\n", t-tprev);
455 tprev = threadCpuTime();
456 for (int i = 0; i < 100000000/5; i++) {
457 const Real ir = (Real).000000001*(Real)i;
458 expRes += std::exp(ir+(Real)0.001); // two adds
459 expRes += std::exp(ir+(Real)0.1);
460 expRes += std::exp(ir+(Real)0.2);
461 expRes += std::exp(ir+(Real)0.3);
462 expRes += std::exp(ir+(Real)0.4);
463 expRes += std::exp(ir+(Real)0.501);
464 expRes += std::exp(ir+(Real)0.6);
465 expRes += std::exp(ir+(Real)0.7);
466 expRes += std::exp(ir+(Real)0.8);
467 expRes += std::exp(ir+(Real)0.9);
468 }
469 t = threadCpuTime(); double expTime=(t-tprev)*5-2*addTime;
470 printf("exp %gs\n", t-tprev);
471 tprev = threadCpuTime();
472 for (int i = 0; i < 100000000/10; i++) {
473 const Real ir = (Real)i;
474 sinRes += std::sin(ir+(Real)0.001); // two adds
475 sinRes += std::sin(ir+(Real)0.1);
476 sinRes += std::sin(ir+(Real)0.2);
477 sinRes += std::sin(ir+(Real)0.3);
478 sinRes += std::sin(ir+(Real)0.4);
479 sinRes += std::sin(ir+(Real)0.501);
480 sinRes += std::sin(ir+(Real)0.6);
481 sinRes += std::sin(ir+(Real)0.7);
482 sinRes += std::sin(ir+(Real)0.8);
483 sinRes += std::sin(ir+(Real)0.9);
484 }
485 t = threadCpuTime(); double sinTime=(t-tprev)*10-2*addTime;
486 printf("sin %gs\n", t-tprev);
487 tprev = threadCpuTime();
488 for (int i = 0; i < 100000000/10; i++) {
489 const Real ir = (Real)i;
490 cosRes += std::cos(ir+(Real)0.001); // two adds
491 cosRes += std::cos(ir+(Real)0.1);
492 cosRes += std::cos(ir+(Real)0.2);
493 cosRes += std::cos(ir+(Real)0.3);
494 cosRes += std::cos(ir+(Real)0.4);
495 cosRes += std::cos(ir+(Real)0.501);
496 cosRes += std::cos(ir+(Real)0.6);
497 cosRes += std::cos(ir+(Real)0.7);
498 cosRes += std::cos(ir+(Real)0.8);
499 cosRes += std::cos(ir+(Real)0.9);
500 }
501 t = threadCpuTime(); double cosTime=(t-tprev)*10-2*addTime;
502 printf("cos %gs\n", t-tprev);
503 tprev = threadCpuTime();
504 for (int i = 0; i < 100000000/10; i++) {
505 const Real ir = (Real)i;
506 atan2Res += std::atan2(ir+(Real)0.001,ir-(Real)0.001); // three adds
507 atan2Res += std::atan2(ir+(Real)0.1,ir-(Real)0.1);
508 atan2Res += std::atan2(ir+(Real)0.2,ir-(Real)0.2);
509 atan2Res += std::atan2(ir+(Real)0.3,ir-(Real)0.3);
510 atan2Res += std::atan2(ir+(Real)0.4,ir-(Real)0.4);
511 atan2Res += std::atan2(ir+(Real)0.501,ir-(Real)0.501);
512 atan2Res += std::atan2(ir+(Real)0.6,ir-(Real)0.6);
513 atan2Res += std::atan2(ir+(Real)0.7,ir-(Real)0.7);
514 atan2Res += std::atan2(ir+(Real)0.8,ir-(Real)0.8);
515 atan2Res += std::atan2(ir+(Real)0.9,ir-(Real)0.9);
516 }
517 t = threadCpuTime(); double atan2Time=(t-tprev)*10-3*addTime;
518 printf("atan2 %gs\n", t-tprev);
519 tprev = threadCpuTime();
520
521 std::cout << std::setprecision(5);
522 std::cout << "1 flop=avg(add,mul)=" << flopTime << "ns\n";
523 printf("op\t t/10^9\t flops\t final result\n");
524 std::cout << "int+:\t" <<intAddTime<<"\t"<<intAddTime /flopTime<<"\t"<<intAddRes<< "\n";
525 std::cout << "+:\t" <<addTime<<"\t"<<addTime /flopTime<<"\t"<<addRes<< "\n";
526 std::cout << "-:\t" <<subTime<<"\t"<<subTime /flopTime<<"\t"<<subRes<< "\n";
527 std::cout << "*:\t" <<mulTime<<"\t"<<mulTime /flopTime<<"\t"<<mulRes<< "\n";
528 std::cout << "/:\t" <<divTime<<"\t"<<divTime /flopTime<<"\t"<<divRes<< "\n";
529 std::cout << "sqrt:\t" <<sqrtTime<<"\t"<<sqrtTime /flopTime<<"\t"<<sqrtRes<< "\n";
530 std::cout << "1/sqrt:\t"<<oosqrtTime<<"\t"<<oosqrtTime/flopTime<<"\t"<<oosqrtRes<< "\n";
531 std::cout << "log:\t" <<logTime<<"\t"<<logTime /flopTime<<"\t"<<logRes<< "\n";
532 std::cout << "exp:\t" <<expTime<<"\t"<<expTime /flopTime<<"\t"<<expRes<< "\n";
533 std::cout << "sin:\t" <<sinTime<<"\t"<<sinTime /flopTime<<"\t"<<sinRes<< "\n";
534 std::cout << "cos:\t" <<cosTime<<"\t"<<cosTime /flopTime<<"\t"<<cosRes<< "\n";
535 std::cout << "atan2:\t" <<atan2Time<<"\t"<<atan2Time /flopTime<<"\t"<<atan2Res<< "\n";
536 }
537
538
main()539 int main() {
540 time_t now;
541 time(&now);
542 printf("Starting: %s\n", ctime(&now));
543 { std::cout << "\nCPU performance\n" << std::endl;
544 testFunctions(flopTimeInNs, true /*flop time only*/);
545 }
546 double startClock = realTime();
547 double startCpu = cpuTime();
548 double startThread = threadCpuTime();
549
550 {
551 std::cout << "\nParticles:\n" << std::endl;
552 MultibodySystem system;
553 createParticles(system);
554 runAllTests(system);
555 }
556
557 {
558 std::cout << "\nFree Bodies (Quaternions):\n" << std::endl;
559 MultibodySystem system;
560 createFreeBodies(system);
561 runAllTests(system, false);
562 }
563 {
564 std::cout << "\nFree Bodies (Euler angles):\n" << std::endl;
565 MultibodySystem system;
566 createFreeBodies(system);
567 runAllTests(system, true);
568 }
569
570 {
571 std::cout << "\nPin Chain:\n" << std::endl;
572 MultibodySystem system;
573 createPinChain(system);
574 runAllTests(system);
575 }
576 {
577 std::cout << "\nSlider Chain:\n" << std::endl;
578 MultibodySystem system;
579 createSliderChain(system);
580 runAllTests(system);
581 }
582 {
583 std::cout << "\nBall Chain (Quaternions):\n" << std::endl;
584 MultibodySystem system;
585 createBallChain(system);
586 runAllTests(system, false);
587 }
588 {
589 std::cout << "\nBall Chain (Euler angles):\n" << std::endl;
590 MultibodySystem system;
591 createBallChain(system);
592 runAllTests(system, true);
593 }
594 {
595 std::cout << "\nGimbal Chain:\n" << std::endl;
596 MultibodySystem system;
597 createGimbalChain(system);
598 runAllTests(system);
599 }
600
601 {
602 std::cout << "\nPin Tree:\n" << std::endl;
603 MultibodySystem system;
604 createPinTree(system);
605 runAllTests(system);
606 }
607 {
608 std::cout << "\nBall Tree:\n" << std::endl;
609 MultibodySystem system;
610 createBallTree(system);
611 runAllTests(system);
612 }
613
614
615 std::cout << "Total time:\n";
616 std::cout << " process CPU=" << cpuTime()-startCpu << "s\n";
617 std::cout << " thread CPU =" << threadCpuTime()-startThread << "s\n";
618 std::cout << " real time =" << realTime()-startClock << "s\n";
619 }