1 /* -------------------------------------------------------------------------- *
2  *                               Simbody(tm)                                  *
3  * -------------------------------------------------------------------------- *
4  * This is part of the SimTK biosimulation toolkit originating from           *
5  * Simbios, the NIH National Center for Physics-Based Simulation of           *
6  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
7  * Medical Research, grant U54 GM072970. See https://simtk.org/home/simbody.  *
8  *                                                                            *
9  * Portions copyright (c) 2011-15 Stanford University and the Authors.        *
10  * Authors: Peter Eastman                                                     *
11  * Contributors: Michael Sherman                                              *
12  *                                                                            *
13  * Licensed under the Apache License, Version 2.0 (the "License"); you may    *
14  * not use this file except in compliance with the License. You may obtain a  *
15  * copy of the License at http://www.apache.org/licenses/LICENSE-2.0.         *
16  *                                                                            *
17  * Unless required by applicable law or agreed to in writing, software        *
18  * distributed under the License is distributed on an "AS IS" BASIS,          *
19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   *
20  * See the License for the specific language governing permissions and        *
21  * limitations under the License.                                             *
22  * -------------------------------------------------------------------------- */
23 
24 #include "SimTKsimbody.h"
25 #include <string>
26 #include <ctime>
27 
28 using std::cout;
29 using std::endl;
30 using std::string;
31 
32 #ifdef _MSC_VER
33 #pragma warning(disable:4996) // don't warn about strerror, sprintf, etc.
34 #endif
35 
36 using namespace SimTK;
37 
38 /**
39  * This test measures the speed of various multibody calculations.  It executes a collection of operations
40  * on various systems.  The CPU time required to perform each operation 1000 times is measured and
41  * printed to the console.
42  *
43  * Each test system contains 256 identical bodies (plus ground), but they differ in the type of
44  * bodies and their arrangement into a multibody tree.  The arrangements include 1) all bodies attached
45  * directly to ground, 2) the bodies linked in a single chain, and 3) the bodies arranged to form
46  * a binary tree.
47  */
48 
49 // The following routines define the operations to be profiled.
50 
doRealizeTime(MultibodySystem & system,State & state)51 void doRealizeTime(MultibodySystem& system, State& state) {
52     state.invalidateAllCacheAtOrAbove(Stage::Time);
53     system.realize(state, Stage::Time);
54 }
55 
doRealizePosition(MultibodySystem & system,State & state)56 void doRealizePosition(MultibodySystem& system, State& state) {
57     state.invalidateAllCacheAtOrAbove(Stage::Position);
58     system.realize(state, Stage::Position);
59 }
60 
doRealizeVelocity(MultibodySystem & system,State & state)61 void doRealizeVelocity(MultibodySystem& system, State& state) {
62     state.invalidateAllCacheAtOrAbove(Stage::Velocity);
63     system.realize(state, Stage::Velocity);
64 }
65 
doRealizePositionKinematics(MultibodySystem & system,State & state)66 void doRealizePositionKinematics(MultibodySystem& system, State& state) {
67     const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
68     matter.invalidatePositionKinematics(state);
69     matter.realizePositionKinematics(state);
70 }
71 
doRealizeVelocityKinematics(MultibodySystem & system,State & state)72 void doRealizeVelocityKinematics(MultibodySystem& system, State& state) {
73     const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
74     matter.invalidateVelocityKinematics(state);
75     matter.realizeVelocityKinematics(state);
76 }
77 
doRealizeArticulatedBodyInertias(MultibodySystem & system,State & state)78 void doRealizeArticulatedBodyInertias(MultibodySystem& system, State& state) {
79     const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
80     matter.invalidateArticulatedBodyInertias(state);
81     matter.realizeArticulatedBodyInertias(state);
82 }
83 
doRealizeArticulatedBodyVelocity(MultibodySystem & system,State & state)84 void doRealizeArticulatedBodyVelocity(MultibodySystem& system, State& state) {
85     const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
86     matter.invalidateArticulatedBodyVelocity(state);
87     matter.realizeArticulatedBodyVelocity(state);
88 }
89 
doRealizeDynamics(MultibodySystem & system,State & state)90 void doRealizeDynamics(MultibodySystem& system, State& state) {
91     state.invalidateAllCacheAtOrAbove(Stage::Dynamics);
92     system.realize(state, Stage::Dynamics);
93 }
94 
doRealizeAcceleration(MultibodySystem & system,State & state)95 void doRealizeAcceleration(MultibodySystem& system, State& state) {
96     state.invalidateAllCacheAtOrAbove(Stage::Acceleration);
97     system.realize(state, Stage::Acceleration);
98 }
99 
100 // Cost to re-evaluate accelerations after applying some new forces, but leaving
101 // the state variables unchanged.
doRealizeDynamics2Acceleration(MultibodySystem & system,State & state)102 void doRealizeDynamics2Acceleration(MultibodySystem& system, State& state) {
103     state.invalidateAllCacheAtOrAbove(Stage::Dynamics);
104     system.realize(state, Stage::Acceleration);
105 }
106 
107 // Cost to re-evaluate accelerations after updating velocities, but leaving
108 // the positions unchanged (e.g. semi-implicit Euler iterating velocities).
doRealizeVelocity2Acceleration(MultibodySystem & system,State & state)109 void doRealizeVelocity2Acceleration(MultibodySystem& system, State& state) {
110     state.updU(); // should invalidate velocity kinematics
111     state.invalidateAllCacheAtOrAbove(Stage::Velocity);
112     system.realize(state, Stage::Acceleration);
113 }
114 
115 // Cost for a complete acceleration calculation at a new time and state.
116 // This includes the cost of articulated body inertias.
doRealizeTime2Acceleration(MultibodySystem & system,State & state)117 void doRealizeTime2Acceleration(MultibodySystem& system, State& state) {
118     state.updQ(); // should invalidate position & velocity kinematics
119     state.invalidateAllCacheAtOrAbove(Stage::Time);
120     system.realize(state, Stage::Acceleration);
121 }
122 
doMultiplyByM(MultibodySystem & system,State & state)123 void doMultiplyByM(MultibodySystem& system, State& state) {
124     const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
125     Vector v(matter.getNumMobilities(), 1.0);
126     Vector mv;
127     matter.multiplyByM(state, v, mv);
128 }
129 
doMultiplyByMInv(MultibodySystem & system,State & state)130 void doMultiplyByMInv(MultibodySystem& system, State& state) {
131     const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
132     Vector v(matter.getNumMobilities(), 1.0);
133     Vector minvv;
134     matter.multiplyByMInv(state, v, minvv);
135 }
136 
doCalcResidualForceIgnoringConstraints(MultibodySystem & system,State & state)137 void doCalcResidualForceIgnoringConstraints(MultibodySystem& system, State& state) {
138     const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
139     Vector appliedMobilityForces(matter.getNumMobilities(), 1.0);
140     Vector_<SpatialVec> appliedBodyForces(matter.getNumBodies(), SpatialVec(Vec3(1, 0, 0), Vec3(0, 1, 0)));
141     Vector knownUdot, residualMobilityForces;
142     matter.calcResidualForceIgnoringConstraints(state, appliedMobilityForces, appliedBodyForces, knownUdot, residualMobilityForces);
143 }
144 
doCalcMobilizerReactionForces(MultibodySystem & system,State & state)145 void doCalcMobilizerReactionForces(MultibodySystem& system, State& state) {
146     Vector_<SpatialVec> forces;
147     system.getMatterSubsystem().calcMobilizerReactionForces(state, forces);
148 }
149 
doMultiplyBySystemJacobianTranspose(MultibodySystem & system,State & state)150 void doMultiplyBySystemJacobianTranspose(MultibodySystem& system, State& state) {
151     const SimbodyMatterSubsystem& matter = system.getMatterSubsystem();
152     Vector_<SpatialVec> dEdR(matter.getNumBodies(), SpatialVec(Vec3(1, 0, 0), Vec3(0, 1, 0)));
153     Vector dEdQ;
154     matter.multiplyBySystemJacobianTranspose(state, dEdR, dEdQ);
155 }
156 
doCalcCompositeBodyInertias(MultibodySystem & system,State & state)157 void doCalcCompositeBodyInertias(MultibodySystem& system, State& state) {
158     Array_<SpatialInertia, MobilizedBodyIndex> r;
159     system.getMatterSubsystem().calcCompositeBodyInertias(state, r);
160 }
161 
162 static double flopTimeInNs;
163 
164 /**
165  * Time how long it takes to perform an operation 1000 times.  The test is repeated 5 times,
166  * and the average is returned.  The return value represents CPU time, *not* clock time.
167  */
timeComputation(MultibodySystem & system,void function (MultibodySystem & system,State & state),const string & name,int iterations,bool useEulerAngles)168 void timeComputation(MultibodySystem& system, void function(MultibodySystem& system, State& state),
169                      const string& name, int iterations, bool useEulerAngles) {
170     const int repeats = 3;
171     Vector cpuTimes(repeats);
172     State state = system.getDefaultState();
173     if (useEulerAngles) {
174         system.getMatterSubsystem().setUseEulerAngles(state, true);
175         system.realizeModel(state);
176     }
177     system.realize(state, Stage::Acceleration);
178 
179     const int ndof = system.getMatterSubsystem().getNumMobilities();
180     const int nmovbod = system.getMatterSubsystem().getNumBodies()-1; // not Ground
181 
182     // Repeatedly measure the CPU time for performing the operation 1000 times.
183 
184     for (int i = 0; i < repeats; i++) {
185         double startCpu = threadCpuTime();
186         for (int j = 0; j < iterations; j++)
187             function(system, state);
188         double endCpu = threadCpuTime();
189         cpuTimes[i] = Real(endCpu-startCpu);
190     }
191 
192     double timePerIterUs = mean(cpuTimes)*1000000/iterations; // us
193     double flopTimeUs = flopTimeInNs / 1000;
194     double flopTimePerIter = timePerIterUs/flopTimeUs;
195     std::printf("%40s:%6.4gus -> %4d flp/dof, %4d flp/bod\n",
196         name.c_str(), timePerIterUs, (int)(flopTimePerIter/ndof),
197         (int)(flopTimePerIter/nmovbod));
198 }
199 
200 /**
201  * Time all the different calculations for one system.
202  */
runAllTests(MultibodySystem & system,bool useEulerAngles=false)203 void runAllTests(MultibodySystem& system, bool useEulerAngles=false) {
204     std::cout << "# dofs=" << system.getMatterSubsystem().getNumMobilities() << "\n";
205     timeComputation(system, doRealizeTime, "realizeTime", 5000, useEulerAngles);
206     timeComputation(system, doRealizePositionKinematics, "realizePositionKinematics", 5000, useEulerAngles);
207     timeComputation(system, doRealizePosition, "realizePosition", 5000, useEulerAngles);
208     timeComputation(system, doRealizeVelocityKinematics, "realizeVelocityKinematics", 5000, useEulerAngles);
209     timeComputation(system, doRealizeVelocity, "realizeVelocity", 5000, useEulerAngles);
210     timeComputation(system, doRealizeArticulatedBodyInertias, "doRealizeArticulatedBodyInertias", 3000, useEulerAngles);
211     timeComputation(system, doRealizeArticulatedBodyVelocity, "doRealizeArticulatedBodyVelocity", 5000, useEulerAngles);
212     timeComputation(system, doRealizeDynamics, "realizeDynamics", 5000, useEulerAngles);
213     timeComputation(system, doRealizeAcceleration, "realizeAcceleration", 5000, useEulerAngles);
214     timeComputation(system, doRealizeDynamics2Acceleration, "doRealizeDynamics2Acceleration", 5000, useEulerAngles);
215     timeComputation(system, doRealizeVelocity2Acceleration, "realizeVelocity2Acceleration", 3000, useEulerAngles);
216     timeComputation(system, doRealizeTime2Acceleration, "realizeTime2Acceleration", 2000, useEulerAngles);
217     timeComputation(system, doMultiplyByM, "multiplyByM", 5000, useEulerAngles);
218     timeComputation(system, doMultiplyByMInv, "multiplyByMInv", 5000, useEulerAngles);
219     timeComputation(system, doCalcResidualForceIgnoringConstraints, "calcResidualForceIgnoringConstraints", 5000, useEulerAngles);
220     timeComputation(system, doCalcMobilizerReactionForces, "calcMobilizerReactionForces", 1000, useEulerAngles);
221     timeComputation(system, doMultiplyBySystemJacobianTranspose, "multiplyBySystemJacobianTranspose", 5000, useEulerAngles);
222     timeComputation(system, doCalcCompositeBodyInertias, "calcCompositeBodyInertias", 5000, useEulerAngles);
223 }
224 
225 // The following routines create the systems to be profiled.
226 
createParticles(MultibodySystem & system)227 void createParticles(MultibodySystem& system) {
228     SimbodyMatterSubsystem matter(system);
229     Body::Rigid body;
230     for (int i = 0; i < 256; i++)
231         MobilizedBody::Translation next(matter.updGround(), Vec3(1, 0, 0), body, Vec3(0));
232     system.realizeTopology();
233 }
234 
createFreeBodies(MultibodySystem & system)235 void createFreeBodies(MultibodySystem& system) {
236     SimbodyMatterSubsystem matter(system);
237     Body::Rigid body;
238     for (int i = 0; i < 256; i++)
239         MobilizedBody::Free next(matter.updGround(), Vec3(1, 0, 0), body, Vec3(0));
240     system.realizeTopology();
241 }
242 
createPinChain(MultibodySystem & system)243 void createPinChain(MultibodySystem& system) {
244     SimbodyMatterSubsystem matter(system);
245     Body::Rigid body;
246     MobilizedBody last = matter.updGround();
247     for (int i = 0; i < 256; i++) {
248         MobilizedBody::Pin next(last, Vec3(1, 0, 0), body, Vec3(0));
249         last = next;
250     }
251     system.realizeTopology();
252 }
253 
createSliderChain(MultibodySystem & system)254 void createSliderChain(MultibodySystem& system) {
255     SimbodyMatterSubsystem matter(system);
256     Body::Rigid body;
257     MobilizedBody last = matter.updGround();
258     for (int i = 0; i < 256; i++) {
259         MobilizedBody::Slider next(last, Vec3(1, 0, 0), body, Vec3(0));
260         last = next;
261     }
262     system.realizeTopology();
263 }
264 
createBallChain(MultibodySystem & system)265 void createBallChain(MultibodySystem& system) {
266     SimbodyMatterSubsystem matter(system);
267     Body::Rigid body;
268     MobilizedBody last = matter.updGround();
269     for (int i = 0; i < 256; i++) {
270         MobilizedBody::Ball next(last, Vec3(1, 0, 0), body, Vec3(0));
271         last = next;
272     }
273     system.realizeTopology();
274 }
275 
276 
createGimbalChain(MultibodySystem & system)277 void createGimbalChain(MultibodySystem& system) {
278     SimbodyMatterSubsystem matter(system);
279     Body::Rigid body;
280     MobilizedBody last = matter.updGround();
281     for (int i = 0; i < 256; i++) {
282         MobilizedBody::Gimbal next(last, Vec3(1, 0, 0), body, Vec3(0));
283         last = next;
284     }
285     system.realizeTopology();
286 }
287 
createPinTree(MultibodySystem & system)288 void createPinTree(MultibodySystem& system) {
289     SimbodyMatterSubsystem matter(system);
290     Body::Rigid body;
291     for (int i = 0; i < 256; i++) {
292         MobilizedBody& parent = matter.updMobilizedBody(MobilizedBodyIndex(i/2));
293         MobilizedBody::Pin next(parent, Vec3(1, 0, 0), body, Vec3(0));
294     }
295     system.realizeTopology();
296 }
297 
createBallTree(MultibodySystem & system)298 void createBallTree(MultibodySystem& system) {
299     SimbodyMatterSubsystem matter(system);
300     Body::Rigid body;
301     for (int i = 0; i < 256; i++) {
302         MobilizedBody& parent = matter.updMobilizedBody(MobilizedBodyIndex(i/2));
303         MobilizedBody::Ball next(parent, Vec3(1, 0, 0), body, Vec3(0));
304     }
305     system.realizeTopology();
306 }
307 
308 static int tenInts[10];
309 static Real tenReals[10];
310 // These should multiply out to about 1.
311 static Real tenMults[10] =
312     {Real(0.501),Real(0.2501),Real(0.201),Real(0.101),Real(1.000000001),
313     Real(1/1.000000002),Real(1/.101), Real(1/.201), Real(1/.2501), Real(1/.501)};
testFunctions(double & flopTime,bool flopTimeOnly=false)314 void testFunctions(double& flopTime, bool flopTimeOnly=false) {
315     Real addRes=1,subRes=1,mulRes=1,divRes=1,sqrtRes=1,oosqrtRes=1,
316          sinRes=1,cosRes=1,atan2Res=1,logRes=1,expRes=1;
317     int intAddRes=1;
318     Random::Uniform rand; rand.setMin(-5); rand.setMax(5);
319     for (int i=0; i<10; i++) tenInts[i] = rand.getIntValue();
320     for (int i=0; i<10; i++) tenReals[i] = rand.getValue();
321 
322     double tprev = threadCpuTime();
323     for (int i = 0; i < 10*100000000; i++) {
324         intAddRes += tenInts[0];
325         intAddRes -= tenInts[1];
326         intAddRes += tenInts[2];
327         intAddRes -= tenInts[3];
328         intAddRes += tenInts[4];
329         intAddRes -= tenInts[5];
330         intAddRes += tenInts[6];
331         intAddRes -= tenInts[7];
332         intAddRes += tenInts[8];
333         intAddRes -= tenInts[9];
334     }
335     double t = threadCpuTime(); double intAddTime = (t-tprev)/10; // time for 1e9 ops
336     printf("intAdd %gs\n", t-tprev);
337     tprev = threadCpuTime();
338     for (int i = 0; i < 5*100000000; i++) {
339         addRes += Real(1.1);
340         addRes += Real(1.2);
341         addRes += Real(1.3);
342         addRes += Real(1.4);
343         addRes += Real(1.501);
344         addRes += Real(1.6);
345         addRes += Real(1.7);
346         addRes += Real(1.8);
347         addRes += Real(1.9);
348         addRes += Real(2.007);
349     }
350     t = threadCpuTime(); double addTime = (t-tprev)/5;
351     printf("add %gs\n", t-tprev);
352     tprev = threadCpuTime();
353     for (int i = 0; i < 5*100000000; i++) {
354         subRes -= Real(1.1);
355         subRes -= Real(1.2);
356         subRes -= Real(1.3);
357         subRes -= Real(1.4);
358         subRes -= Real(1.501);
359         subRes -= Real(1.6);
360         subRes -= Real(1.7);
361         subRes -= Real(1.8);
362         subRes -= Real(1.9);
363         subRes -= Real(2.007);
364     }
365     t = threadCpuTime(); double subTime = (t-tprev)/5;
366     printf("sub %gs\n", t-tprev);
367     tprev = threadCpuTime();
368     for (int i = 0; i < 3*100000000; i++) {
369         mulRes *= Real(0.501);
370         mulRes *= Real(0.2501);
371         mulRes *= Real(0.201);
372         mulRes *= Real(0.101);
373         mulRes *= Real(1.000000001);
374         mulRes *= Real(1/1.000000002); // done at compile time
375         mulRes *= Real(1/.101);
376         mulRes *= Real(1/.201);
377         mulRes *= Real(1/.2501);
378         mulRes *= Real(1/.501);
379     }
380     t = threadCpuTime(); double mulTime=(t-tprev)/3;
381     printf("mul %gs\n", t-tprev);
382     flopTime = (addTime+mulTime)/2;
383     std::cout << "1 flop=avg(add,mul)=" << flopTime << "ns\n";
384     if (flopTimeOnly)
385         return;
386 
387     tprev = threadCpuTime();
388     for (int i = 0; i < 100000000; i++) {
389         divRes /= tenMults[7];
390         divRes /= tenMults[9];
391         divRes /= tenMults[8];
392         divRes /= tenMults[6];
393         divRes /= tenMults[2];
394         divRes /= tenMults[3];
395         divRes /= tenMults[1];
396         divRes /= tenMults[0];
397         divRes /= tenMults[4];
398         divRes /= tenMults[5];
399         // prevent clever optimization VC10 did to turn divides
400         // into multiplies.
401         tenMults[i%10]     = Real(tenMults[i%10]*1.0000000000001);
402         tenMults[(i+5)%10] = Real(tenMults[(i+5)%10]*0.9999999999999);
403     }
404     t = threadCpuTime(); double divTime=(t-tprev);
405     printf("div %gs\n", t-tprev);
406     tprev = threadCpuTime();
407 
408     for (int i = 0; i < 100000000/2; i++) {
409         const Real ir = (Real)i;
410         sqrtRes += std::sqrt(ir+(Real)0.001); // two adds
411         sqrtRes += std::sqrt(ir+(Real)0.1);
412         sqrtRes += std::sqrt(ir+(Real)0.2);
413         sqrtRes += std::sqrt(ir+(Real)0.3);
414         sqrtRes += std::sqrt(ir+(Real)0.4);
415         sqrtRes += std::sqrt(ir+(Real)0.501);
416         sqrtRes += std::sqrt(ir+(Real)0.6);
417         sqrtRes += std::sqrt(ir+(Real)0.7);
418         sqrtRes += std::sqrt(ir+(Real)0.8);
419         sqrtRes += std::sqrt(ir+(Real)0.9);
420     }
421     t = threadCpuTime(); double sqrtTime=2*(t-tprev)-2*addTime;
422     printf("sqrt %gs\n", t-tprev);
423     tprev = threadCpuTime();
424     for (int i = 0; i < 100000000/4; i++) {
425         const Real ir = (Real)i;
426         oosqrtRes += 1/std::sqrt(ir+(Real)0.001); // two adds
427         oosqrtRes += 1/std::sqrt(ir+(Real)0.1);
428         oosqrtRes += 1/std::sqrt(ir+(Real)0.2);
429         oosqrtRes += 1/std::sqrt(ir+(Real)0.3);
430         oosqrtRes += 1/std::sqrt(ir+(Real)0.4);
431         oosqrtRes += 1/std::sqrt(ir+(Real)0.501);
432         oosqrtRes += 1/std::sqrt(ir+(Real)0.6);
433         oosqrtRes += 1/std::sqrt(ir+(Real)0.7);
434         oosqrtRes += 1/std::sqrt(ir+(Real)0.8);
435         oosqrtRes += 1/std::sqrt(ir+(Real)0.9);
436     }
437     t = threadCpuTime(); double oosqrtTime=4*(t-tprev)-2*addTime;
438     printf("1/sqrt %gs\n", t-tprev);
439     tprev = threadCpuTime();
440     for (int i = 0; i < 100000000/5; i++) {
441         const Real ir = (Real)i;
442         logRes += std::log(ir+(Real)0.001); // two adds
443         logRes += std::log(ir+(Real)0.1);
444         logRes += std::log(ir+(Real)0.2);
445         logRes += std::log(ir+(Real)0.3);
446         logRes += std::log(ir+(Real)0.4);
447         logRes += std::log(ir+(Real)0.501);
448         logRes += std::log(ir+(Real)0.6);
449         logRes += std::log(ir+(Real)0.7);
450         logRes += std::log(ir+(Real)0.8);
451         logRes += std::log(ir+(Real)0.9);
452     }
453     t = threadCpuTime(); double logTime=(t-tprev)*5-2*addTime;
454     printf("log %gs\n", t-tprev);
455     tprev = threadCpuTime();
456     for (int i = 0; i < 100000000/5; i++) {
457         const Real ir = (Real).000000001*(Real)i;
458         expRes += std::exp(ir+(Real)0.001); // two adds
459         expRes += std::exp(ir+(Real)0.1);
460         expRes += std::exp(ir+(Real)0.2);
461         expRes += std::exp(ir+(Real)0.3);
462         expRes += std::exp(ir+(Real)0.4);
463         expRes += std::exp(ir+(Real)0.501);
464         expRes += std::exp(ir+(Real)0.6);
465         expRes += std::exp(ir+(Real)0.7);
466         expRes += std::exp(ir+(Real)0.8);
467         expRes += std::exp(ir+(Real)0.9);
468     }
469     t = threadCpuTime(); double expTime=(t-tprev)*5-2*addTime;
470     printf("exp %gs\n", t-tprev);
471     tprev = threadCpuTime();
472     for (int i = 0; i < 100000000/10; i++) {
473         const Real ir = (Real)i;
474         sinRes += std::sin(ir+(Real)0.001); // two adds
475         sinRes += std::sin(ir+(Real)0.1);
476         sinRes += std::sin(ir+(Real)0.2);
477         sinRes += std::sin(ir+(Real)0.3);
478         sinRes += std::sin(ir+(Real)0.4);
479         sinRes += std::sin(ir+(Real)0.501);
480         sinRes += std::sin(ir+(Real)0.6);
481         sinRes += std::sin(ir+(Real)0.7);
482         sinRes += std::sin(ir+(Real)0.8);
483         sinRes += std::sin(ir+(Real)0.9);
484     }
485     t = threadCpuTime(); double sinTime=(t-tprev)*10-2*addTime;
486     printf("sin %gs\n", t-tprev);
487     tprev = threadCpuTime();
488     for (int i = 0; i < 100000000/10; i++) {
489         const Real ir = (Real)i;
490         cosRes += std::cos(ir+(Real)0.001); // two adds
491         cosRes += std::cos(ir+(Real)0.1);
492         cosRes += std::cos(ir+(Real)0.2);
493         cosRes += std::cos(ir+(Real)0.3);
494         cosRes += std::cos(ir+(Real)0.4);
495         cosRes += std::cos(ir+(Real)0.501);
496         cosRes += std::cos(ir+(Real)0.6);
497         cosRes += std::cos(ir+(Real)0.7);
498         cosRes += std::cos(ir+(Real)0.8);
499         cosRes += std::cos(ir+(Real)0.9);
500     }
501     t = threadCpuTime(); double cosTime=(t-tprev)*10-2*addTime;
502     printf("cos %gs\n", t-tprev);
503     tprev = threadCpuTime();
504     for (int i = 0; i < 100000000/10; i++) {
505         const Real ir = (Real)i;
506         atan2Res += std::atan2(ir+(Real)0.001,ir-(Real)0.001); // three adds
507         atan2Res += std::atan2(ir+(Real)0.1,ir-(Real)0.1);
508         atan2Res += std::atan2(ir+(Real)0.2,ir-(Real)0.2);
509         atan2Res += std::atan2(ir+(Real)0.3,ir-(Real)0.3);
510         atan2Res += std::atan2(ir+(Real)0.4,ir-(Real)0.4);
511         atan2Res += std::atan2(ir+(Real)0.501,ir-(Real)0.501);
512         atan2Res += std::atan2(ir+(Real)0.6,ir-(Real)0.6);
513         atan2Res += std::atan2(ir+(Real)0.7,ir-(Real)0.7);
514         atan2Res += std::atan2(ir+(Real)0.8,ir-(Real)0.8);
515         atan2Res += std::atan2(ir+(Real)0.9,ir-(Real)0.9);
516     }
517     t = threadCpuTime(); double atan2Time=(t-tprev)*10-3*addTime;
518     printf("atan2 %gs\n", t-tprev);
519     tprev = threadCpuTime();
520 
521     std::cout << std::setprecision(5);
522     std::cout << "1 flop=avg(add,mul)=" << flopTime << "ns\n";
523     printf("op\t t/10^9\t flops\t final result\n");
524     std::cout << "int+:\t"  <<intAddTime<<"\t"<<intAddTime   /flopTime<<"\t"<<intAddRes<< "\n";
525     std::cout << "+:\t"     <<addTime<<"\t"<<addTime   /flopTime<<"\t"<<addRes<< "\n";
526     std::cout << "-:\t"     <<subTime<<"\t"<<subTime   /flopTime<<"\t"<<subRes<< "\n";
527     std::cout << "*:\t"     <<mulTime<<"\t"<<mulTime   /flopTime<<"\t"<<mulRes<< "\n";
528     std::cout << "/:\t"     <<divTime<<"\t"<<divTime   /flopTime<<"\t"<<divRes<< "\n";
529     std::cout << "sqrt:\t"  <<sqrtTime<<"\t"<<sqrtTime  /flopTime<<"\t"<<sqrtRes<< "\n";
530     std::cout << "1/sqrt:\t"<<oosqrtTime<<"\t"<<oosqrtTime/flopTime<<"\t"<<oosqrtRes<< "\n";
531     std::cout << "log:\t"   <<logTime<<"\t"<<logTime   /flopTime<<"\t"<<logRes<< "\n";
532     std::cout << "exp:\t"   <<expTime<<"\t"<<expTime   /flopTime<<"\t"<<expRes<< "\n";
533     std::cout << "sin:\t"   <<sinTime<<"\t"<<sinTime   /flopTime<<"\t"<<sinRes<< "\n";
534     std::cout << "cos:\t"   <<cosTime<<"\t"<<cosTime   /flopTime<<"\t"<<cosRes<< "\n";
535     std::cout << "atan2:\t" <<atan2Time<<"\t"<<atan2Time /flopTime<<"\t"<<atan2Res<< "\n";
536 }
537 
538 
main()539 int main() {
540     time_t now;
541     time(&now);
542     printf("Starting: %s\n", ctime(&now));
543     {   std::cout << "\nCPU performance\n" << std::endl;
544         testFunctions(flopTimeInNs, true /*flop time only*/);
545     }
546     double startClock  = realTime();
547     double startCpu    = cpuTime();
548     double startThread = threadCpuTime();
549 
550     {
551         std::cout << "\nParticles:\n" << std::endl;
552         MultibodySystem system;
553         createParticles(system);
554         runAllTests(system);
555     }
556 
557     {
558         std::cout << "\nFree Bodies (Quaternions):\n" << std::endl;
559         MultibodySystem system;
560         createFreeBodies(system);
561         runAllTests(system, false);
562     }
563     {
564         std::cout << "\nFree Bodies (Euler angles):\n" << std::endl;
565         MultibodySystem system;
566         createFreeBodies(system);
567         runAllTests(system, true);
568     }
569 
570     {
571         std::cout << "\nPin Chain:\n" << std::endl;
572         MultibodySystem system;
573         createPinChain(system);
574         runAllTests(system);
575     }
576     {
577         std::cout << "\nSlider Chain:\n" << std::endl;
578         MultibodySystem system;
579         createSliderChain(system);
580         runAllTests(system);
581     }
582     {
583         std::cout << "\nBall Chain (Quaternions):\n" << std::endl;
584         MultibodySystem system;
585         createBallChain(system);
586         runAllTests(system, false);
587     }
588     {
589         std::cout << "\nBall Chain (Euler angles):\n" << std::endl;
590         MultibodySystem system;
591         createBallChain(system);
592         runAllTests(system, true);
593     }
594     {
595         std::cout << "\nGimbal Chain:\n" << std::endl;
596         MultibodySystem system;
597         createGimbalChain(system);
598         runAllTests(system);
599     }
600 
601     {
602         std::cout << "\nPin Tree:\n" << std::endl;
603         MultibodySystem system;
604         createPinTree(system);
605         runAllTests(system);
606     }
607     {
608         std::cout << "\nBall Tree:\n" << std::endl;
609         MultibodySystem system;
610         createBallTree(system);
611         runAllTests(system);
612     }
613 
614 
615     std::cout << "Total time:\n";
616     std::cout << "  process CPU=" << cpuTime()-startCpu << "s\n";
617     std::cout << "  thread CPU =" << threadCpuTime()-startThread << "s\n";
618     std::cout << "  real time  =" << realTime()-startClock << "s\n";
619 }