1 /* $Header: /var/cvs/mbdyn/mbdyn/mbdyn-1.0/libraries/libmbmath/matvectest.cc,v 1.8 2017/01/12 14:43:54 masarati Exp $ */
2 /*
3  * MBDyn (C) is a multibody analysis code.
4  * http://www.mbdyn.org
5  *
6  * Copyright (C) 1996-2017
7  *
8  * Pierangelo Masarati	<masarati@aero.polimi.it>
9  * Paolo Mantegazza	<mantegazza@aero.polimi.it>
10  *
11  * Dipartimento di Ingegneria Aerospaziale - Politecnico di Milano
12  * via La Masa, 34 - 20156 Milano, Italy
13  * http://www.aero.polimi.it
14  *
15  * Changing this copyright notice is forbidden.
16  *
17  * This program is free software; you can redistribute it and/or modify
18  * it under the terms of the GNU General Public License as published by
19  * the Free Software Foundation (version 2 of the License).
20  *
21  *
22  * This program is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25  * GNU General Public License for more details.
26  *
27  * You should have received a copy of the GNU General Public License
28  * along with this program; if not, write to the Free Software
29  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
30  */
31 
32 /*
33  AUTHOR: Reinhard Resch <r.resch@secop.com>
34         Copyright (C) 2013(-2017) all rights reserved.
35 
36         The copyright of this code is transferred
37         to Pierangelo Masarati and Paolo Mantegazza
38         for use in the software MBDyn as described
39         in the GNU Public License version 2.1
40 */
41 
42 #include "mbconfig.h"           /* This goes first in every *.c,*.cc file */
43 
44 #include <cassert>
45 #include <ctime>
46 #include <cstdlib>
47 #include <iostream>
48 #include <cmath>
49 #include <typeinfo>
50 
51 #ifdef HAVE_BLITZ
52 #include <blitz/blitz.h>
53 #include <blitz/array.h>
54 #include <blitz/tinyvec.h>
55 #include <blitz/tinyvec-et.h>
56 #include <blitz/tinymat.h>
57 #include <blitz/matrix.h>
58 #endif
59 
60 #ifdef HAVE_FEENABLEEXCEPT
61 #define _GNU_SOURCE 1
62 #include <fenv.h>
63 #endif
64 
65 #ifndef MATVEC_DEBUG
66 	#define MATVEC_DEBUG 1
67 #endif
68 
69 #ifndef GRADIENT_DEBUG
70 	#define GRADIENT_DEBUG 1
71 #endif
72 
73 #include "ac/f2c.h"
74 #include "clock_time.h"
75 #include "matvec.h"
76 #include "matvec3.h"
77 #include "Rot.hh"
78 #include "matvecass.h"
79 
80 using namespace grad;
81 
82 int NLoops = 1;
83 int NLoopsAss = 1;
84 void tic();
85 void tic(doublereal& dTime);
86 doublereal toc();
87 
random1()88 doublereal random1() {
89 	return 2 * (doublereal(rand()) / RAND_MAX) - 1;
90 }
91 
testScalarTypeTraits()92 void testScalarTypeTraits() {
93 	typedef ScalarBinaryExpressionTraits<FuncPlus, doublereal, doublereal, doublereal>::ExpressionType Expr001;
94 	typedef ScalarBinaryExpressionTraits<FuncPlus, Gradient<0>, Gradient<0>, Gradient<0> >::ExpressionType Expr002;
95 	typedef ScalarBinaryExpressionTraits<FuncPlus, Gradient<0>, doublereal, Gradient<0> >::ExpressionType Expr003;
96 	typedef ScalarBinaryExpressionTraits<FuncPlus, Gradient<0>, Gradient<0>, doublereal>::ExpressionType Expr004;
97 	typedef ScalarBinaryExpressionTraits<FuncPlus, Gradient<0>, Expr003, Expr004>::ExpressionType Expr005;
98 	typedef ScalarBinaryExpressionTraits<FuncPlus, Gradient<0>, Expr005, Expr001>::ExpressionType Expr006;
99 	typedef ScalarBinaryExpressionTraits<FuncPlus, Gradient<0>, Expr005, Expr006>::ExpressionType Expr007;
100 	typedef ScalarBinaryExpressionTraits<FuncPlus, doublereal, Expr001, Expr001>::ExpressionType Expr008;
101 	typedef ScalarBinaryExpressionTraits<FuncPlus, doublereal, Expr001, Expr008>::ExpressionType Expr009;
102 	typedef ScalarBinaryExpressionTraits<FuncPlus, doublereal, Expr008, Expr009>::ExpressionType Expr010;
103 	typedef ScalarBinaryExpressionTraits<FuncPlus, Gradient<0>, Expr010, Expr007>::ExpressionType Expr011;
104 	typedef BasicScalarType<Expr001>::ScalarType T001;
105 	typedef BasicScalarType<Expr002>::ScalarType T002;
106 	typedef BasicScalarType<Expr003>::ScalarType T003;
107 	typedef BasicScalarType<Expr004>::ScalarType T004;
108 	typedef BasicScalarType<Expr005>::ScalarType T005;
109 	typedef BasicScalarType<Expr006>::ScalarType T006;
110 	typedef BasicScalarType<Expr007>::ScalarType T007;
111 	typedef BasicScalarType<Expr008>::ScalarType T008;
112 	typedef BasicScalarType<Expr009>::ScalarType T009;
113 	typedef BasicScalarType<Expr010>::ScalarType T010;
114 	typedef BasicScalarType<Expr011>::ScalarType T011;
115 	typedef CrossTraits<VectorDirectExpr<Vector<Gradient<0>, 3> >, VectorDirectExpr<Vector<Gradient<0>, 3> > >::ExpressionType C001;
116 	typedef CrossTraits<VectorDirectExpr<Vector<doublereal, 3> >, VectorDirectExpr<Vector<doublereal, 3> > >::ExpressionType C002;
117 	std::cout << "Expr001=" << typeid(Expr001).name() << std::endl;
118 	std::cout << "Expr002=" << typeid(Expr002).name() << std::endl;
119 	std::cout << "Expr003=" << typeid(Expr003).name() << std::endl;
120 	std::cout << "Expr004=" << typeid(Expr004).name() << std::endl;
121 	std::cout << "Expr005=" << typeid(Expr005).name() << std::endl;
122 	std::cout << "Expr006=" << typeid(Expr006).name() << std::endl;
123 	std::cout << "Expr007=" << typeid(Expr007).name() << std::endl;
124 	std::cout << "T001=" << typeid(T001).name() << std::endl;
125 	std::cout << "T002=" << typeid(T002).name() << std::endl;
126 	std::cout << "T003=" << typeid(T003).name() << std::endl;
127 	std::cout << "T004=" << typeid(T004).name() << std::endl;
128 	std::cout << "T005=" << typeid(T005).name() << std::endl;
129 	std::cout << "T006=" << typeid(T006).name() << std::endl;
130 	std::cout << "T007=" << typeid(T007).name() << std::endl;
131 	std::cout << "T008=" << typeid(T008).name() << std::endl;
132 	std::cout << "T009=" << typeid(T009).name() << std::endl;
133 	std::cout << "T010=" << typeid(T010).name() << std::endl;
134 	std::cout << "T011=" << typeid(T011).name() << std::endl;
135 	assert(typeid(T001) == typeid(doublereal));
136 	assert(typeid(T002) == typeid(Gradient<0>));
137 	assert(typeid(T003) == typeid(Gradient<0>));
138 	assert(typeid(T004) == typeid(Gradient<0>));
139 	assert(typeid(T005) == typeid(Gradient<0>));
140 	assert(typeid(T006) == typeid(Gradient<0>));
141 	assert(typeid(T007) == typeid(Gradient<0>));
142 	assert(typeid(T008) == typeid(doublereal));
143 	assert(typeid(T009) == typeid(doublereal));
144 	assert(typeid(T010) == typeid(doublereal));
145 	assert(typeid(T011) == typeid(Gradient<0>));
146 
147 	const index_type N_rows = 3;
148 	typedef VectorExpression<VectorVectorVectorBinaryExpr<ScalarBinaryOperation<FuncPlus, ScalarTypeTraits<Gradient<3> >::DirectExpressionType, ScalarTypeTraits<Gradient<3> >::DirectExpressionType>, VectorDirectExpr<Vector<Gradient<3>, N_rows> >, VectorDirectExpr<Vector<Gradient<3>, N_rows> > >, N_rows> V001;
149 	typedef SumTraits<V001, N_rows, 2> Sum003;
150 	Sum003 s003;
151 	std::cout << typeid(s003).name() << std::endl;
152 
153 	std::cout << "sizeof(Matrix<Gradient<12>, 3, 3>())="
154 			<< sizeof(Matrix<Gradient<12>, 3, 3>) << std::endl;
155 
156 	std::cout << "sizeof(Vector<Gradient<12> >())="
157 			<< sizeof(Vector<Gradient<12>, 3>) << std::endl;
158 
159 	std::cout << "sizeof(Matrix<Gradient<12>, 3, 3>() * Vector<Gradient<12>, 3>())="
160 			<< sizeof(Matrix<Gradient<12>, 3, 3>() * Vector<Gradient<12>, 3>()) << std::endl;
161 
162 	RangeVector<doublereal, 3> v3;
163 	std::cout << "v3.iGetMaxSize()=" << v3.iGetMaxSize() << std::endl;
164 	RangeVector<doublereal, 0> v0;
165 	std::cout << "v0.iGetMaxSize()=" << v0.iGetMaxSize() << std::endl;
166 
167 	std::cout << typeid(C001).name() << std::endl;
168 	std::cout << typeid(C002).name() << std::endl;
169 
170 	Gradient<0> g0;
171 	std::cout << "g0 max derivatives: " << g0.iGetMaxDerivatives() << std::endl;
172 }
173 
174 #ifdef HAVE_BLITZ
175 template <typename T, index_type N>
func(LocalDofMap * pDofMap,const blitz::TinyVector<T,N> & a,const blitz::TinyVector<T,N> & b,blitz::TinyVector<T,N> & c)176 void func(LocalDofMap* pDofMap, const blitz::TinyVector<T, N>& a, const blitz::TinyVector<T, N>& b, blitz::TinyVector<T, N>& c) {
177 	doublereal r1, r2, r3;
178 	r1 = random1();
179 	r2 = random1();
180 	r3 = random1();
181 #if 1
182 	const T d1 = blitz::dot(a, b) * (2. / 1.5);
183 
184 	blitz::TinyVector<T, N> b_d1;
185 
186 	for (int i = 0; i < N; ++i) {
187 		b_d1(i) = b(i) * d1;
188 	}
189 
190 	c = (a * r1 + b * r2) / 2. - (a * r3 - b) * r1 + b_d1;
191 #else
192 	T d1 = blitz::dot(a, b);
193 	d1 *=  (2. / 1.5);
194 	c = (a * r1 + b * r2) / 2. - (a * r3 - b) * r1 + b * d1;
195 #endif
196 }
197 #endif
198 
199 template <typename T, index_type N>
func(const Vector<T,N> & a,const Vector<T,N> & b,Vector<T,N> & c)200 void func(const Vector<T, N>& a, const Vector<T, N>& b, Vector<T, N>& c) {
201 	doublereal r1, r2, r3;
202 	r1 = random1();
203 	r2 = random1();
204 	r3 = random1();
205 	const T d1 = Dot(a, b) * (2. / 1.5);
206 	c = (a * (r1 / 2) + b * (r2 / 2.)) - (a * (r3 * r1) - b * r1) + b * d1;
207 }
208 
209 template <typename T>
func(const T a[],const T b[],T c[],index_type N)210 void func(const T a[], const T b[], T c[], index_type N) {
211 	doublereal r1, r2, r3;
212 	r1 = random1();
213 	r2 = random1();
214 	r3 = random1();
215 
216 	T d1 = T(0.);
217 
218 	for (int i = 0; i < N; ++i) {
219 		d1 += a[i] * b[i] * (2. / 1.5);
220 	}
221 
222 	for (int i = 0; i < N; ++i) {
223 		c[i] = (a[i] * r1 + b[i] * r2) / 2. - (a[i] * r3 - b[i]) * r1 + b[i] * d1;
224 	}
225 }
226 
227 template <typename T, index_type N>
func2(const Matrix<T,N,N> & A,const Vector<T,N> & b,const Vector<T,N> & c,Vector<T,N> & d,doublereal e,doublereal & dt)228 void func2(const Matrix<T, N, N>& A, const Vector<T, N>& b, const Vector<T, N>& c, Vector<T, N>& d, doublereal e, doublereal& dt) {
229 	doublereal start, stop;
230 	tic(start);
231 	d = (Transpose(A) * Vector<T, N>(A * Vector<T, N>((b - c) * e)));
232 	tic(stop);
233 	dt += stop - start;
234 }
235 
236 template <typename T>
func2(const T * A,const T * b,const T * c__,T * d__,const doublereal & e,doublereal & dt)237 void func2(const T *A, const T *b, const T *c__, T *d__, const doublereal& e, doublereal& dt) {
238 	doublereal start, stop;
239 	tic(start);
240     /* Parameter adjustments */
241     --d__;
242     --c__;
243     --b;
244     A -= 4;
245     const T tmp1 = b[3] - c__[3];
246     const T tmp2 = b[2] - c__[2];
247     const T tmp3 = b[1] - c__[1];
248     const T tmp4 = tmp3 * A[7];
249     const T tmp5 = tmp1 * A[9];
250     const T tmp6 = tmp1 * A[12];
251     const T tmp7 = tmp1 * A[6];
252     const T tmp8 = tmp2 * A[11];
253     const T tmp10 = tmp3 * A[10];
254     const T tmp11 = tmp6 + tmp8 + tmp10;
255     const T tmp13 = tmp5 + tmp2 * A[8] + tmp4;
256     const T tmp14 = tmp7 + tmp2 * A[5] + tmp3 * A[4];
257     /* Function Body */
258     d__[1] = (A[10] * tmp11 + A[7] * tmp13 + A[4] * tmp14) * e;
259     d__[2] = (A[11] * tmp11 + A[8] * tmp13 + A[5] * tmp14) * e;
260     d__[3] = (A[12] * tmp11 + A[9] * tmp13 + A[6] * tmp14) * e;
261 
262     tic(stop);
263     dt += stop - start;
264 } /* func2_ */
265 
266 /*
267 	  SUBROUTINE FUNC2AD(A, b, c, d, e)
268       IMPLICIT NONE
269       DOUBLE PRECISION A(3, 3), b(3), c(3), d(3), e
270  */
271 extern "C" void __FC_DECL__(func2ad)(const doublereal A[3][3],
272 						 const doublereal b[3],
273 						 const doublereal c[3],
274 						 doublereal d[3],
275 						 const doublereal& e);
276 
277 const integer nbdirsmax = 12;
278 /*
279      SUBROUTINE FUNC2AD_DV(a, ad, b, bd, c, cd, d, dd, e, nbdirs)
280       IMPLICIT INTEGER (n-n)
281       PARAMETER (nbdirsmax = 12)
282       DOUBLE PRECISION a(3, 3), b(3), c(3), d(3), e
283       DOUBLE PRECISION ad(nbdirsmax, 3, 3), bd(nbdirsmax, 3), cd(
284      +                 nbdirsmax, 3), dd(nbdirsmax, 3)
285 */
286 extern "C" void __FC_DECL__(func2ad_dv)(const doublereal A[3][3],
287 							const doublereal Ad[3][3][nbdirsmax],
288 							const doublereal b[3],
289 							const doublereal bd[3][nbdirsmax],
290 							const doublereal c[3],
291 							const doublereal cd[3][nbdirsmax],
292 							doublereal d[3],
293 							doublereal dd[3][nbdirsmax],
294 							const doublereal& e,
295 							const integer& nbdirs);
296 
func2ad(const Matrix<doublereal,3,3> & A,const Vector<doublereal,3> & b,const Vector<doublereal,3> & c,Vector<doublereal,3> & d,const doublereal & e,LocalDofMap *,doublereal & dt)297 inline void func2ad(const Matrix<doublereal, 3, 3>& A, const Vector<doublereal, 3>& b, const Vector<doublereal, 3>& c, Vector<doublereal, 3>& d, const doublereal& e, LocalDofMap*, doublereal& dt) {
298 
299 	doublereal A_F[3][3];
300 
301 	for (int i = 0; i < 3; ++i) {
302 		for (int j = 0; j < 3; ++j) {
303 			A_F[j][i] = A(i + 1, j + 1);
304 		}
305 	}
306 
307 	doublereal start, stop;
308 
309 	tic(start);
310 	func2ad_(A_F, b.pGetVec(), c.pGetVec(), d.pGetVec(), e);
311 	tic(stop);
312 
313 	dt += stop - start;
314 }
315 
316 template <index_type N_SIZE>
func2ad(const Matrix<Gradient<N_SIZE>,3,3> & A,const Vector<Gradient<N_SIZE>,3> & b,const Vector<Gradient<N_SIZE>,3> & c,Vector<Gradient<N_SIZE>,3> & d,const doublereal & e,LocalDofMap * pDofMap,doublereal & dt)317 inline void func2ad(const Matrix<Gradient<N_SIZE>, 3, 3>& A, const Vector<Gradient<N_SIZE>, 3>& b, const Vector<Gradient<N_SIZE>, 3>& c, Vector<Gradient<N_SIZE>, 3>& d, const doublereal& e, LocalDofMap* pDofMap, doublereal& dt) {
318 	typedef typename MaxSizeCheck<N_SIZE <= index_type(nbdirsmax)>::CheckType check_nbdirsmax;
319 
320 	doublereal A_F[3][3], Ad_F[3][3][nbdirsmax], b_F[3], bd_F[3][nbdirsmax], c_F[3], cd_F[3][nbdirsmax], d_F[3], dd_F[3][nbdirsmax];
321 
322 	for (int i = 0; i < 3; ++i) {
323 		for (int j = 0; j < 3; ++j) {
324 			const Gradient<N_SIZE>& A_ij = A(i + 1, j + 1);
325 			A_F[j][i] = A_ij.dGetValue();
326 			for (index_type k = 0; k < N_SIZE; ++k) {
327 				Ad_F[j][i][k] = A_ij.dGetDerivativeGlobal(k);
328 			}
329 		}
330 
331 		b_F[i] = b(i + 1).dGetValue();
332 		c_F[i] = c(i + 1).dGetValue();
333 
334 		for (index_type k = 0; k < N_SIZE; ++k) {
335 			bd_F[i][k] = b(i + 1).dGetDerivativeGlobal(k);
336 			cd_F[i][k] = c(i + 1).dGetDerivativeGlobal(k);
337 		}
338 	}
339 
340 	doublereal start, stop;
341 	tic(start);
342 	func2ad_dv_(A_F, Ad_F, b_F, bd_F, c_F, cd_F, d_F, dd_F, e, N_SIZE);
343 	tic(stop);
344 	dt += stop - start;
345 
346 	for (int i = 0; i < 3; ++i) {
347 		d(i + 1).SetValuePreserve(d_F[i]);
348 		d(i + 1).DerivativeResizeReset(pDofMap, 0L, N_SIZE, MapVectorBase::GLOBAL, 0.);
349 		for (index_type k = 0; k < N_SIZE; ++k) {
350 			d(i + 1).SetDerivativeGlobal(k, dd_F[i][k]);
351 		}
352 	}
353 }
354 
355 template <typename T>
func3(const Matrix<T,3,3> & R1,const Matrix<T,3,3> & R2,const Vector<T,3> & a,const Vector<T,3> & b,Vector<T,3> & c,doublereal e)356 void func3(const Matrix<T, 3, 3>& R1, const Matrix<T, 3, 3>& R2, const Vector<T, 3>& a, const Vector<T, 3>& b, Vector<T, 3>& c, doublereal e) {
357 	c = (Cross(R1 * a, R2 * b) + Cross(R2 * a, R1 * b)) * e;
358 }
359 
360 template
361 void func3<doublereal>(const Matrix<doublereal, 3, 3>& R1, const Matrix<doublereal, 3, 3>& R2, const Vector<doublereal, 3>& a, const Vector<doublereal, 3>& b, Vector<doublereal, 3>& c, doublereal e);
362 
363 template
364 void func3<Gradient<0> >(const Matrix<Gradient<0> , 3, 3>& R1, const Matrix<Gradient<0> , 3, 3>& R2, const Vector<Gradient<0> , 3>& a, const Vector<Gradient<0> , 3>& b, Vector<Gradient<0> , 3>& c, doublereal e);
365 
366 template <typename T>
bCompare(const T & a,const T & b,doublereal dTolRel=0.)367 bool bCompare(const T& a, const T& b, doublereal dTolRel = 0.) {
368     assert(!std::isnan(a));
369     assert(!std::isnan(b));
370 	doublereal dTolAbs = std::max<T>(1., std::max<T>(std::abs(a), std::abs(b))) * dTolRel;
371 	return std::abs(a - b) <= dTolAbs;
372 }
373 
374 template <index_type N_SIZE>
bCompare(const Gradient<N_SIZE> & a,const Gradient<N_SIZE> & b,doublereal dTolRel=0.)375 bool bCompare(const Gradient<N_SIZE>& a, const Gradient<N_SIZE>& b, doublereal dTolRel = 0.) {
376     assert(!std::isnan(a.dGetValue()));
377     assert(!std::isnan(b.dGetValue()));
378 	doublereal dTolAbs = std::max(1., std::max(std::abs(a.dGetValue()), std::abs(b.dGetValue()))) * dTolRel;
379 
380 	if (std::abs(a.dGetValue() - b.dGetValue()) > dTolAbs) {
381 		std::cerr << "a = " << a.dGetValue() << " != b = " << b.dGetValue() << std::endl;
382 		return false;
383 	}
384 
385 	index_type iStart = std::min(a.iGetStartIndexLocal(), b.iGetStartIndexLocal());
386 	index_type iEnd = std::max(a.iGetEndIndexLocal(), b.iGetEndIndexLocal());
387 
388 	for (index_type i = iStart; i < iEnd; ++i) {
389 		doublereal dTolAbs = std::max<scalar_deriv_type>(1., std::max<scalar_deriv_type>(std::abs(a.dGetDerivativeLocal(i)), std::abs(b.dGetDerivativeLocal(i)))) * dTolRel;
390         assert(!std::isnan(a.dGetDerivativeLocal(i)));
391         assert(!std::isnan(b.dGetDerivativeLocal(i)));
392 		if (std::abs(a.dGetDerivativeLocal(i) - b.dGetDerivativeLocal(i)) > dTolAbs) {
393 			std::cerr << "ad(" << i << ") = " << a.dGetDerivativeLocal(i) << " != bd(" << i << ") = " << b.dGetDerivativeLocal(i) << std::endl;
394 			return false;
395 		}
396 	}
397 
398 	return true;
399 }
400 
401 template <typename T, index_type N>
callFunc(LocalDofMap * pDofMap,const Vector<T,N> & a,const Vector<T,N> & b,Vector<T,N> & c,Vector<T,N> & c1)402 void callFunc(LocalDofMap* pDofMap, const Vector<T, N>& a, const Vector<T, N>& b, Vector<T, N>& c, Vector<T, N>& c1) {
403 	srand(0);
404     tic();
405     for (int i = 0; i < NLoops; ++i) {
406     	func(a, b, c);
407     }
408 
409     std::cerr << "Vector (" << typeid(T).name() << "): " << toc() << "s" << std::endl;
410 
411     srand(0);
412     tic();
413     for (int i = 0; i < NLoops; ++i) {
414     	func(a.pGetVec(), b.pGetVec(), c1.pGetVec(), N);
415     }
416 
417     std::cerr << "Array (" << typeid(T).name() << "): " << toc() << "s" << std::endl;
418 
419     std::cout << "a=" << std::endl << a << std::endl;
420     std::cout << "b=" << std::endl << b << std::endl;
421     std::cout << "c=" << std::endl << c << std::endl;
422     std::cout << "c1=" << std::endl << c1 << std::endl;
423 
424     const doublereal dTol = sqrt(std::numeric_limits<doublereal>::epsilon());
425 
426     for (int i = 1; i <= N; ++i) {
427     	assert(bCompare(c(i), c1(i), dTol));
428     }
429 }
430 
431 template <typename T>
callFunc2(LocalDofMap * pDofMap,const Matrix<T,3,3> & A,const Vector<T,3> & b,const Vector<T,3> & c,Vector<T,3> & d,Vector<T,3> & d_C,Vector<T,3> & d_F)432 void callFunc2(LocalDofMap* pDofMap, const Matrix<T, 3, 3>& A, const Vector<T, 3>& b, const Vector<T, 3>& c, Vector<T, 3>& d, Vector<T, 3>& d_C, Vector<T, 3>& d_F) {
433 	srand(0);
434     doublereal dtMatVec = 0., dtC = 0., dtF = 0.;
435     for (int i = 0; i < NLoops; ++i) {
436     	func2(A, b, c, d, random1(), dtMatVec);
437     }
438 
439     std::cerr << "matvec (" << typeid(T).name() << "): " << dtMatVec << "s" << std::endl;
440 
441     srand(0);
442     for (int i = 0; i < NLoops; ++i) {
443     	func2(A.pGetMat(), b.pGetVec(), c.pGetVec(), d_C.pGetVec(), random1(), dtC);
444     }
445 
446     std::cerr << "C (" << typeid(T).name() << "): " << dtC << "s" << std::endl;
447 
448     srand(0);
449     for (int i = 0; i < NLoops; ++i) {
450     	func2ad(A, b, c, d_F, random1(), pDofMap, dtF);
451     }
452 
453     std::cerr << "F77 (" << typeid(T).name() << "): " << dtF << "s" << std::endl;
454 
455     std::cerr << "overhead matvec:" << dtMatVec / std::max(std::numeric_limits<double>::epsilon(), dtF) << std::endl;
456     std::cerr << "overhead C:" << dtC / std::max(std::numeric_limits<double>::epsilon(), dtF) << std::endl;
457 
458     std::cout << "A=" << std::endl << A << std::endl;
459     std::cout << "b=" << std::endl << b << std::endl;
460     std::cout << "c=" << std::endl << c << std::endl;
461     std::cout << "d=" << std::endl << d << std::endl;
462     std::cout << "d_C=" << std::endl << d_C << std::endl;
463     std::cout << "d_F=" << std::endl << d_F << std::endl;
464 
465     const doublereal dTol = 10 * std::numeric_limits<scalar_deriv_type>::epsilon();
466 
467     for (int i = 1; i <= 3; ++i) {
468     	assert(bCompare(d(i), d_C(i), dTol));
469     	assert(bCompare(d(i), d_F(i), dTol));
470     }
471 }
472 
473 template <index_type N>
testMatVecGradient(doublereal c_C[N],doublereal cd_C[N][N])474 void testMatVecGradient(doublereal c_C[N], doublereal cd_C[N][N]) {
475     LocalDofMap dof;
476     Vector<Gradient<N>, N> a, b;
477 
478     for (index_type i = 0; i < N; ++i) {
479         a(i + 1).SetValuePreserve(100*(i + 1));
480         b(i + 1).SetValuePreserve(1000*(i + 10));
481 
482         a(i + 1).DerivativeResizeReset(&dof, i, MapVectorBase::GLOBAL, 0.);
483         a(i + 1).SetDerivativeGlobal(i, -1. - 10.);
484         b(i + 1).DerivativeResizeReset(&dof, i, MapVectorBase::GLOBAL, 0.);
485         b(i + 1).SetDerivativeGlobal(i, -2. - 20.);
486     }
487 
488     Vector<Gradient<N>, N> c, c1;
489 
490     callFunc(&dof, a, b, c, c1);
491 
492     Vector<Gradient<N>, N> d = -c;
493 
494     std::cout << "c=" << std::endl << c << std::endl;
495     std::cout << "d=" << std::endl << d << std::endl;
496 
497     const doublereal dTol = sqrt(std::numeric_limits<scalar_deriv_type>::epsilon());
498 
499     for (int i = 1; i <= N; ++i) {
500     	assert(bCompare(c(i), c1(i), dTol));
501     	assert(bCompare(d(i), Gradient<N>(-c(i))));
502     }
503 
504     for (int i = 0; i < N; ++i) {
505     	c_C[i] = c(i + 1).dGetValue();
506 
507     	for (int j = 0; j < N; ++j) {
508     		cd_C[i][j] = c(i + 1).dGetDerivativeGlobal(j);
509     	}
510     }
511 
512     Gradient<N> s1;
513 
514     for (int i = 1; i <= N; ++i) {
515     	s1 += c(i);
516     }
517 
518     Gradient<N> s2 = Sum(c);
519 
520     Gradient<N> s4;
521 
522     for (int i = 1; i <= N; ++i) {
523     	s4 += (a(i) * 2 - b(i) * 3) / 1.5 + c(i);
524     }
525 
526     Gradient<N> s3 = Sum((a * 2 - b * 3) / 1.5 + c);
527 
528     Gradient<N> d1 = Dot(a, b);
529     Gradient<N> d2;
530 
531     for (int i = 1; i <= N; ++i) {
532     	d2 += a(i) * b(i);
533     }
534 
535     Gradient<N> d3 = Dot(a + b, a - b);
536     Gradient<N> d4;
537 
538     for (int i = 1; i <= N; ++i) {
539     	d4 += (a(i) + b(i)) * (a(i) - b(i));
540     }
541 
542     assert(d3.bIsEqual(d4));
543 
544     d3 = Dot(a + b, b);
545 
546     d4.SetValue(0.);
547 
548     for (int i = 1; i <= N; ++i) {
549     	d4 += (a(i) + b(i)) * b(i);
550     }
551 
552     assert(d3.bIsEqual(d4));
553 
554     d3 = Dot(b, a + b);
555 
556     std::cout << "s1=" << s1 << std::endl;
557     std::cout << "s2=" << s2 << std::endl;
558     std::cout << "s3=" << s3 << std::endl;
559     std::cout << "s4=" << s4 << std::endl;
560     std::cout << "d1=" << d1 << std::endl;
561     std::cout << "d2=" << d2 << std::endl;
562     std::cout << "d3=" << d3 << std::endl;
563     std::cout << "d4=" << d4 << std::endl;
564 
565     assert(bCompare(d3, d4, dTol));
566     assert(bCompare(s2, s1, dTol));
567     assert(bCompare(s4, s3, dTol));
568     assert(bCompare(d2, d1, dTol));
569 
570     const Vector<Gradient<N>, N> d5 = (d + c + c1) * (3.5 / (c(1) + c(2)) * c(1) / 2. * (c(1) - c(3)) / c(2));
571 
572     d += c + c1;
573 
574     d *= 3.5;
575 
576     d /= c(1) + c(2);
577 
578     d *= c(1);
579 
580     d /= 2.;
581 
582     d *= c(1) - c(3);
583 
584     d /= c(2);
585 
586     std::cout << "d=" << d << std::endl;
587     std::cout << "d5=" << d5 << std::endl;
588 
589     for (int i = 1; i <= N; ++i) {
590     	assert(bCompare(d(i), d5(i), dTol));
591     }
592 }
593 
594 #ifdef HAVE_BLITZ
595 template <index_type N>
testMatVecGradientBlitz(doublereal c_C[N],doublereal cd_C[N][N])596 void testMatVecGradientBlitz(doublereal c_C[N], doublereal cd_C[N][N]) {
597     LocalDofMap dof;
598     blitz::TinyVector<Gradient<N>, N> a, b;
599 
600     for (index_type i = 0; i < N; ++i) {
601         a(i).SetValuePreserve(100*(i + 1));
602         b(i).SetValuePreserve(1000*(i + 10));
603 
604         a(i).DerivativeResizeReset(&dof, i, MapVectorBase::GLOBAL, 0.);
605         a(i).SetDerivativeGlobal(i, -1. - 10.);
606         b(i).DerivativeResizeReset(&dof, i, MapVectorBase::GLOBAL, 0.);
607         b(i).SetDerivativeGlobal(i, -2. - 20.);
608     }
609 
610     blitz::TinyVector<Gradient<N>, N> c;
611 
612 	srand(0);
613     tic();
614     for (int i = 0; i < NLoops; ++i) {
615     	func(&dof, a, b, c);
616     }
617 
618     for (int i = 0; i < N; ++i) {
619     	c_C[i] = c(i).dGetValue();
620 
621     	for (int j = 0; j < N; ++j) {
622     		cd_C[i][j] = c(i).dGetDerivativeGlobal(j);
623     	}
624     }
625 
626     std::cerr << "blitz vector (Gradient): " << toc() << "s" << std::endl;
627 
628     std::cout << "a=" << std::endl << a << std::endl;
629     std::cout << "b=" << std::endl << b << std::endl;
630     std::cout << "c=" << std::endl << c << std::endl;
631 }
632 #endif
633 
634 template <index_type N>
testMatVecDouble(doublereal c_C[N])635 void testMatVecDouble(doublereal c_C[N]) {
636     Vector<doublereal, N> a, b;
637 
638     for (index_type i = 0; i < N; ++i) {
639         a(i + 1) = 100*(i + 1);
640         b(i + 1) = 1000*(i + 10);
641     }
642 
643     Vector<doublereal, N> c, c1;
644 
645     callFunc(0, a, b, c, c1);
646 
647     Vector<doublereal, N> d = -c;
648 
649     const doublereal dTol = sqrt(std::numeric_limits<scalar_deriv_type>::epsilon());
650 
651     for (int i = 1; i <= N; ++i) {
652     	assert(bCompare(c(i), c1(i), dTol));
653     	assert(d(i) == -c(i));
654     }
655 
656     for (int i = 0; i < N; ++i) {
657     	c_C[i] = c(i + 1);
658     }
659 
660     doublereal s1 = 0.;
661 
662     for (int i = 0; i < N; ++i) {
663     	s1 += c(i + 1);
664     }
665 
666     doublereal s2 = Sum(c);
667 
668     assert(bCompare(s2, s1, dTol));
669 
670     doublereal s3 = Sum((a * 2 - b * 3) / 1.5 + c);
671 
672     doublereal s4 = 0.;
673 
674     for (int i = 1; i <= N; ++i) {
675     	s4 += (a(i) * 2 - b(i) * 3) / 1.5 + c(i);
676     }
677 
678     assert(bCompare(s2, s1, dTol));
679     assert(bCompare(s4, s3, dTol));
680 
681     doublereal d1 = Dot(a, b);
682     doublereal d2 = 0.;
683 
684     for (int i = 1; i <= N; ++i) {
685     	d2 += a(i) * b(i);
686     }
687 
688     assert(bCompare(d2, d1, std::numeric_limits<scalar_deriv_type>::epsilon()));
689 
690     std::cout << "s1=" << s1 << std::endl;
691     std::cout << "s2=" << s2 << std::endl;
692     std::cout << "s3=" << s3 << std::endl;
693     std::cout << "s4=" << s4 << std::endl;
694     std::cout << "d1=" << d1 << std::endl;
695     std::cout << "d2=" << d2 << std::endl;
696 
697     const Vector<doublereal, N> d5 = (d + c + c1) * (3.5 / (c(1) + c(2)) * c(1) / 2. * (c(1) - c(3)) / c(2));
698 
699     d += c + c1;
700 
701     d *= 3.5;
702 
703     d /= c(1) + c(2);
704 
705     d *= c(1);
706 
707     d /= 2.;
708 
709     d *= c(1) - c(3);
710 
711     d /= c(2);
712 
713     for (int i = 1; i <= N; ++i) {
714     	assert(bCompare(d(i), d5(i), dTol));
715     }
716 
717     std::cout << "d=" << d << std::endl;
718     std::cout << "d5=" << d5 << std::endl;
719 }
720 
721 template <index_type N_SIZE>
testMatVecGradient2()722 void testMatVecGradient2() {
723     LocalDofMap dof;
724     Matrix<Gradient<N_SIZE>, 3, 3> A;
725     Vector<Gradient<N_SIZE>, 3> b, c;
726 
727     for (index_type i = 0; i < 3; ++i) {
728         b(i + 1).SetValuePreserve(100*(i + 1));
729         b(i + 1).DerivativeResizeReset(&dof, 0L, N_SIZE, MapVectorBase::GLOBAL, 0.);
730         c(i + 1).SetValuePreserve(1000*(i + 10));
731         c(i + 1).DerivativeResizeReset(&dof, 0L, N_SIZE, MapVectorBase::GLOBAL, 0.);
732 
733         for (index_type k = 0; k < N_SIZE; ++k) {
734         	c(i + 1).SetDerivativeGlobal(k, 3.5 * (k + 1) + 3.2 * (i + 1));
735         	b(i + 1).SetDerivativeGlobal(k, -17.4 * (k + 1) + 7.5 *(i + 1));
736         }
737 
738         for (index_type j = 0; j < 3; ++j) {
739             A(i + 1, j + 1).SetValuePreserve(100*(i + 1) + j + 1);
740             A(i + 1, j + 1).DerivativeResizeReset(&dof, 0L, N_SIZE, MapVectorBase::GLOBAL, 0.);
741             for (index_type k = 0; k < N_SIZE; ++k) {
742             	A(i + 1, j + 1).SetDerivativeGlobal(k, -9.2 * (k + 1.) + 10.5 * (i + 1) + 3.2 * (j + 1) + 7.5);
743             }
744         }
745     }
746 
747     Vector<Gradient<N_SIZE>, 3> d, d_C, d_F;
748 
749     callFunc2(&dof, A, b, c, d, d_C, d_F);
750 
751     Matrix<Gradient<N_SIZE>, 3, 3> A_2 = A * 2.;
752     Vector<Gradient<N_SIZE>, 3> b_2 = b * 2.;
753 
754     A = Alias(A) * 2.; // Test self assignment
755     b = Alias(b) * 2.;
756 
757     for (index_type i = 1; i < 3; ++i) {
758     	assert(bCompare(b(i), b_2(i)));
759 
760     	for (index_type j = 1; j < 3; ++j) {
761     		assert(bCompare(A(i, j), A_2(i, j)));
762     	}
763     }
764 
765     const doublereal dTol = sqrt(std::numeric_limits<scalar_deriv_type>::epsilon());
766 
767     Matrix<Gradient<N_SIZE>, 3, 3> A_3 = (A + Transpose(A) * 0.75) * (2. / b(1));
768 
769     A += Transpose(Alias(A)) * 0.75;
770     A *= 2.;
771     A /= b(1);
772 
773     for (index_type i = 1; i < 3; ++i) {
774     	for (index_type j = 1; j < 3; ++j) {
775     		assert(bCompare(A(i, j), A_3(i, j), dTol));
776     	}
777     }
778 
779     Matrix<Gradient<N_SIZE>, 3, 3> A_4 = A * A(2, 3);
780     A *= Alias(A(2, 3));
781 
782     for (index_type i = 1; i < 3; ++i) {
783     	for (index_type j = 1; j < 3; ++j) {
784     		assert(bCompare(A(i, j), A_4(i, j), dTol));
785     	}
786     }
787 
788     Vector<Gradient<N_SIZE>, 3> b_3 = b * b(2);
789 
790     b *= Alias(b(2));
791 
792     for (index_type i = 1; i <= 3; ++i) {
793     	assert(bCompare(b(i), b_3(i), dTol));
794     }
795 
796     Vector<Gradient<N_SIZE>, 3> b_4 = b / sqrt(Dot(b, b));
797 
798     b /= sqrt(Dot(Alias(b), b));
799 
800     for (index_type i = 1; i <= 3; ++i) {
801     	assert(bCompare(b(i), b_4(i), dTol));
802     }
803 
804     Vector<Gradient<N_SIZE>, 3> b_5 = ((b + b * 0.75 / b(1)) * (2. / (A(1, 3) + A(2, 3))) * A(2, 1) + Transpose(A).GetCol(3)) / A(1, 1) / 3.8;
805 
806     b += b * 0.75 / Alias(b(1));
807     b *= 2.;
808     b /= (A(1, 3) + A(2, 3));
809     b *= A(2, 1);
810     b += Transpose(A).GetCol(3);
811     b /= A(1, 1);
812     b /= 3.8;
813 
814     for (index_type i = 1; i <= 3; ++i) {
815     	assert(bCompare(b(i), b_5(i), dTol));
816     }
817 
818     Vector<Gradient<N_SIZE>, 2> b23 = SubVector<2, 3>(b);
819     Vector<Gradient<N_SIZE>, 2> b12 = SubVector<1, 2>(b);
820     assert(bCompare(b23(1), b(2)));
821     assert(bCompare(b23(2), b(3)));
822     assert(bCompare(b12(1), b(1)));
823     assert(bCompare(b12(2), b(2)));
824     Vector<Gradient<N_SIZE>, 2> e = SubVector<2, 3>(b / 2.) + SubVector<1, 2>(b * 3.);
825     assert(bCompare(e(1), Gradient<N_SIZE>(b(2) / 2. + b(1) * 3), dTol));
826     assert(bCompare(e(2), Gradient<N_SIZE>(b(3) / 2. + b(2) * 3), dTol));
827 
828     A = Alias(A) * 0.5 + Transpose(A) * 3.5;
829     b = Alias(b) * 2.3 + c * 5.0;
830 }
831 
testMatVecDouble2()832 void testMatVecDouble2() {
833     Matrix<doublereal, 3, 3> A;
834     Vector<doublereal, 3> b, c;
835 
836     for (index_type i = 0; i < 3; ++i) {
837         b(i + 1)=(100*(i + 1));
838         c(i + 1)=(1000*(i + 10));
839 
840         for (index_type j = 0; j < 3; ++j) {
841             A(i + 1, j + 1) = (100*(i + 1) + j + 1);
842         }
843     }
844 
845     Vector<doublereal, 3> d, d_C, d_F;
846 
847     callFunc2(0, A, b, c, d, d_C, d_F);
848 }
849 
testMatVecProduct()850 void testMatVecProduct() {
851     Matrix<doublereal, 3, 4> A;
852     Matrix<doublereal, 3, 3> R;
853     Matrix<doublereal, 4, 2> B;
854 
855     for (index_type i = 0; i < A.iGetNumRows(); ++i) {
856     	for (index_type j = 0; j < A.iGetNumCols(); ++j) {
857     		A(i + 1, j + 1) = 10 * (i + 1) + j + 1;
858     	}
859     }
860 
861     for (index_type i = 0; i < R.iGetNumRows(); ++i) {
862     	for (index_type j = 0; j < R.iGetNumCols(); ++j) {
863     		R(i + 1, j + 1) = 10 * (i + 1) + j + 1;
864     	}
865     }
866 
867     for (index_type i = 0; i < B.iGetNumRows(); ++i) {
868     	for (index_type j = 0; j < B.iGetNumCols(); ++j) {
869     		B(i + 1, j + 1) = (10 * (i + 1) + j + 1);
870     	}
871     }
872 
873     std::cout << "A=" << std::endl << A << std::endl;
874 
875     Matrix<doublereal, 4, 3> A_T = Transpose(A);
876 
877     std::cout << "A^T=" << std::endl << A_T << std::endl;
878 
879     Matrix<doublereal, 4, 3> A_T2 = Transpose(Direct(A));
880 
881     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
882     	for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
883     		assert(bCompare(A(i, j), A_T(j, i)));
884     		assert(bCompare(A(i, j), A_T2(j, i)));
885     	}
886     }
887 
888     Matrix<doublereal, 3, 4> A2 = Transpose(Transpose(A));
889 
890     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
891     	for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
892     		assert(bCompare(A2(i, j), A(i, j)));
893     	}
894     }
895 
896     std::cout << "\nrows of A:" << std::endl;
897 
898     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
899     	Matrix<doublereal, 3, 4>::RowVectorType r = A.GetRow(i);
900     	std::cout << i - 1 << ": ";
901 
902     	for (index_type j = 1; j <= r.iGetNumRows(); ++j) {
903     		assert(r(j) == A(i, j));
904     		std::cout << r(j) << " ";
905     	}
906 
907     	std::cout << std::endl;
908     }
909 
910     std::cout << "\ncolumns of A:" << std::endl;
911 
912     for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
913     	Matrix<doublereal, 3, 4>::ColumnVectorType c = A.GetCol(j);
914     	std::cout << j - 1 << ": ";
915 
916     	for (index_type i = 1; i <= c.iGetNumRows(); ++i) {
917     		assert(c(i) == A(i, j));
918     		std::cout << c(i) << " ";
919     	}
920 
921     	std::cout << std::endl;
922     }
923 
924     Matrix<doublereal, 3, 4> D = -A;
925     Matrix<doublereal, 3, 3> E = -(A * Transpose(A));
926 
927 
928     for (int i = 1; i <= 3; ++i) {
929     	for (int j = 1; j <= 3; ++j) {
930     		assert(bCompare(D(i, j), -A(i, j)));
931     	}
932     }
933 
934     assert(E(1, 1) == -630);
935     assert(E(1, 2) == -1130);
936     assert(E(1, 3) == -1630);
937     assert(E(2, 1) == -1130);
938     assert(E(2, 2) == -2030);
939     assert(E(2, 3) == -2930);
940     assert(E(3, 1) == -1630);
941     assert(E(3, 2) == -2930);
942     assert(E(3, 3) == -4230);
943 
944     Vector<doublereal, 4> x;
945     Vector<doublereal, 3> b;
946 
947     for (index_type i = 0; i < x.iGetNumRows(); ++i) {
948     	x(i + 1) = 100 * (i + 1);
949     }
950 
951     for (index_type i = 1; i <= b.iGetNumRows(); ++i) {
952     	b(i) = Dot(A.GetRow(i), x);
953     }
954 
955     Vector<Gradient<0>, 3> grad_b = Direct(b);
956     Vector<doublereal, 3> b2 = A * x;
957     Vector<doublereal, 3> b3 = b + A * x + b;
958     Vector<doublereal, 3> b4 = Direct(A) * Direct(x);
959     Vector<doublereal, 3> b5 = Direct(A) * x;
960     Vector<doublereal, 3> b6 = A * Direct(x);
961     Vector<doublereal, 3> c = R * (A * x);
962     Vector<doublereal, 4> d = Transpose(A) * b;
963     Vector<doublereal, 4> e = Transpose(A) * (A * x);
964     Vector<doublereal, 3> f = A * (Transpose(A) * (A * (x + x)));
965 
966     for (index_type i = 1; i <= d.iGetNumRows(); ++i) {
967     	assert(d(i) == e(i));
968     }
969 
970     Matrix<doublereal, 3, 2> C = Direct(A) * Direct(B);
971     Vector<doublereal, 2> g;
972     g(1) = 523;
973     g(2) = -786;
974     Vector<doublereal, 3> h = A * (B * g);
975     Vector<doublereal, 3> h2 = (Direct(A) * Direct(B)) * g;
976     Vector<doublereal, 3> h3 = (Direct(A) * B) * g;
977     Vector<doublereal, 3> h4 = (A * Direct(B)) * g;
978     Vector<doublereal, 3> h5 = (A * B) * g;
979     Vector<doublereal, 3> h6 = (A * B) * Direct(g);
980 
981     for (index_type i = 1; i <= h.iGetNumRows(); ++i) {
982     	assert(h(i) == h2(i));
983     	assert(h(i) == h3(i));
984     	assert(h(i) == h4(i));
985     	assert(h(i) == h5(i));
986     	assert(h(i) == h6(i));
987     }
988 
989     std::cout << "B=" << std::endl << B << std::endl;
990     std::cout << "A * B  = C" << std::endl << C << std::endl;
991 
992     std::cout << "R=" << std::endl << R << std::endl;
993     std::cout << "x = " << std::endl << x << std::endl;
994     std::cout << "A * x = b" << std::endl << b << std::endl;
995     std::cout << "R * A * x = c" << std::endl << c << std::endl;
996     std::cout << "A^T * b = d" << std::endl << d << std::endl;
997     std::cout << "A^T * A * x = e" << std::endl << e << std::endl;
998     std::cout << "A * A^T * A * 2 * x = f" << std::endl << f << std::endl;
999     std::cout << "A * B * g = h" << std::endl << h << std::endl;
1000 
1001     for (index_type i = 1; i <= b2.iGetNumRows(); ++i) {
1002     	assert(b2(i) == b(i));
1003     	assert(b3(i) == 3 * b(i));
1004     	assert(b4(i) == b(i));
1005     	assert(b5(i) == b(i));
1006     	assert(b6(i) == b(i));
1007     }
1008 
1009     assert(b(1) == 13000);
1010     assert(b(2) == 23000);
1011     assert(b(3) == 33000);
1012 
1013     assert(c(1) == 848000);
1014     assert(c(2) == 1538000);
1015     assert(c(3) == 2228000);
1016 
1017     assert(d(1) == 1649000.00);
1018     assert(d(2) == 1718000.00);
1019     assert(d(3) == 1787000.00);
1020     assert(d(4) == 1856000.00);
1021 
1022     assert(C(1, 1) == 1.35e3);
1023     assert(C(2, 1) == 2.39e3);
1024     assert(C(3, 1) == 3.43e3);
1025     assert(C(1, 2) == 1.4e3);
1026     assert(C(2, 2) == 2.48e3);
1027     assert(C(3, 2) == 3.56e3);
1028 
1029     assert(h(1) == -394350);
1030     assert(h(2) == -699310);
1031     assert(h(3) == -1004270);
1032 
1033     Vector<doublereal, 2> b23 = SubVector<2, 3>(b);
1034     Vector<doublereal, 2> b12 = SubVector<1, 2>(b);
1035     assert(bCompare(b23(1), b(2)));
1036     assert(bCompare(b23(2), b(3)));
1037     assert(bCompare(b12(1), b(1)));
1038     assert(bCompare(b12(2), b(2)));
1039     Vector<doublereal, 2> e123 = SubVector<2, 3>(b / 2.) + SubVector<1, 2>(b * 3.);
1040     assert(bCompare(e123(1), (b(2) / 2. + b(1) * 3)));
1041     assert(bCompare(e123(2), (b(3) / 2. + b(2) * 3)));
1042 
1043     Matrix<doublereal, 5, 7> F;
1044 
1045     for (index_type i = 1; i <= 5; ++i) {
1046     	for (index_type j = 1; j <= 7; ++j) {
1047     		F(i, j) = i * 10 + j;
1048     	}
1049     }
1050 
1051     Matrix<doublereal, 2, 3> G = SubMatrix<3, 4, 5, 7>(F);
1052 
1053     for (index_type i = 1; i <= 2; ++i) {
1054     	for (index_type j = 1; j <= 3; ++j) {
1055     		assert(bCompare(G(i, j), F(i + 2, j + 4)));
1056     	}
1057     }
1058 
1059     Vector<doublereal, 3> G1 = SubMatrix<4, 4, 2, 4>(F).GetRow(1);
1060 
1061     assert(bCompare(G1(1), F(4, 2)));
1062     assert(bCompare(G1(2), F(4, 3)));
1063     assert(bCompare(G1(3), F(4, 4)));
1064 
1065     Vector<doublereal, 2> G2 = SubMatrix<1, 2, 5, 7>(F).GetCol(1);
1066 
1067     assert(bCompare(G2(1), F(1, 5)));
1068     assert(bCompare(G2(2), F(2, 5)));
1069 
1070     Vector<doublereal, 2> G3 = SubMatrix<1, 2, 5, 7>(F).GetCol(2);
1071 
1072     assert(bCompare(G3(1), F(1, 6)));
1073     assert(bCompare(G3(2), F(2, 6)));
1074 
1075 
1076     Vector<doublereal, 2> G4 = SubMatrix<1, 2, 5, 7>(F).GetCol(3);
1077 
1078     assert(bCompare(G4(1), F(1, 7)));
1079     assert(bCompare(G4(2), F(2, 7)));
1080 
1081     Matrix<doublereal, 3, 4> M = SubMatrix<2, 4, 2, 5>(F) * SubMatrix<2, 5, 4, 7>(F);
1082 
1083     assert(bCompare(M(1, 1), 3716.));
1084     assert(bCompare(M(1, 2), 3810.));
1085     assert(bCompare(M(1, 3), 3904.));
1086     assert(bCompare(M(1, 4), 3998.));
1087     assert(bCompare(M(2, 1), 5276.));
1088     assert(bCompare(M(2, 2), 5410.));
1089     assert(bCompare(M(2, 3), 5544.));
1090     assert(bCompare(M(2, 4), 5678.));
1091     assert(bCompare(M(3, 1), 6836.));
1092     assert(bCompare(M(3, 2), 7010.));
1093     assert(bCompare(M(3, 3), 7184.));
1094     assert(bCompare(M(3, 4), 7358.));
1095 
1096     std::cout << "F=\n" << Tabular(F, 5) << std::endl;
1097     std::cout << "G=\n" << Tabular(G, 5) << std::endl;
1098     std::cout << "G1=\n" << G1 << std::endl;
1099     std::cout << "G2=\n" << G2 << std::endl;
1100     std::cout << "G3=\n" << G3 << std::endl;
1101     std::cout << "G4=\n" << G4 << std::endl;
1102     std::cout << "M=\n" << Tabular(M, 5) << std::endl;
1103 }
1104 
1105 namespace testMatVecProductGradient_testData {
1106 
1107 template <typename S, index_type N_rows, index_type N_SIZE>
testGradient(const S & ref,const Vector<Gradient<N_SIZE>,N_rows> & v,doublereal dTol)1108 void testGradient(const S& ref, const Vector<Gradient<N_SIZE>, N_rows>& v, doublereal dTol) {
1109 	for (index_type i = 0; i < index_type(sizeof(ref.val)/sizeof(ref.val[0])); ++i) {
1110 		assert(bCompare(v(i + 1).dGetValue(), ref.val[i], dTol));
1111 		for (index_type j = 0; j < index_type(sizeof(ref.der[0])/sizeof(ref.der[0][0])); ++j) {
1112 			std::cout << "v(" << i + 1 << ")=" << v(i + 1).dGetDerivativeGlobal(j + 1) << std::endl;
1113 			std::cout << "ref.der[" << i << "][" << j << "]=" << ref.der[i][j] << std::endl;
1114 
1115 			const bool bOK = bCompare<scalar_deriv_type>(v(i + 1).dGetDerivativeGlobal(j + 1), ref.der[i][j], dTol);
1116 			std::cout << "dTol=" << dTol << " :[" << (bOK ? "OK" : "NOK") << "]" << std::endl;
1117 			assert(bOK);
1118 		}
1119 	}
1120 }
1121 
1122 template <typename S, index_type N_SIZE>
testGradient(const S & ref,const Gradient<N_SIZE> & g,doublereal dTol)1123 void testGradient(const S& ref, const Gradient<N_SIZE>& g, doublereal dTol) {
1124 	for (index_type i = 0; i < index_type(sizeof(ref.val)/sizeof(ref.val[0])); ++i) {
1125 		assert(bCompare(g.dGetValue(), ref.val[i], dTol));
1126 		for (index_type j = 0; j < index_type(sizeof(ref.der[0])/sizeof(ref.der[0][0])); ++j) {
1127 			assert(bCompare<scalar_deriv_type>(g.dGetDerivativeGlobal(j + 1), ref.der[i][j], dTol));
1128 		}
1129 	}
1130 }
1131 
1132 static const struct test_b {
1133 doublereal val[3];
1134 doublereal der[3][12];
1135 } oct_b = {
1136 {6300,
1137 11300,
1138 16300},
1139 {{220, 0, 0, 240, 0, 0, 260, 0, 0, 280, 0, 0},
1140 {210, 110, 0, 220, 120, 0, 230, 130, 0, 240, 140, 0},
1141 {310, 0, 110, 320, 0, 120, 330, 0, 130, 340, 0, 140}}};
1142 
1143 static const struct test_c {
1144 doublereal val[3];
1145 doublereal der[3][12];
1146 } oct_c = {
1147 {-6300,
1148 -11300,
1149 -16300},
1150 {{-220, 0, 0, -240, 0, 0, -260, 0, 0, -280, 0, 0},
1151 {-210, -110, 0, -220, -120, 0, -230, -130, 0, -240, -140, 0},
1152 {-310, 0, -110, -320, 0, -120, -330, 0, -130, -340, 0, -140}}};
1153 
1154 static const struct test_d {
1155 doublereal val[3];
1156 doublereal der[3][12];
1157 } oct_d = {
1158 {0,
1159 0,
1160 0},
1161 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1162 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1163 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}};
1164 
1165 static const struct test_e {
1166 doublereal val[3];
1167 doublereal der[3][12];
1168 } oct_e = {
1169 {0,
1170 0,
1171 0},
1172 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1173 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1174 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}};
1175 
1176 static const struct test_f {
1177 doublereal val[3];
1178 doublereal der[3][12];
1179 } oct_f = {
1180 {0,
1181 0,
1182 0},
1183 {{0, 0, -0, 0, 0, -0, 0, 0, -0, 0, 0, -0},
1184 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1185 {0, -0, 0, 0, -0, 0, 0, -0, 0, 0, -0, 0}}};
1186 
1187 static const struct test_g {
1188 doublereal val[3];
1189 doublereal der[3][12];
1190 } oct_g = {
1191 {6300,
1192 11300,
1193 16300},
1194 {{220, 0, 0, 240, 0, 0, 260, 0, 0, 280, 0, 0},
1195 {210, 110, 0, 220, 120, 0, 230, 130, 0, 240, 140, 0},
1196 {310, 0, 110, 320, 0, 120, 330, 0, 130, 340, 0, 140}}};
1197 
1198 static const struct test_i {
1199 doublereal val[3];
1200 doublereal der[3][12];
1201 } oct_i = {
1202 {0,
1203 0,
1204 -0},
1205 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1206 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1207 {-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0}}};
1208 
1209 static const struct test_j {
1210 doublereal val[3];
1211 doublereal der[3][12];
1212 } oct_j = {
1213 {-74031.42857142857,
1214 -50797.14285714286,
1215 63828.57142857143},
1216 {{-1390.571428571429, -389.7142857142857, -229.4285714285714, -1446.857142857143, -425.1428571428572, -250.2857142857143, -1503.142857142857, -460.5714285714286, -271.1428571428572, -1559.428571428571, -496, -292},
1217 {-611.1428571428571, 0, -493.4285714285714, -585.1428571428571, 0, -538.2857142857143, -559.1428571428571, 0, -583.1428571428571, -533.1428571428571, 0, -628},
1218 {1400.857142857143, 493.4285714285714, 0, 1487.428571428571, 538.2857142857143, 0, 1574, 583.1428571428571, 0, 1660.571428571429, 628, 0}}};
1219 
1220 static const struct test_l {
1221 doublereal val[3];
1222 doublereal der[3][12];
1223 } oct_l = {
1224 {163936.0000000001,
1225 -1920098.285714285,
1226 -1337944.571428571},
1227 {{-2648.085714285714, -3602.028571428571, 6118.514285714286, -3602.457142857142, -3929.485714285714, 6674.742857142858, -4556.82857142857, -4256.942857142857, 7230.971428571428, -5511.200000000002, -4584.4, 7787.2},
1228 {-39236.54285714286, -12579.28571428571, -2844.914285714286, -41293.65714285714, -13722.85714285714, -3103.542857142857, -43350.77142857143, -14866.42857142857, -3362.171428571429, -45407.88571428572, -16010, -3620.8},
1229 {-19746.11428571428, -2844.914285714286, -9421.657142857142, -19748.8, -3103.542857142857, -10278.17142857143, -19751.48571428571, -3362.171428571428, -11134.68571428571, -19754.17142857143, -3620.8, -11991.2}}};
1230 
1231 static const struct test_m {
1232 doublereal val[3];
1233 doublereal der[3][12];
1234 } oct_m = {
1235 {819680.0000000002,
1236 -9600491.428571427,
1237 -6689722.857142856},
1238 {{-13240.42857142857, -18010.14285714286, 30592.57142857143, -18012.28571428571, -19647.42857142857, 33373.71428571429, -22784.14285714285, -21284.71428571429, 36154.85714285714, -27556.00000000001, -22922, 38936},
1239 {-196182.7142857143, -62896.42857142857, -14224.57142857143, -206468.2857142857, -68614.28571428572, -15517.71428571428, -216753.8571428572, -74332.14285714284, -16810.85714285714, -227039.4285714286, -80050, -18104},
1240 {-98730.57142857142, -14224.57142857143, -47108.28571428571, -98743.99999999997, -15517.71428571429, -51390.85714285714, -98757.42857142858, -16810.85714285714, -55673.42857142857, -98770.85714285713, -18104, -59956}}};
1241 
1242 static const struct test_r {
1243 doublereal val[1];
1244 doublereal der[1][12];
1245 } oct_r = {
1246 {218540},
1247 {{5765, -803, 1364, 6130, -876, 1488, 6495, -949, 1612, 6860, -1022, 1736}}};
1248 
1249 static const struct test_s {
1250 doublereal val[1];
1251 doublereal der[1][12];
1252 } oct_s = {
1253 {218540},
1254 {{5765, -803, 1364, 6130, -876, 1488, 6495, -949, 1612, 6860, -1022, 1736}}};
1255 
1256 static const struct test_t {
1257 doublereal val[1];
1258 doublereal der[1][12];
1259 } oct_t = {
1260 {433070000},
1261 {{17624000, 2486000, 3586000, 18428000, 2712000, 3912000, 19232000, 2938000, 4238000, 20036000, 3164000, 4564000}}};
1262 
1263 static const struct test_norm_g {
1264 double val[1];
1265 doublereal der[1][12];
1266 } oct_norm_g = {
1267 {20810.33397137105},
1268 {{423.4434686210582, 59.72994002450923, 86.15911702650448, 442.760794357062, 65.15993457219189, 93.99176402891398, 462.0781200930658, 70.58992911987455, 101.8244110313235, 481.3954458290696, 76.01992366755721, 109.657058033733}}};
1269 
1270 }
1271 
1272 template <typename T, index_type N_rows>
1273 inline T
Norm_1(const Vector<T,N_rows> & u)1274 Norm_1(const Vector<T, N_rows>& u) {
1275 	return sqrt(Dot(u, u));
1276 }
1277 
1278 extern "C" void __FC_DECL__(func2addad_dv)(const doublereal x[],
1279 										   const doublereal xd[],
1280 										   const doublereal y[],
1281 										   const doublereal yd[],
1282 										   doublereal z[],
1283 										   doublereal zd[],
1284 										   const integer& n,
1285 										   const integer& nbdirs);
1286 
1287 extern "C" void __FC_DECL__(func2mulad_dv)(const doublereal x[],
1288 										   const doublereal xd[],
1289 										   const doublereal y[],
1290 										   const doublereal yd[],
1291 										   doublereal z[],
1292 										   doublereal zd[],
1293 										   const integer& n,
1294 										   const integer& nbdirs);
1295 
1296 template <typename T, index_type iRowCount>
doVecAdd(const Vector<T,iRowCount> & x,const Vector<T,iRowCount> & y,Vector<T,iRowCount> & z)1297 void doVecAdd(const Vector<T, iRowCount>& x, const Vector<T, iRowCount>& y, Vector<T, iRowCount>& z)
1298 {
1299 	z = x + y;
1300 }
1301 
1302 template <typename T, index_type iRowCount>
doVecMul(const Vector<T,iRowCount> & x,const Vector<T,iRowCount> & y,Vector<T,iRowCount> & z)1303 void doVecMul(const Vector<T, iRowCount>& x, const Vector<T, iRowCount>& y, Vector<T, iRowCount>& z)
1304 {
1305 	for (index_type i = 1; i <= z.iGetNumRows(); ++i)
1306 	{
1307 		z(i) = x(i) * y(i);
1308 	}
1309 }
1310 
1311 template <index_type iRowCount, index_type iMaxDeriv, typename Function, typename FunctionF77>
testVecOp(const int M,const int N,Function f,FunctionF77 f77,const char * function)1312 void testVecOp(const int M, const int N, Function f, FunctionF77 f77, const char* function) {
1313     Vector<Gradient<iMaxDeriv>, iRowCount> x, y, z;
1314     LocalDofMap dof;
1315 
1316     for (index_type i = 1; i <= x.iGetNumRows(); ++i) {
1317 		x(i).SetValuePreserve(i * 100);
1318 		x(i).DerivativeResizeReset(&dof, 0, N, MapVectorBase::GLOBAL, 0.);
1319 		y(i).SetValuePreserve(i);
1320 		y(i).DerivativeResizeReset(&dof, 0, N, MapVectorBase::GLOBAL, 0.);
1321 
1322         for (int j = 0; j < N; ++j) {
1323 			x(i).SetDerivativeGlobal(j, (j + 1));
1324 			y(i).SetDerivativeGlobal(j, (j + 1));
1325 		}
1326     }
1327 
1328     double start = mbdyn_clock_time();
1329 
1330     for (int i = 0; i < M; ++i) {
1331     	f(x, y, z);
1332     }
1333 
1334     const double dt = mbdyn_clock_time() - start;
1335 
1336     std::cerr << "C++: testVecAdd<" << iRowCount << "," << iMaxDeriv << ">(" << M << ", " << N << ", \"" << function << "\"):" << dt << std::endl;
1337 
1338     doublereal xF[iRowCount], yF[iRowCount], zF[iRowCount];
1339     doublereal *xdF = new doublereal[iRowCount*N];
1340     doublereal *ydF = new doublereal[iRowCount*N];
1341     doublereal *zdF = new doublereal[iRowCount*N];
1342 
1343     for (int i = 0; i < iRowCount; ++i) {
1344     	xF[i] = x(i + 1).dGetValue();
1345     	yF[i] = y(i + 1).dGetValue();
1346     	zF[i] = 0.;
1347 
1348     	for (int j = 0; j < N; ++j) {
1349     		xdF[i * N + j] = x(i + 1).dGetDerivativeLocal(j);
1350     		ydF[i * N + j] = y(i + 1).dGetDerivativeLocal(j);
1351     		zdF[i * N + j] = 0.;
1352     	}
1353     }
1354 
1355     start = mbdyn_clock_time();
1356 
1357     for (int i = 0; i < M; ++i) {
1358     	f77(xF, xdF, yF, ydF, zF, zdF, iRowCount, N);
1359     }
1360 
1361     const double dtF77 = mbdyn_clock_time() - start;
1362 
1363     std::cerr << "F77: testVecAdd<" << iRowCount << "," << iMaxDeriv << ">(" << M << ", " << N << ", \"" << function << "\"):" << dtF77 << std::endl;
1364     std::cerr << "overhead=" << dt/std::max(std::numeric_limits<doublereal>::epsilon(), dtF77) << std::endl;
1365 
1366     for (int i = 0; i < iRowCount; ++i) {
1367     	assert(xF[i] == x(i + 1).dGetValue());
1368     	assert(yF[i] == y(i + 1).dGetValue());
1369     	assert(zF[i] == z(i + 1).dGetValue());
1370 
1371     	for (int j = 0; j < N; ++j) {
1372     		assert(xdF[i * N + j] == x(i + 1).dGetDerivativeLocal(j));
1373     		assert(ydF[i * N + j] == y(i + 1).dGetDerivativeLocal(j));
1374     		assert(zdF[i * N + j] == z(i + 1).dGetDerivativeLocal(j));
1375     	}
1376     }
1377 
1378     delete [] xdF;
1379     delete [] ydF;
1380     delete [] zdF;
1381 }
1382 
testMatVecProductGradient()1383 void testMatVecProductGradient() {
1384     Matrix<Gradient<0>, 3, 4> A;
1385     LocalDofMap dof;
1386 
1387     for (index_type i = 0; i < A.iGetNumRows(); ++i) {
1388     	for (index_type j = 0; j < A.iGetNumCols(); ++j) {
1389     		A(i + 1, j + 1).SetValue(10 * (i + 1) + j + 1);
1390     		A(i + 1, j + 1).DerivativeResizeReset(&dof, j * A.iGetNumRows() + i + 1, MapVectorBase::GLOBAL, 0.);
1391     		A(i + 1, j + 1).SetDerivativeGlobal(j * A.iGetNumRows() + i + 1, 1.);
1392     	}
1393     }
1394 
1395     Matrix<Gradient<0>, 3, 4> B1;
1396     Matrix<doublereal, 3, 4> Ad;
1397 
1398     for (int i = 1; i <= 3; ++i) {
1399     	for (int j = 1; j <= 4; ++j) {
1400     		B1(i, j) = 3.5 * A(i, j);
1401     		Ad(i, j) = A(i, j).dGetValue();
1402     	}
1403     }
1404 
1405     Matrix<Gradient<0>, 3, 4> C1 = A + B1;
1406     Matrix<Gradient<0>, 3, 4> D1 = (A + B1) - (C1 + B1);
1407     Matrix<Gradient<0>, 3, 4> E1 = A + (C1 - B1);
1408     Matrix<Gradient<0>, 3, 4> F1 = (C1 - B1) - A;
1409     Matrix<Gradient<0>, 3, 4> G1 = A - B1;
1410     Matrix<Gradient<0>, 3, 4> H1 = A * 3.5 + B1 / 4.;
1411     Matrix<Gradient<0>, 3, 4> I1 = (A * 2. - B1 / 5.) * 3.5;
1412     Matrix<Gradient<0>, 3, 4> J1 = A * B1(1, 1) + I1 * H1(2, 2);
1413     Matrix<Gradient<0>, 3, 4> K1 = (I1 + J1) * H1(3, 4);
1414 
1415     const doublereal dTol = 10 * std::numeric_limits<scalar_deriv_type>::epsilon();
1416 
1417     for (int i = 1; i <= 3; ++i) {
1418     	for (int j = 1; j <= 4; ++j) {
1419     		assert(bCompare(C1(i, j), Gradient<0>(A(i, j) + B1(i, j)), dTol));
1420     		assert(bCompare(D1(i, j), Gradient<0>(A(i, j) + B1(i, j) - C1(i, j) - B1(i, j)), dTol));
1421     		assert(bCompare(E1(i, j), Gradient<0>(A(i, j) + C1(i, j) - B1(i, j)), dTol));
1422     		assert(bCompare(F1(i, j), Gradient<0>(C1(i, j) - B1(i, j) - A(i, j)), dTol));
1423     		assert(bCompare(G1(i, j), Gradient<0>(A(i, j) - B1(i, j)), dTol));
1424     		assert(bCompare(H1(i, j), Gradient<0>(A(i, j) * 3.5 + B1(i, j) / 4.), dTol));
1425     		assert(bCompare(I1(i, j), Gradient<0>((A(i, j) * 2. - B1(i, j) / 5.) * 3.5), dTol));
1426     		assert(bCompare(J1(i, j), Gradient<0>(A(i, j) * B1(1, 1) + I1(i, j) * H1(2, 2)), dTol));
1427     		assert(bCompare(K1(i, j), Gradient<0>((I1(i, j) + J1(i, j)) * H1(3, 4)), dTol));
1428     	}
1429     }
1430 
1431     std::cout << "A=" << std::endl << A << std::endl;
1432 
1433     std::cout << "A=" << std::endl << A << std::endl;
1434 
1435     Matrix<Gradient<0>, 4, 3> A_T = Transpose(A);
1436 
1437     std::cout << "A^T=" << std::endl << A_T << std::endl;
1438 
1439     Matrix<Gradient<0>, 4, 3> A_T2 = Transpose(Direct(A));
1440 
1441     Matrix<Gradient<0>, 3, 4> B = -A;
1442 
1443     Matrix<Gradient<0>, 3, 3> D = -(A * Transpose(A));
1444 
1445     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
1446     	for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
1447     		assert(bCompare(A(i, j), A_T(j, i)));
1448     		assert(bCompare(A(i, j), A_T2(j, i)));
1449     		assert(bCompare(B(i, j), Gradient<0>(-A(i, j))));
1450     	}
1451     }
1452 
1453     Matrix<Gradient<0>, 3, 4> A2 = Transpose(Transpose(A));
1454 
1455     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
1456     	for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
1457     		assert(bCompare(A2(i, j), A(i, j)));
1458     	}
1459     }
1460 
1461     std::cout << "\nrows of A:" << std::endl;
1462 
1463     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
1464     	Matrix<Gradient<0>, 3, 4>::RowVectorType r = A.GetRow(i);
1465     	std::cout << i - 1 << ": ";
1466 
1467     	for (index_type j = 1; j <= r.iGetNumRows(); ++j) {
1468     		assert(bCompare(r(j), A(i, j), std::numeric_limits<scalar_deriv_type>::epsilon()));
1469     		std::cout << r(j) << " ";
1470     	}
1471 
1472     	std::cout << std::endl;
1473     }
1474 
1475     std::cout << "\ncolumns of A:" << std::endl;
1476 
1477     for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
1478     	Matrix<Gradient<0>, 3, 4>::ColumnVectorType c = A.GetCol(j);
1479     	std::cout << j - 1 << ": ";
1480 
1481     	for (index_type i = 1; i <= c.iGetNumRows(); ++i) {
1482     		assert(bCompare(c(i), A(i, j), std::numeric_limits<scalar_deriv_type>::epsilon()));
1483     		std::cout << c(i) << " ";
1484     	}
1485 
1486     	std::cout << std::endl;
1487     }
1488 
1489     Vector<Gradient<0>, 4> x;
1490     Vector<Gradient<0>, 3> b;
1491     Vector<doublereal, 4> v;
1492 
1493     for (index_type i = 1; i <= x.iGetNumRows(); ++i) {
1494     	x(i) = A(1, i) * 10.;
1495     	v(i) = x(i).dGetValue();
1496     }
1497 
1498     Vector<Gradient<0>, 3> b_1;
1499     Gradient<0> b1_2;
1500     b1_2 = A(1, 1) * x(1);
1501     b1_2 += A(1, 2) * x(2);
1502     b1_2 += A(1, 3) * x(3);
1503     b1_2 += A(1, 4) * x(4);
1504 
1505     b_1(1) = A(1, 1) * x(1) + A(1, 2) * x(2) + A(1, 3) * x(3) + A(1, 4) * x(4);
1506 
1507     assert(b1_2.bIsEqual(b_1(1)));
1508 
1509     b_1(2) = A(2, 1) * x(1) + A(2, 2) * x(2) + A(2, 3) * x(3) + A(2, 4) * x(4);
1510     b_1(3) = A(3, 1) * x(1) + A(3, 2) * x(2) + A(3, 3) * x(3) + A(3, 4) * x(4);
1511 
1512 
1513     using namespace testMatVecProductGradient_testData;
1514     testGradient(oct_b, b_1, dTol);
1515 
1516     for (index_type i = 1; i <= b.iGetNumRows(); ++i) {
1517     	b(i) = Dot(A.GetRow(i), x);
1518     }
1519 
1520     testGradient(oct_b, b, dTol);
1521 
1522     Vector<Gradient<0>, 3> b2 = A * x;
1523     Vector<Gradient<0>, 3> b2d = Ad * x;
1524     Vector<Gradient<0>, 3> b2d1 = -Ad * x - Ad * x;
1525     Vector<Gradient<0>, 3> b3 = b + A * x + b2;
1526     Vector<Gradient<0>, 3> b4 = A * (x + x);
1527     Vector<Gradient<0>, 3> b5 = Direct(A) * Direct(x);
1528     Vector<Gradient<0>, 3> b6 = Direct(A) * x;
1529     Vector<Gradient<0>, 3> b7 = A * Direct(x);
1530     Vector<Gradient<0>, 3> b8 = A * v;
1531     Vector<Gradient<0>, 3> c = -(A * x);
1532     Vector<Gradient<0>, 3> d = Cross(c, b) * 5.;
1533     Vector<Gradient<0>, 3> e = Cross(c + d, b) / 2.;
1534     Vector<Gradient<0>, 3> f = Cross(e, d + c) - e;
1535     Vector<Gradient<0>, 3> g = Cross(d + e, f - c) + b;
1536     Matrix<Gradient<0>, 3, 3> skew_g(MatCrossVec(g));
1537     Matrix<Gradient<0>, 3, 3> skew_gmf(MatCrossVec(g - f));
1538     Vector<Gradient<0>, 3> gmf = g - f;
1539     Gradient<0> norm_Ax = Norm(A * x);
1540     Gradient<0> z = Norm(A * x) / Norm(Ad * x) + (Norm(A * x) / Norm(Ad * x) - Norm(A * x) / Norm(Ad * x)) * exp(-pow(Norm(x) / 5., 3./2.));
1541     doublereal zd = Norm(Ad * v) / Norm(Ad * v) + (Norm(Ad * v) / Norm(Ad * v) - Norm(Ad * v) / Norm(Ad * v)) * exp(-pow(Norm(v) / 5., 3./2.));
1542 
1543     for (int i = 1; i <= 3; ++i) {
1544     	for (int j = 1; j <= 3; ++j) {
1545     		if (i == j) {
1546     			assert(skew_g(i, j).bIsEqual(Gradient<0>()));
1547     			assert(skew_gmf(i, j).bIsEqual(Gradient<0>()));
1548     		} else {
1549     			assert(skew_g(i, j).bIsEqual(-skew_g(j, i)));
1550     			assert(skew_gmf(i, j).bIsEqual(-skew_gmf(j, i)));
1551     		}
1552     	}
1553     }
1554 
1555     assert(skew_g(1, 2).bIsEqual(-g(3)));
1556     assert(skew_g(2, 1).bIsEqual(g(3)));
1557     assert(skew_g(1, 3).bIsEqual(g(2)));
1558     assert(skew_g(3, 1).bIsEqual(-g(2)));
1559     assert(skew_g(2, 3).bIsEqual(-g(1)));
1560     assert(skew_g(3, 2).bIsEqual(g(1)));
1561 
1562     assert(skew_gmf(1, 2).bIsEqual(-gmf(3)));
1563     assert(skew_gmf(2, 1).bIsEqual(gmf(3)));
1564     assert(skew_gmf(1, 3).bIsEqual(gmf(2)));
1565     assert(skew_gmf(3, 1).bIsEqual(-gmf(2)));
1566     assert(skew_gmf(2, 3).bIsEqual(-gmf(1)));
1567     assert(skew_gmf(3, 2).bIsEqual(gmf(1)));
1568 
1569     Vector<doublereal, 3> h;
1570     h(1) = 15.7;
1571     h(2) = -7.3;
1572     h(3) = 12.4;
1573 
1574     Vector<Gradient<0>, 3> i = Cross(d, h) * 5.7;
1575     Vector<Gradient<0>, 3> j = Cross(h, g) / 3.5;
1576     Vector<doublereal, 3> k = Cross(h, h) + h * 3.;
1577     Vector<Gradient<0>, 3> l = Cross(h + h, j) / 2.;
1578     Vector<Gradient<0>, 3> m = Cross(h, j - i) * 5.;
1579     Vector<doublereal, 3> n = Cross(h + h, h) + h;
1580     Vector<doublereal, 3> o = Cross(h, h + h) * 2.;
1581     Vector<doublereal, 3> p = Cross(h * 2.5, h/3.) - h;
1582     Gradient<0> r = Dot(h, g);
1583     Gradient<0> s = Dot(g, h);
1584     Gradient<0> t = Dot(g, g);
1585     doublereal u = Dot(h, h);
1586     Gradient<0> r1 = Dot(Direct(h), g);
1587     Gradient<0> r2 = Dot(h, Direct(g));
1588     Gradient<0> r3 = Dot(Direct(h), Direct(g));
1589     Gradient<0> norm_g1 = Norm(g);
1590     Gradient<0> norm_g2 = Norm(Cross(d + e, f - c) + b);
1591 
1592     assert(r.bIsEqual(r1));
1593     assert(r.bIsEqual(r2));
1594     assert(r.bIsEqual(r3));
1595 
1596     std::cout << "x = " << std::endl << x << std::endl;
1597     std::cout << "b = A * x" << std::endl << b << std::endl;
1598     std::cout << "c = -A * x" << std::endl << c << std::endl;
1599     std::cout << "d = cross(c, b) * 5" << std::endl << d << std::endl;
1600     std::cout << "e = cross(c + d, b) / 2" << std::endl << e << std::endl;
1601     std::cout << "f = cross(e, d + c) - e" << std::endl << f << std::endl;
1602     std::cout << "g = cross(d + e, f - c) + b" << std::endl << g << std::endl;
1603     std::cout << "h = " << std::endl << h << std::endl;
1604     std::cout << "i = cross(d, h) * 5.7" << std::endl << i << std::endl;
1605     std::cout << "j = cross(h, g) / 3.5" << std::endl << j << std::endl;
1606     std::cout << "k = cross(h, h) + h * 3" << std::endl << k << std::endl;
1607     std::cout << "l = cross(h + h, j) / 2" << std::endl << l << std::endl;
1608     std::cout << "m = cross(h, j - i) * 5" << std::endl << m << std::endl;
1609     std::cout << "n = cross(h + h, h) + h" << std::endl << n << std::endl;
1610     std::cout << "o = cross(h, h + h) * 2" << std::endl << o << std::endl;
1611     std::cout << "p = cross(h * 2.5, h / 3) - h" << std::endl << p << std::endl;
1612     std::cout << "r = Dot(h, g) = " << r << std::endl;
1613     std::cout << "s = Dot(g, h) = " << s << std::endl;
1614     std::cout << "t = Dot(g, g) = " << t << std::endl;
1615     std::cout << "u = Dot(h, h) = " << u << std::endl;
1616     std::cout << "norm_g1 = Norm(g) = " << norm_g1 << std::endl;
1617     std::cout << "norm_g2 = Norm(Cross(d + e, f - c) + b) = " << norm_g2 << std::endl;
1618     std::cout << "z=" << z << std::endl;
1619     std::cout << "zd=" << zd << std::endl;
1620 
1621     for (index_type i = 1; i <= b2.iGetNumRows(); ++i) {
1622     	assert(bCompare(b2d(i).dGetValue(), b2(i).dGetValue(), dTol));
1623     	assert(bCompare(b2d1(i).dGetValue(), -2 * b2(i).dGetValue(), dTol));
1624     	assert(bCompare(b2(i), b(i), dTol));
1625     	assert(bCompare(b3(i), Gradient<0>(3 * b(i)), dTol));
1626     	assert(bCompare(b4(i), Gradient<0>(2 * b(i)), dTol));
1627     	assert(bCompare(b5(i), b(i), dTol));
1628     	assert(bCompare(b6(i), b(i), dTol));
1629     	assert(bCompare(b7(i), b(i), dTol));
1630     	assert(bCompare<scalar_deriv_type>(b8(i).dGetValue(), b(i).dGetValue(), dTol));
1631     	assert(bCompare(c(i), Gradient<0>(-b(i)), dTol));
1632     }
1633 
1634 
1635     assert(bCompare(b(1).dGetValue(), 6300., dTol));
1636     assert(bCompare(b(2).dGetValue(), 11300., dTol));
1637     assert(bCompare(b(3).dGetValue(), 16300., dTol));
1638 
1639     assert(bCompare(D(1, 1).dGetValue(), -630., dTol));
1640     assert(bCompare(D(1, 2).dGetValue(), -1130., dTol));
1641     assert(bCompare(D(1, 3).dGetValue(), -1630., dTol));
1642     assert(bCompare(D(2, 1).dGetValue(), -1130., dTol));
1643     assert(bCompare(D(2, 2).dGetValue(), -2030., dTol));
1644     assert(bCompare(D(2, 3).dGetValue(), -2930., dTol));
1645     assert(bCompare(D(3, 1).dGetValue(), -1630., dTol));
1646     assert(bCompare(D(3, 2).dGetValue(), -2930., dTol));
1647     assert(bCompare(D(3, 3).dGetValue(), -4230., dTol));
1648 
1649     testGradient(oct_b, b, dTol);
1650     testGradient(oct_c, c, dTol);
1651     testGradient(oct_d, d, dTol);
1652     testGradient(oct_e, e, dTol);
1653     testGradient(oct_f, f, dTol);
1654     testGradient(oct_g, g, dTol);
1655     testGradient(oct_i, i, dTol);
1656     testGradient(oct_j, j, dTol);
1657     testGradient(oct_l, l, dTol);
1658     testGradient(oct_m, m, dTol);
1659     testGradient(oct_r, r, dTol);
1660     testGradient(oct_s, s, dTol);
1661     testGradient(oct_t, t, dTol);
1662     testGradient(oct_norm_g, norm_g1, dTol);
1663     testGradient(oct_norm_g, norm_g2, dTol);
1664 
1665     A.Reset();
1666     x.Reset();
1667     b.Reset();
1668     std::cout << "after reset ...\n";
1669     std::cout << "A=" << A << std::endl;
1670     std::cout << "x=" << x << std::endl;
1671     std::cout << "b=" << b << std::endl;
1672 }
1673 
1674 template <index_type N_SIZE>
testMatVecProductGradient2(index_type iNumDeriv,int N)1675 void testMatVecProductGradient2(index_type iNumDeriv, int N) {
1676     Matrix<Gradient<N_SIZE>, 3, 5> A;
1677     Matrix<Gradient<N_SIZE>, 5, 7> B;
1678     LocalDofMap dof;
1679 
1680     srand(0);
1681 
1682     for (index_type i = 0; i < A.iGetNumRows(); ++i) {
1683     	for (index_type j = 0; j < A.iGetNumCols(); ++j) {
1684     		A(i + 1, j + 1).SetValue(10. * (i + 1) + j + 1 + rand());
1685     		A(i + 1, j + 1).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::GLOBAL, 0.);
1686             for (int k = 0; k < iNumDeriv; ++k) {
1687                 A(i + 1, j + 1).SetDerivativeGlobal(k, 1000. * (i + 1) + 100. * (j + 1) + k + 1 + rand());
1688             }
1689     	}
1690     }
1691 
1692     for (index_type i = 0; i < B.iGetNumRows(); ++i) {
1693     	for (index_type j = 0; j < B.iGetNumCols(); ++j) {
1694     		B(i + 1, j + 1).SetValue(1000. * (i + 1) + 100. * (j + 1) + rand());
1695     		B(i + 1, j + 1).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::GLOBAL, 0.);
1696             for (int k = 0; k < iNumDeriv; ++k) {
1697                 B(i + 1, j + 1).SetDerivativeGlobal(k, 10000. * (i + 1) + 1000. * (j + 1) + 10. * (k + 1) + rand());
1698             }
1699     	}
1700     }
1701 
1702     Matrix<Gradient<N_SIZE>, 3, 7> C;
1703 
1704     double start = mbdyn_clock_time();
1705 
1706     for (int i = 0; i < N; ++i) {
1707         C = A * B;
1708     }
1709 
1710     double dt = (mbdyn_clock_time() - start) / N;
1711 
1712     std::cerr << "iMaxDerivatives=" << iNumDeriv << std::endl;
1713     std::cerr << "dt=" << dt << std::endl;
1714     std::cout << "A=\n" << A << std::endl;
1715     std::cout << "B=\n" << B << std::endl;
1716     std::cout << "C=\n" << C << std::endl;
1717 }
1718 
1719 #ifdef HAVE_BLITZ
1720 template <index_type N>
testMatVecDoubleBlitz(doublereal c_C[N])1721 void testMatVecDoubleBlitz(doublereal c_C[N]) {
1722     blitz::TinyVector<doublereal, N> a, b;
1723 
1724     for (index_type i = 0; i < N; ++i) {
1725         a(i) = 100*(i + 1);
1726         b(i) = 1000*(i + 10);
1727     }
1728 
1729     blitz::TinyVector<doublereal, N> c, c1;
1730 
1731 	srand(0);
1732     tic();
1733     for (int i = 0; i < NLoops; ++i) {
1734     	func(0, a, b, c);
1735     }
1736 
1737     for (int i = 0; i < N; ++i) {
1738     	c_C[i] = c(i);
1739     }
1740 
1741     std::cerr << "blitz vector (doublereal): " << toc() << "s" << std::endl;
1742 
1743     std::cout << "a=" << std::endl << a << std::endl;
1744     std::cout << "b=" << std::endl << b << std::endl;
1745     std::cout << "c=" << std::endl << c << std::endl;
1746 }
1747 #endif
1748 
1749 template <index_type N_rows>
testMatVecCopy()1750 void testMatVecCopy() {
1751 	std::cerr << "---------------------------\ntestMatVecCopy<" << N_rows << ">()\n";
1752 
1753 	LocalDofMap dofMap1;
1754 	Vector<Gradient<0>, N_rows> v;
1755 
1756 	for (int i = 1; i <= v.iGetNumRows(); ++i) {
1757 		v(i).SetValuePreserve(i);
1758 		v(i).DerivativeResizeReset(&dofMap1, i, MapVectorBase::GLOBAL, i);
1759 	}
1760 
1761 	std::cout << "v=" << std::endl << v << std::endl;
1762 
1763 	for (int i = 2; i <= v.iGetNumRows() - 1; ++i) {
1764 		LocalDofMap dofMap2;
1765 		Gradient<4> g1(v(1), &dofMap2), gi(v(i), &dofMap2), gim1(v(i - 1), &dofMap2), gip1(v(i + 1), &dofMap2);
1766 		Gradient<4> x = 1000 * g1 + 100 * gi + 10 * gip1 + gim1;
1767 		std::cout << "x(" << i << ")=" << x << std::endl;
1768 
1769 		assert(x.dGetValue() == 1000 + 100 * i + 10 * (i + 1) + i - 1);
1770 
1771 		if (i == 2) {
1772 			assert(x.dGetDerivativeGlobal(1) == 1001.);
1773 		} else {
1774 			assert(x.dGetDerivativeGlobal(1) == 1000.);
1775 			assert(x.dGetDerivativeGlobal(i - 1) == i - 1);
1776 		}
1777 
1778 		assert(x.dGetDerivativeGlobal(i) == 100. * i);
1779 		assert(x.dGetDerivativeGlobal(i + 1) == 10. * (i + 1));
1780 	}
1781 
1782 	v.Reset();
1783 	std::cout << "v=" << v << std::endl;
1784 }
1785 
1786 namespace gradVecAssTest {
1787 const int I1 = 1, I2 = 2, I3 = 3;
1788 
1789 template <typename T>
Euler123ToMatR(const Vector<T,3> & v,Matrix<T,3,3> & R)1790 Matrix<T, 3, 3>& Euler123ToMatR(const Vector<T, 3>& v, Matrix<T, 3, 3>& R) {
1791 	T d = v(1);
1792 	T dCosAlpha(cos(d));
1793 	T dSinAlpha(sin(d));
1794 	d = v(2);
1795 	T dCosBeta(cos(d));
1796 	T dSinBeta(sin(d));
1797 	d = v(3);
1798 	T dCosGamma(cos(d));
1799 	T dSinGamma(sin(d));
1800 
1801 	R(1, 1) = dCosBeta*dCosGamma;
1802 	R(2, 1) = dCosAlpha*dSinGamma + dSinAlpha*dSinBeta*dCosGamma;
1803 	R(3, 1) = dSinAlpha*dSinGamma - dCosAlpha*dSinBeta*dCosGamma;
1804 	R(1, 2) = -dCosBeta*dSinGamma;
1805 	R(2, 2) = dCosAlpha*dCosGamma - dSinAlpha*dSinBeta*dSinGamma;
1806 	R(3, 2) = dSinAlpha*dCosGamma + dCosAlpha*dSinBeta*dSinGamma;
1807 	R(1, 3) = dSinBeta;
1808 	R(2, 3) = -dSinAlpha*dCosBeta;
1809 	R(3, 3) = dCosAlpha*dCosBeta;
1810 
1811 	return R;
1812 }
1813 
1814 #ifdef HAVE_BLITZ
1815 
1816 class Node {
1817 public:
Node(const Vector<doublereal,3> & X_0,const Vector<doublereal,3> & XP_0,const Matrix<doublereal,3,3> & R_0,const Vector<doublereal,3> & W_0)1818     Node(const Vector<doublereal, 3>& X_0,
1819          const Vector<doublereal, 3>& XP_0,
1820          const Matrix<doublereal, 3, 3>& R_0,
1821          const Vector<doublereal, 3>& W_0)
1822         :iFirstDofIndex(-1), R0(R_0), W0(W_0) {
1823 
1824         for (int i = 0; i < 3; ++i) {
1825             X(i + 1).SetValuePreserve(X_0(i + 1));
1826             X(i + 1).DerivativeResizeReset(&dof, i, MapVectorBase::LOCAL, 0.);
1827 
1828             XP(i + 1).SetValuePreserve(XP_0(i + 1));
1829             XP(i + 1).DerivativeResizeReset(&dof, i, MapVectorBase::LOCAL, 0.);
1830             XP(i + 1).SetDerivativeLocal(i, -1.); // derivative will be always -1
1831 
1832             g(i + 1).SetValuePreserve(0.);
1833             g(i + 1).DerivativeResizeReset(&dof, i, MapVectorBase::LOCAL, 0.);
1834 
1835             gP(i + 1).SetValuePreserve(0.);
1836             gP(i + 1).DerivativeResizeReset(&dof, i, MapVectorBase::LOCAL, 0.);
1837             gP(i + 1).SetDerivativeLocal(i, -1.); // derivative will be always -1
1838         }
1839     }
1840 
SetValue(blitz::Array<doublereal,1> & XCurr,blitz::Array<doublereal,1> & XPrimeCurr)1841     void SetValue(blitz::Array<doublereal, 1>& XCurr, blitz::Array<doublereal, 1>& XPrimeCurr) {
1842         assert(iFirstDofIndex != -1);
1843 
1844         for (int i = 0; i < 3; ++i) {
1845             XCurr(iFirstDofIndex + i) = X(i + 1).dGetValue();
1846             XPrimeCurr(iFirstDofIndex + i) = XP(i + 1).dGetValue();
1847             XCurr(iFirstDofIndex + i + 3) = g(i + 1).dGetValue();
1848             XPrimeCurr(iFirstDofIndex + i + 3) = gP(i + 1).dGetValue();
1849         }
1850     }
1851 
Update(const blitz::Array<doublereal,1> & XCurr,const blitz::Array<doublereal,1> & XPrimeCurr,doublereal dCoef)1852     void Update(const blitz::Array<doublereal, 1>& XCurr, const blitz::Array<doublereal, 1>& XPrimeCurr, doublereal dCoef) {
1853         assert(iFirstDofIndex != -1);
1854 
1855         for (int i = 0; i < 3; ++i) {
1856             X(i + 1).SetValuePreserve(XCurr(iFirstDofIndex + i));
1857             X(i + 1).SetDerivativeLocal(i, -dCoef);
1858             XP(i + 1).SetValuePreserve(XPrimeCurr(iFirstDofIndex + i));
1859             g(i + 1).SetValuePreserve(XCurr(iFirstDofIndex + i + 3));
1860             g(i + 1).SetDerivativeLocal(i, -dCoef);
1861             gP(i + 1).SetValuePreserve(XPrimeCurr(iFirstDofIndex + i + 3));
1862         }
1863 
1864         UpdateRotation();
1865     }
1866 
UpdateRotation()1867     void UpdateRotation() {
1868         const Matrix<Gradient<NADVars>, 3, 3> skew_g(MatCrossVec(g));
1869         const Matrix<Gradient<NADVars>, 3, 3> skew_skew_g(MatCrossCrossVec(g));
1870 
1871         const Gradient<NADVars> d = 4. / (4. + Dot(g, g));
1872 #if 0
1873         Matrix<Gradient<NADVars>, 3, 3> RDelta, G;
1874         for (int i = 1; i <= 3; ++i) {
1875             RDelta(i, i).SetValuePreserve(1.);
1876             G(i, i) = d;
1877         }
1878 
1879         for (int i = 1; i <= 3; ++i) {
1880             for (int j = 1; j <= 3; ++j) {
1881                 RDelta(i, j) += d * (skew_g(i, j) + 0.5 * skew_skew_g(i, j));
1882                 G(i, j) += 0.5 * d * skew_g(i, j);
1883             }
1884         }
1885 #else
1886         Matrix<Gradient<NADVars>, 3, 3> RDelta = (skew_g + skew_skew_g * 0.5) * d;
1887         Matrix<Gradient<NADVars>, 3, 3> G = skew_g * Gradient<NADVars>(0.5 * d);
1888 
1889         for (int i = 1; i <= 3; ++i) {
1890             RDelta(i, i) += 1.;
1891             G(i, i) += d;
1892         }
1893 #endif
1894         R = RDelta * R0;
1895         W = G * gP + RDelta * W0;
1896     }
1897 
AfterConvergence(const blitz::Array<doublereal,1> & XCurr,const blitz::Array<doublereal,1> & XPrimeCurr)1898     void AfterConvergence(const blitz::Array<doublereal, 1>& XCurr, const blitz::Array<doublereal, 1>& XPrimeCurr) {
1899         for (int i = 1; i <= 3; ++i) {
1900             W0(i) = W(i).dGetValue();
1901             g(i).SetValuePreserve(0.);
1902             gP(i).SetValuePreserve(0.);
1903 
1904             for (int j = 1; j <= 3; ++j) {
1905                 R0(i, j) = R(i, j).dGetValue();
1906             }
1907         }
1908     }
1909 
SetFirstDofIndex(int iDofIndex)1910     void SetFirstDofIndex(int iDofIndex) {
1911         iFirstDofIndex = iDofIndex;
1912     }
1913 
iGetFirstIndex() const1914     int iGetFirstIndex() const {
1915         return iFirstDofIndex;
1916     }
1917 
iGetNumDof() const1918     int iGetNumDof() const {
1919         return 6;
1920     }
1921 
GetXCurr(Vector<doublereal,3> & XCurr,LocalDofMap *) const1922     void GetXCurr(Vector<doublereal, 3>& XCurr, LocalDofMap*) const {
1923         for (int i = 1; i <= 3; ++i) {
1924             XCurr(i) = X(i).dGetValue();
1925         }
1926     }
1927 
1928     template <index_type N_SIZE>
GetXCurr(Vector<Gradient<N_SIZE>,3> & XCurr,LocalDofMap * pDofMap) const1929     void GetXCurr(Vector<Gradient<N_SIZE>, 3>& XCurr, LocalDofMap* pDofMap) const {
1930         assert(iFirstDofIndex != -1);
1931         assert(pDofMap != 0);
1932 
1933         for (int i = 1; i <= 3; ++i) {
1934             XCurr(i).SetValuePreserve(X(i).dGetValue());
1935             XCurr(i).DerivativeResizeReset(pDofMap, iFirstDofIndex + X(i).iGetStartIndexLocal(), iFirstDofIndex + X(i).iGetEndIndexLocal(), MapVectorBase::GLOBAL, 0.);
1936 
1937             for (index_type j = X(i).iGetStartIndexLocal(); j < X(i).iGetEndIndexLocal(); ++j) {
1938             	XCurr(i).SetDerivativeGlobal(iFirstDofIndex + j, X(i).dGetDerivativeLocal(j));
1939             }
1940         }
1941     }
1942 
GetVCurr(Vector<doublereal,3> & VCurr,LocalDofMap *) const1943     void GetVCurr(Vector<doublereal, 3>& VCurr, LocalDofMap*) const {
1944         for (int i = 1; i <= 3; ++i) {
1945             VCurr(i) = XP(i).dGetValue();
1946         }
1947     }
1948 
1949     template <index_type N_SIZE>
GetVCurr(Vector<Gradient<N_SIZE>,3> & VCurr,LocalDofMap * pDofMap) const1950     void GetVCurr(Vector<Gradient<N_SIZE>, 3>& VCurr, LocalDofMap* pDofMap) const {
1951         assert(iFirstDofIndex != -1);
1952         assert(pDofMap != 0);
1953 
1954         for (int i = 1; i <= 3; ++i) {
1955             VCurr(i).SetValuePreserve(XP(i).dGetValue());
1956             VCurr(i).DerivativeResizeReset(pDofMap, iFirstDofIndex + XP(i).iGetStartIndexLocal(), iFirstDofIndex + XP(i).iGetEndIndexLocal(), MapVectorBase::GLOBAL, 0.);
1957 
1958             for (index_type j = XP(i).iGetStartIndexLocal(); j < XP(i).iGetEndIndexLocal(); ++j) {
1959             	VCurr(i).SetDerivativeGlobal(iFirstDofIndex + j, XP(i).dGetDerivativeLocal(j));
1960             }
1961         }
1962     }
1963 
GetRCurr(Matrix<doublereal,3,3> & RCurr,LocalDofMap *) const1964     void GetRCurr(Matrix<doublereal, 3, 3>& RCurr, LocalDofMap*) const {
1965         for (int i = 1; i <= 3; ++i) {
1966             for (int j = 1; j <= 3; ++j) {
1967                 RCurr(i, j) = R(i, j).dGetValue();
1968             }
1969         }
1970     }
1971 
1972     template <index_type N_SIZE>
GetRCurr(Matrix<Gradient<N_SIZE>,3,3> & RCurr,LocalDofMap * pDofMap) const1973     void GetRCurr(Matrix<Gradient<N_SIZE>, 3, 3>& RCurr, LocalDofMap* pDofMap) const {
1974         assert(iFirstDofIndex != -1);
1975         assert(pDofMap != 0);
1976 
1977         for (int i = 1; i <= 3; ++i) {
1978             for (int j = 1; j <= 3; ++j) {
1979                 RCurr(i, j).SetValuePreserve(R(i, j).dGetValue());
1980                 RCurr(i, j).DerivativeResizeReset(pDofMap, iFirstDofIndex + R(i, j).iGetStartIndexLocal() + 3, iFirstDofIndex + R(i, j).iGetEndIndexLocal() + 3, MapVectorBase::GLOBAL, 0.);
1981 
1982                 for (index_type k = R(i, j).iGetStartIndexLocal(); k < R(i, j).iGetEndIndexLocal(); ++k) {
1983                 	RCurr(i, j).SetDerivativeGlobal(iFirstDofIndex + k + 3, R(i, j).dGetDerivativeLocal(k));
1984                 }
1985             }
1986         }
1987     }
1988 
GetRRef() const1989     const Matrix<doublereal, 3, 3>& GetRRef() const {
1990         return R0;
1991     }
1992 
GetWCurr(Vector<doublereal,3> & WCurr,LocalDofMap *) const1993     void GetWCurr(Vector<doublereal, 3>& WCurr, LocalDofMap*) const {
1994         for (int i = 1; i <= 3; ++i) {
1995             WCurr(i) = W(i).dGetValue();
1996         }
1997     }
1998 
1999     template <index_type N_SIZE>
GetWCurr(Vector<Gradient<N_SIZE>,3> & WCurr,LocalDofMap * pDofMap) const2000     void GetWCurr(Vector<Gradient<N_SIZE>, 3>& WCurr, LocalDofMap* pDofMap) const {
2001         assert(iFirstDofIndex != -1);
2002         assert(pDofMap != 0);
2003 
2004         for (int i = 1; i <= 3; ++i) {
2005             WCurr(i).SetValuePreserve(W(i).dGetValue());
2006             WCurr(i).DerivativeResizeReset(pDofMap, iFirstDofIndex + W(i).iGetStartIndexLocal() + 3, iFirstDofIndex + W(i).iGetEndIndexLocal() + 3, MapVectorBase::GLOBAL, 0.);
2007 
2008             for (index_type j = W(i).iGetStartIndexLocal(); j < W(i).iGetEndIndexLocal(); ++j) {
2009             	WCurr(i).SetDerivativeGlobal(iFirstDofIndex + j + 3, W(i).dGetDerivativeLocal(j));
2010             }
2011         }
2012     }
2013 
GetWRef() const2014     const Vector<doublereal, 3>& GetWRef() const {
2015         return W0;
2016     }
2017 
2018 private:
2019     int iFirstDofIndex;
2020     Matrix<doublereal, 3, 3> R0;
2021     Vector<doublereal, 3> W0;
2022     static const int NADVars = 3;
2023     Vector<Gradient<NADVars>, 3> X, XP, g, gP, W;
2024     Matrix<Gradient<NADVars>, 3, 3> R;
2025     LocalDofMap dof;
2026 };
2027 
2028 template <typename T>
2029 struct ResItem {
2030     int iEquIndex;
2031     T dCoef;
2032 
ResItemgradVecAssTest::ResItem2033     ResItem(int iEquIndex_=-1, T dCoef_=T(0.))
2034         :iEquIndex(iEquIndex_), dCoef(dCoef_) {
2035     }
2036 };
2037 
2038 class FullSubMatrixHandler {
2039 public:
FullSubMatrixHandler(index_type iNumRows=0,index_type iNumCols=0)2040     FullSubMatrixHandler(index_type iNumRows=0, index_type iNumCols=0) {
2041         ResizeReset(iNumRows, iNumCols);
2042     }
2043 
ResizeReset(index_type iNumRows,index_type iNumCols)2044     void ResizeReset(index_type iNumRows, index_type iNumCols) {
2045         oWorkMat.resize(iNumRows, iNumCols);
2046         oRowIndex.resize(iNumRows);
2047         oColIndex.resize(iNumCols);
2048         oWorkMat.initialize(0.);
2049         oRowIndex.initialize(-1);
2050         oColIndex.initialize(-1);
2051     }
2052 
PutRowIndex(int iSubRow,int iRow)2053     void PutRowIndex(int iSubRow, int iRow) {
2054         assert(iSubRow < oRowIndex.rows());
2055         oRowIndex(iSubRow) = iRow;
2056     }
2057 
PutColIndex(int iSubCol,int iCol)2058     void PutColIndex(int iSubCol, int iCol) {
2059         assert(iSubCol < oColIndex.rows());
2060         oColIndex(iSubCol) = iCol;
2061     }
2062 
PutCoef(int iSubRow,int iSubCol,doublereal dCoef)2063     void PutCoef(int iSubRow, int iSubCol, doublereal dCoef) {
2064         assert(iSubRow >= 0 && iSubRow < oWorkMat.rows());
2065         assert(iSubCol >= 0 && iSubCol < oWorkMat.cols());
2066 
2067         oWorkMat(iSubRow, iSubCol) = dCoef;
2068     }
2069 
IncCoef(int iSubRow,int iSubCol,doublereal dCoef)2070     void IncCoef(int iSubRow, int iSubCol, doublereal dCoef) {
2071         assert(iSubRow >= 0 && iSubRow < oWorkMat.rows());
2072         assert(iSubCol >= 0 && iSubCol < oWorkMat.cols());
2073 
2074         oWorkMat(iSubRow, iSubCol) += dCoef;
2075     }
2076 
AddTo(blitz::Array<doublereal,2> & JacMat) const2077     void AddTo(blitz::Array<doublereal, 2>& JacMat) const {
2078         for (int i = 0; i < oWorkMat.rows(); ++i) {
2079             for (int j = 0; j < oWorkMat.cols(); ++j) {
2080                 assert(oRowIndex(i) >= 0);
2081                 assert(oRowIndex(i) < JacMat.rows());
2082                 assert(oColIndex(j) >= 0);
2083                 assert(oColIndex(j) < JacMat.cols());
2084                 JacMat(oRowIndex(i), oColIndex(j)) += oWorkMat(i, j);
2085             }
2086         }
2087     }
2088 
2089 private:
2090     blitz::Array<doublereal, 2> oWorkMat;
2091     blitz::Array<int, 1> oRowIndex, oColIndex;
2092 };
2093 
2094 class SparseSubMatrixHandler {
2095 public:
2096     struct JacItem {
2097         int iEquIndex;
2098         int iDofIndex;
2099         doublereal dCoef;
2100 
JacItemgradVecAssTest::SparseSubMatrixHandler::JacItem2101         JacItem(int iEquIndex=-1, int iDofIndex=-1, doublereal dCoef=0.)
2102             :iEquIndex(iEquIndex), iDofIndex(iDofIndex), dCoef(dCoef) {
2103         }
2104     };
2105 
2106     typedef std::vector<JacItem> VectorType;
2107     typedef VectorType::const_iterator const_iterator;
2108 
SparseSubMatrixHandler(int iNumItems=0)2109     SparseSubMatrixHandler(int iNumItems=0) {
2110         if (iNumItems > 0) {
2111             WorkMat.reserve(iNumItems);
2112         }
2113     }
2114 
2115     template <index_type N_SIZE, typename T>
AssJac(T * pElem,LocalDofMap * pDofMap,blitz::Array<ResItem<Gradient<N_SIZE>>,1> & WorkVec,doublereal dCoef)2116     SparseSubMatrixHandler& AssJac(T* pElem, LocalDofMap* pDofMap, blitz::Array<ResItem<Gradient<N_SIZE> >, 1>& WorkVec, doublereal dCoef) {
2117         pElem->AssRes(WorkVec, dCoef, pDofMap);
2118         ResizeReset(0);
2119 
2120         for (int i = 0; i < WorkVec.rows(); ++i) {
2121             const ResItem<Gradient<N_SIZE> >& resItem = WorkVec(i);
2122 
2123             for (index_type j = resItem.dCoef.iGetStartIndexLocal(); j < resItem.dCoef.iGetEndIndexLocal(); ++j) {
2124                 index_type iDofIndex = resItem.dCoef.iGetGlobalDof(j);
2125                 InsertItem(JacItem(resItem.iEquIndex, iDofIndex, resItem.dCoef.dGetDerivativeLocal(j)));
2126             }
2127         }
2128 
2129         return *this;
2130     }
2131 
ResizeReset(int iNumItems)2132     void ResizeReset(int iNumItems) {
2133         WorkMat.resize(iNumItems);
2134     }
2135 
iGetSize() const2136     int iGetSize() const { return WorkMat.size(); }
2137 
InsertItem(const JacItem & item)2138     void InsertItem(const JacItem& item) {
2139         WorkMat.push_back(item);
2140     }
2141 
AddTo(blitz::Array<doublereal,2> & JacMat) const2142     void AddTo(blitz::Array<doublereal, 2>& JacMat) const {
2143         for (const_iterator j = begin(); j != end(); ++j) {
2144             JacMat(j->iEquIndex, j->iDofIndex) += j->dCoef;
2145         }
2146     }
begin() const2147     const_iterator begin() const { return WorkMat.begin(); }
end() const2148     const_iterator end() const { return WorkMat.end(); }
2149 
2150 private:
2151     VectorType WorkMat;
2152 };
2153 
2154 class Element {
2155 public:
2156     virtual blitz::Array<ResItem<doublereal>, 1>& AssRes(blitz::Array<ResItem<doublereal>, 1>& WorkVec, doublereal dCoef)=0;
2157     virtual SparseSubMatrixHandler& AssJac(SparseSubMatrixHandler& WorkMat, doublereal dCoef)=0;
2158     virtual FullSubMatrixHandler& AssJac(FullSubMatrixHandler& WorkMat, doublereal dCoef)=0;
2159     virtual index_type iGetNumRows() const=0;
2160     virtual index_type iGetNumCols() const=0;
~Element()2161     virtual ~Element(){ }
2162 };
2163 
2164 class Element1: public Element {
2165 private:
2166     Node* node1;
2167     Node* node2;
2168     Vector<doublereal, 3> o1, o2;
2169     Matrix<doublereal, 3, 3> S, D;
2170     static const int NADVars = 12;
2171     LocalDofMap dofMap;
2172 
2173 public:
Element1(Node * node1_,const Vector<doublereal,3> & o1_,Node * node2_,const Vector<doublereal,3> & o2_,doublereal s,doublereal d)2174     Element1(Node* node1_,
2175              const Vector<doublereal, 3>& o1_,
2176              Node* node2_,
2177              const Vector<doublereal, 3>& o2_,
2178              doublereal s,
2179              doublereal d)
2180         :node1(node1_),
2181          node2(node2_),
2182          o1(o1_),
2183          o2(o2_),
2184          dofMap(iGetNumCols()) {
2185 
2186         /*
2187             S=[ s,  -s,    0;
2188                -s, 2*s,   -s;
2189                 0,  -s,  2*s];
2190         */
2191 
2192         S(1, 1) = s;
2193         S(2, 1) = -s;
2194         S(1, 2) = -s;
2195         S(2, 2) = 2*s;
2196         S(3, 2) = -s;
2197         S(2, 3) = -s;
2198         S(3, 3) = 2*s;
2199 
2200         /*
2201             D=[ d,     -d,      0;
2202                -d,  2 * d,     -d;
2203                 0,     -d,  2 * d];
2204         */
2205 
2206         D(1, 1) = d;
2207         D(2, 1) = -d;
2208         D(1, 2) = -d;
2209         D(2, 2) = 2*d;
2210         D(3, 2) = -d;
2211         D(2, 3) = -d;
2212         D(3, 3) = 2*d;
2213     }
2214 
2215     template <typename T>
AssRes(blitz::Array<ResItem<T>,1> & WorkVec,doublereal dCoef,LocalDofMap * pDofMap)2216     blitz::Array<ResItem<T>, 1>& AssRes(blitz::Array<ResItem<T>, 1>& WorkVec, doublereal dCoef, LocalDofMap *pDofMap) {
2217         WorkVec.resize(iGetNumRows());
2218         typedef Vector<T, 3> Vec3;
2219         typedef Matrix<T, 3, 3> Mat3x3;
2220         Vec3 X1, X2, V1, V2, W1, W2;
2221         Mat3x3 R1, R2;
2222 
2223         node1->GetXCurr(X1, pDofMap);
2224         node1->GetVCurr(V1, pDofMap);
2225         node1->GetRCurr(R1, pDofMap);
2226         node1->GetWCurr(W1, pDofMap);
2227         node2->GetXCurr(X2, pDofMap);
2228         node2->GetVCurr(V2, pDofMap);
2229         node2->GetRCurr(R2, pDofMap);
2230         node2->GetWCurr(W2, pDofMap);
2231 
2232         const Vec3 R1o1 = R1 * o1;
2233         const Vec3 R2o2 = R2 * o2;
2234         const Vec3 dX = Transpose(R1) * Vec3(X1 + R1o1 - X2 - R2o2);
2235         const Vec3 dV = Transpose(R1) * Vec3(V1 + Cross(W1, R1o1) - V2 - Cross(W2, R2o2));
2236         const Vec3 F1 = R1 * Vec3(-S * dX - D * dV);
2237         const Vec3 M1 = Cross(R1o1, F1), M2 = Cross(R2o2, -F1);
2238 
2239         for (int i = 0; i < 6; ++i) {
2240             WorkVec(i).iEquIndex = node1->iGetFirstIndex() + i;
2241             WorkVec(i + 6).iEquIndex = node2->iGetFirstIndex() + i;
2242         }
2243 
2244         for (int i = 0; i < 3; ++i) {
2245             WorkVec(i).dCoef = F1(i + 1);
2246             WorkVec(i + 3).dCoef = M1(i + 1);
2247             WorkVec(i + 6).dCoef = -F1(i + 1);
2248             WorkVec(i + 9).dCoef = M2(i + 1);
2249         }
2250 
2251         return WorkVec;
2252     }
2253 
AssRes(blitz::Array<ResItem<doublereal>,1> & WorkVec,doublereal dCoef)2254     virtual blitz::Array<ResItem<doublereal>, 1>& AssRes(blitz::Array<ResItem<doublereal>, 1>& WorkVec, doublereal dCoef) {
2255         return AssRes(WorkVec, dCoef, 0);
2256     }
2257 
AssJac(SparseSubMatrixHandler & WorkMat,doublereal dCoef)2258     virtual SparseSubMatrixHandler& AssJac(SparseSubMatrixHandler& WorkMat, doublereal dCoef) {
2259         blitz::Array<ResItem<Gradient<NADVars> >, 1> WorkVec;
2260         return WorkMat.AssJac(this, &dofMap, WorkVec, dCoef);
2261     }
2262 
AssJac(FullSubMatrixHandler & WorkMat,doublereal dCoef)2263     virtual FullSubMatrixHandler& AssJac(FullSubMatrixHandler& WorkMat, doublereal dCoef) {
2264         WorkMat.ResizeReset(iGetNumRows(), iGetNumCols());
2265         typedef Matrix<doublereal, 3, 3> Mat3x3;
2266         typedef Vector<doublereal, 3> Vec3;
2267 
2268         for (int i = 0; i < 6; ++i) {
2269             WorkMat.PutColIndex(i, node1->iGetFirstIndex() + i);
2270             WorkMat.PutColIndex(i + 6, node2->iGetFirstIndex() + i);
2271             WorkMat.PutRowIndex(i, node1->iGetFirstIndex() + i);
2272             WorkMat.PutRowIndex(i + 6, node2->iGetFirstIndex() + i);
2273         }
2274 
2275         const Vec3& W1_0 = node1->GetWRef();
2276         const Vec3& W2_0 = node2->GetWRef();
2277         const Mat3x3& R1_0 = node1->GetRRef();
2278         const Mat3x3& R2_0 = node2->GetRRef();
2279 
2280         Vec3 X1, X2, V1, V2, W1, W2;
2281         Mat3x3 R1, R2;
2282 
2283         node1->GetXCurr(X1, 0);
2284         node1->GetVCurr(V1, 0);
2285         node1->GetRCurr(R1, 0);
2286         node1->GetWCurr(W1, 0);
2287 
2288         node2->GetXCurr(X2, 0);
2289         node2->GetVCurr(V2, 0);
2290         node2->GetRCurr(R2, 0);
2291         node2->GetWCurr(W2, 0);
2292 
2293 #ifdef ASS_JAC_USE_TEMP_EXPR
2294         const Mat3x3 skew_W2_0(MatCrossVec(W2_0));
2295         const Vec3 R1o1 = Vec3(R1 * o1);
2296         const Mat3x3 skew_R1o1(MatCrossVec(R1o1));
2297         const Vec3 R1_0o1 = Vec3(R1_0 * o1);
2298         const Mat3x3 skew_R1_0o1(MatCrossVec(R1_0o1));
2299         const Vec3 R2o2 = Vec3(R2 * o2);
2300         const Mat3x3 skew_R2o2(MatCrossVec(R2o2));
2301         const Vec3 R2_0o2 = Vec3(R2_0 * o2);
2302         const Mat3x3 skew_R2_0o2(MatCrossVec(R2_0o2));
2303         const Vec3 dX = Vec3(Mat3x3(Transpose(R1)) * Vec3(Vec3(Vec3(X1 + R1o1) - X2) - R2o2));
2304         const Vec3 dV = Vec3(Mat3x3(Transpose(R1)) * Vec3(Vec3(Vec3(V1 + Vec3(Cross(W1, R1o1))) - V2) - Vec3(Cross(W2, R2o2))));
2305         const Vec3 F1_R1 = Vec3(-Vec3(Vec3(S * dX) + Vec3(D * dV)));
2306         const Vec3 F1 = Vec3(R1 * F1_R1);
2307         const Vec3 F2 = Vec3(-F1);
2308 
2309         const Mat3x3 dF1_dX1 = Mat3x3(-Mat3x3(R1 * Mat3x3(S * Transpose(R1))));
2310 
2311         const Mat3x3 ddX_dg1 = Mat3x3(Mat3x3(Mat3x3(Transpose(R1_0)) * Mat3x3(MatCrossVec(Vec3(Vec3(Vec3(X1 + R1o1) - X2) - R2o2)))) - Mat3x3(Mat3x3(Transpose(R1)) * skew_R1_0o1));
2312         const Mat3x3 ddV_dg1 = Mat3x3(Mat3x3(Transpose(R1_0)) * Mat3x3(MatCrossVec(Vec3(Vec3(Vec3(V1 + Vec3(Cross(W1, R1o1))) - V2) - Vec3(Cross(W2, R2o2))))))
2313                                                + Mat3x3(Mat3x3(Transpose(R1)) * Mat3x3(Mat3x3(skew_R1o1 * Mat3x3(MatCrossVec(W1_0))) - Mat3x3(Mat3x3(MatCrossVec(W1)) * skew_R1_0o1)));
2314         const Mat3x3 dF1_dg1 = Mat3x3(Mat3x3(MatCrossVec(Vec3(R1_0 * Vec3(-F1_R1)))) - Mat3x3(R1 * Mat3x3(Mat3x3(S * ddX_dg1) + Mat3x3(D * ddV_dg1))));
2315 
2316         const Mat3x3 dF1_dX2 = Mat3x3(R1 * Mat3x3(S * Mat3x3(Transpose(R1))));
2317         const Mat3x3 ddX_dg2 = Mat3x3(Mat3x3(Transpose(R1)) * skew_R2_0o2);
2318         const Mat3x3 ddV_dg2 = Mat3x3(Mat3x3(Transpose(R1)) * Mat3x3(Mat3x3(skew_R2o2 * Mat3x3(-skew_W2_0)) + Mat3x3(skew_W2_0 * skew_R2_0o2)));
2319         const Mat3x3 dF1_dg2 = Mat3x3(Mat3x3(-R1) * Mat3x3(Mat3x3(S * ddX_dg2) + Mat3x3(D * ddV_dg2)));
2320 
2321         const Mat3x3 dF2_dX1 = Mat3x3(-dF1_dX1);
2322         const Mat3x3 dF2_dg1 = Mat3x3(-dF1_dg1);
2323         const Mat3x3 dF2_dX2 = Mat3x3(-dF1_dX2);
2324         const Mat3x3 dF2_dg2 = Mat3x3(-dF1_dg2);
2325 
2326         const Mat3x3 dM1_dX1 = Mat3x3(skew_R1o1 * dF1_dX1);
2327         const Mat3x3 dM1_dg1 = Mat3x3(Mat3x3(Mat3x3(MatCrossVec(F1)) * skew_R1_0o1) + Mat3x3(skew_R1o1 * dF1_dg1));
2328         const Mat3x3 dM1_dX2 = Mat3x3(skew_R1o1 * dF1_dX2);
2329         const Mat3x3 dM1_dg2 = Mat3x3(skew_R1o1 * dF1_dg2);
2330 
2331         const Mat3x3 dM2_dX1 = Mat3x3(skew_R2o2 * dF2_dX1);
2332         const Mat3x3 dM2_dg1 = Mat3x3(skew_R2o2 * dF2_dg1);
2333         const Mat3x3 dM2_dX2 = Mat3x3(skew_R2o2 * dF2_dX2);
2334         const Mat3x3 dM2_dg2 = Mat3x3(Mat3x3(Mat3x3(MatCrossVec(F2)) * skew_R2_0o2) + Mat3x3(skew_R2o2 * dF2_dg2));
2335 
2336         const Mat3x3 dF1_dV1 = Mat3x3(R1 * Mat3x3(Mat3x3(-D) * Mat3x3(Transpose(R1))));
2337         const Mat3x3 ddV_dgP1 = Mat3x3(Mat3x3(-Mat3x3(Transpose(R1))) * skew_R1o1);
2338         const Mat3x3 dF1_dgP1 = Mat3x3(R1 * Mat3x3(Mat3x3(-D) * ddV_dgP1));
2339         const Mat3x3 dF1_dV2 = Mat3x3(R1 * Mat3x3(D * Mat3x3(Transpose(R1))));
2340         const Mat3x3 ddV_dgP2 = Mat3x3(Mat3x3(Transpose(R1)) * skew_R2o2);
2341         const Mat3x3 dF1_dgP2 = Mat3x3(R1 * Mat3x3(Mat3x3(-D) * ddV_dgP2));
2342 
2343         const Mat3x3 dM1_dV1 = Mat3x3(skew_R1o1 * dF1_dV1);
2344         const Mat3x3 dM1_dgP1 = Mat3x3(skew_R1o1 * dF1_dgP1);
2345         const Mat3x3 dM1_dV2 = Mat3x3(skew_R1o1 * dF1_dV2);
2346         const Mat3x3 dM1_dgP2 = Mat3x3(skew_R1o1 * dF1_dgP2);
2347 
2348         const Mat3x3 dF2_dV1 = Mat3x3(-dF1_dV1);
2349         const Mat3x3 dF2_dgP1 = Mat3x3(-dF1_dgP1);
2350         const Mat3x3 dF2_dV2 = Mat3x3(-dF1_dV2);
2351         const Mat3x3 dF2_dgP2 = Mat3x3(-dF1_dgP2);
2352 
2353         const Mat3x3 dM2_dV1 = Mat3x3(skew_R2o2 * dF2_dV1);
2354         const Mat3x3 dM2_dgP1 = Mat3x3(skew_R2o2 * dF2_dgP1);
2355         const Mat3x3 dM2_dV2 = Mat3x3(skew_R2o2 * dF2_dV2);
2356         const Mat3x3 dM2_dgP2 = Mat3x3(skew_R2o2 * dF2_dgP2);
2357 #else
2358         const Mat3x3 skew_W2_0(MatCrossVec(W2_0));
2359         const Vec3 R1o1 = R1 * o1;
2360         const Mat3x3 skew_R1o1(MatCrossVec(R1o1));
2361         const Vec3 R1_0o1 = R1_0 * o1;
2362         const Mat3x3 skew_R1_0o1(MatCrossVec(R1_0o1));
2363         const Vec3 R2o2 = R2 * o2;
2364         const Mat3x3 skew_R2o2(MatCrossVec(R2o2));
2365         const Vec3 R2_0o2 = R2_0 * o2;
2366         const Mat3x3 skew_R2_0o2(MatCrossVec(R2_0o2));
2367         const Vec3 dX = Transpose(R1) * Vec3(X1 + R1o1 - X2 - R2o2);
2368         const Vec3 dV = Transpose(R1) * Vec3(V1 + Cross(W1, R1o1) - V2 - Cross(W2, R2o2));
2369         const Vec3 F1_R1 = -(S * dX + D * dV);
2370         const Vec3 F1 = R1 * F1_R1;
2371         const Vec3 F2 = -F1;
2372 
2373         const Mat3x3 dF1_dX1 = -R1 * Mat3x3(S * Transpose(R1));
2374 
2375         const Mat3x3 ddX_dg1 = Transpose(R1_0) * Mat3x3(MatCrossVec(X1 + R1o1 - X2 - R2o2)) - Transpose(R1) * skew_R1_0o1;
2376         const Mat3x3 ddV_dg1 = Transpose(R1_0) * Mat3x3(MatCrossVec(V1 + Cross(W1, R1o1) - V2 - Cross(W2, R2o2)))
2377                                                + Transpose(R1) * Mat3x3(skew_R1o1 * Mat3x3(MatCrossVec(W1_0)) - Mat3x3(MatCrossVec(W1)) * skew_R1_0o1);
2378         const Mat3x3 dF1_dg1 = Mat3x3(MatCrossVec(R1_0 * (-F1_R1))) - R1 * Mat3x3(S * ddX_dg1 + D * ddV_dg1);
2379 
2380         const Mat3x3 dF1_dX2 = R1 * Mat3x3(S * Transpose(R1));
2381         const Mat3x3 ddX_dg2 = Transpose(R1) * skew_R2_0o2;
2382         const Mat3x3 ddV_dg2 = Transpose(R1) * Mat3x3(skew_R2o2 * (-skew_W2_0) + skew_W2_0 * skew_R2_0o2);
2383         const Mat3x3 dF1_dg2 = -R1 * Mat3x3(S * ddX_dg2 + D * ddV_dg2);
2384 
2385         const Mat3x3 dF2_dX1 = -dF1_dX1;
2386         const Mat3x3 dF2_dg1 = -dF1_dg1;
2387         const Mat3x3 dF2_dX2 = -dF1_dX2;
2388         const Mat3x3 dF2_dg2 = -dF1_dg2;
2389 
2390         const Mat3x3 dM1_dX1 = skew_R1o1 * dF1_dX1;
2391         const Mat3x3 dM1_dg1 = Mat3x3(MatCrossVec(F1)) * skew_R1_0o1 + skew_R1o1 * dF1_dg1;
2392         const Mat3x3 dM1_dX2 = skew_R1o1 * dF1_dX2;
2393         const Mat3x3 dM1_dg2 = skew_R1o1 * dF1_dg2;
2394 
2395         const Mat3x3 dM2_dX1 = skew_R2o2 * dF2_dX1;
2396         const Mat3x3 dM2_dg1 = skew_R2o2 * dF2_dg1;
2397         const Mat3x3 dM2_dX2 = skew_R2o2 * dF2_dX2;
2398         const Mat3x3 dM2_dg2 = Mat3x3(MatCrossVec(F2)) * skew_R2_0o2 + skew_R2o2 * dF2_dg2;
2399 
2400         const Mat3x3 dF1_dV1 = R1 * Mat3x3((-D) * Transpose(R1));
2401         const Mat3x3 ddV_dgP1 = -Transpose(R1) * skew_R1o1;
2402         const Mat3x3 dF1_dgP1 = R1 * Mat3x3((-D) * ddV_dgP1);
2403         const Mat3x3 dF1_dV2 = R1 * Mat3x3(D * Transpose(R1));
2404         const Mat3x3 ddV_dgP2 = Transpose(R1) * skew_R2o2;
2405         const Mat3x3 dF1_dgP2 = R1 * Mat3x3((-D) * ddV_dgP2);
2406 
2407         const Mat3x3 dM1_dV1 = skew_R1o1 * dF1_dV1;
2408         const Mat3x3 dM1_dgP1 = skew_R1o1 * dF1_dgP1;
2409         const Mat3x3 dM1_dV2 = skew_R1o1 * dF1_dV2;
2410         const Mat3x3 dM1_dgP2 = skew_R1o1 * dF1_dgP2;
2411 
2412         const Mat3x3 dF2_dV1 = -dF1_dV1;
2413         const Mat3x3 dF2_dgP1 = -dF1_dgP1;
2414         const Mat3x3 dF2_dV2 = -dF1_dV2;
2415         const Mat3x3 dF2_dgP2 = -dF1_dgP2;
2416 
2417         const Mat3x3 dM2_dV1 = skew_R2o2 * dF2_dV1;
2418         const Mat3x3 dM2_dgP1 = skew_R2o2 * dF2_dgP1;
2419         const Mat3x3 dM2_dV2 = skew_R2o2 * dF2_dV2;
2420         const Mat3x3 dM2_dgP2 = skew_R2o2 * dF2_dgP2;
2421 #endif
2422 
2423         for (int i = 0; i < 3; ++i) {
2424             for (int j = 0; j < 3; ++j) {
2425                 WorkMat.PutCoef(i, j,     -dF1_dV1(i + 1, j + 1)  - dCoef * dF1_dX1(i + 1, j + 1));
2426                 WorkMat.PutCoef(i, j + 3, -dF1_dgP1(i + 1, j + 1) - dCoef * dF1_dg1(i + 1, j + 1));
2427                 WorkMat.PutCoef(i, j + 6, -dF1_dV2(i + 1, j + 1)  - dCoef * dF1_dX2(i + 1, j + 1));
2428                 WorkMat.PutCoef(i, j + 9, -dF1_dgP2(i + 1, j + 1) - dCoef * dF1_dg2(i + 1, j + 1));
2429 
2430                 WorkMat.PutCoef(i + 3, j,     -dM1_dV1(i + 1, j + 1)  - dCoef * dM1_dX1(i + 1, j + 1));
2431                 WorkMat.PutCoef(i + 3, j + 3, -dM1_dgP1(i + 1, j + 1) - dCoef * dM1_dg1(i + 1, j + 1));
2432                 WorkMat.PutCoef(i + 3, j + 6, -dM1_dV2(i + 1, j + 1)  - dCoef * dM1_dX2(i + 1, j + 1));
2433                 WorkMat.PutCoef(i + 3, j + 9, -dM1_dgP2(i + 1, j + 1) - dCoef * dM1_dg2(i + 1, j + 1));
2434 
2435                 WorkMat.PutCoef(i + 6, j,     -dF2_dV1(i + 1, j + 1)  - dCoef * dF2_dX1(i + 1, j + 1));
2436                 WorkMat.PutCoef(i + 6, j + 3, -dF2_dgP1(i + 1, j + 1) - dCoef * dF2_dg1(i + 1, j + 1));
2437                 WorkMat.PutCoef(i + 6, j + 6, -dF2_dV2(i + 1, j + 1)  - dCoef * dF2_dX2(i + 1, j + 1));
2438                 WorkMat.PutCoef(i + 6, j + 9, -dF2_dgP2(i + 1, j + 1)  - dCoef * dF2_dg2(i + 1, j + 1));
2439 
2440                 WorkMat.PutCoef(i + 9, j,     -dM2_dV1(i + 1, j + 1)  - dCoef * dM2_dX1(i + 1, j + 1));
2441                 WorkMat.PutCoef(i + 9, j + 3, -dM2_dgP1(i + 1, j + 1) - dCoef * dM2_dg1(i + 1, j + 1));
2442                 WorkMat.PutCoef(i + 9, j + 6, -dM2_dV2(i + 1, j + 1)  - dCoef * dM2_dX2(i + 1, j + 1));
2443                 WorkMat.PutCoef(i + 9, j + 9, -dM2_dgP2(i + 1, j + 1) - dCoef * dM2_dg2(i + 1, j + 1));
2444             }
2445         }
2446 
2447         return WorkMat;
2448     }
2449 
iGetNumRows() const2450     index_type iGetNumRows() const { return 12; }
iGetNumCols() const2451     index_type iGetNumCols() const { return NADVars; }
2452 };
2453 
testAssembly()2454 void testAssembly() {
2455     doublereal tckRes = 0;
2456     doublereal tckJacAD = 0;
2457     doublereal tckJac = 0;
2458     doublereal tckStart;
2459     long iIterCnt = 0;
2460     tic(tckStart);
2461 
2462     for (int loop = 0; loop < NLoopsAss; ++loop) {
2463         const int iNumNodes = 3;
2464 
2465         Node* nodes[iNumNodes] = { 0 };
2466 
2467         for (int i = 0; i < iNumNodes; ++i) {
2468             Vector<doublereal, 3> X0, XP0, Phi0, W0;
2469 
2470             for (int j = 0; j < 3; ++j) {
2471                 X0(j + 1) = ((i + 1) * 10 + j + 1);
2472                 XP0(j + 1) = ((i + 1) * 1000 + (j + 1) * 100);
2473                 Phi0(j + 1) = ((i + 1) * 0.1 + (j + 1) * 0.01);
2474                 W0(j + 1) = ((i + 1) * 0.1 + (j + 1) * 0.01);
2475             }
2476 
2477             Matrix<doublereal, 3, 3> R0;
2478 
2479             Euler123ToMatR(Phi0, R0);
2480 
2481             nodes[i] = new Node(X0, XP0, R0, W0);
2482         }
2483 
2484         int iNumDof = 0;
2485 
2486         for (int i = 0; i < iNumNodes; ++i) {
2487             nodes[i]->SetFirstDofIndex(iNumDof);
2488             iNumDof += nodes[i]->iGetNumDof();
2489         }
2490 
2491         const int iNumElem = iNumNodes - 1;
2492 
2493         Element* elements[iNumElem] = {0};
2494 
2495         for (int i = 0; i < iNumElem; ++i) {
2496             Vector<doublereal, 3> o1, o2;
2497 
2498             o1(1) = 1.;
2499             o1(2) = 2.;
2500             o1(3) = 3.;
2501             o2(1) = 4.;
2502             o2(2) = 5.;
2503             o2(3) = 6.;
2504 
2505             doublereal s = 100 * (i + 1);
2506             doublereal d = 10 * (i + 1);
2507             elements[i] = new Element1(nodes[i], o1, nodes[i + 1], o2, s, d);
2508         }
2509 
2510         blitz::Array<doublereal, 1> XCurr(iNumDof), XPrimeCurr(iNumDof);
2511 
2512         XCurr.initialize(0.);
2513         XPrimeCurr.initialize(0.);
2514 
2515         for (int i = 0; i < iNumNodes; ++i) {
2516             nodes[i]->SetValue(XCurr, XPrimeCurr);
2517         }
2518 
2519         blitz::Array<ResItem<doublereal>, 1> WorkVec;
2520         SparseSubMatrixHandler WorkMatSp;
2521         FullSubMatrixHandler WorkMatFull;
2522         blitz::Array<doublereal, 1> ResVec;
2523         blitz::Array<doublereal, 2> JacMatAD, JacMat;
2524 
2525         ResVec.resize(iNumDof);
2526         JacMatAD.resize(iNumDof, iNumDof);
2527         JacMat.resize(iNumDof, iNumDof);
2528 
2529         ResVec.initialize(0.);
2530         JacMatAD.initialize(0.);
2531         JacMat.initialize(0.);
2532 
2533         const doublereal dCoef = 0.001;
2534 
2535         for (int iTime = 0; iTime < 100; ++iTime) {
2536         	iIterCnt++;
2537 
2538             for (int i = 0; i < iNumNodes; ++i) {
2539                 nodes[i]->Update(XCurr, XPrimeCurr, dCoef);
2540             }
2541 
2542             for (int i = 0; i < iNumElem; ++i) {
2543                 tic();
2544 
2545                 elements[i]->AssRes(WorkVec, dCoef);
2546 
2547                 tckRes += toc();
2548 
2549                 for (int j = 0; j < WorkVec.rows(); ++j) {
2550                     ResVec(WorkVec(j).iEquIndex) += WorkVec(j).dCoef;
2551                 }
2552 
2553                 tic();
2554 
2555                 elements[i]->AssJac(WorkMatSp, dCoef);
2556 
2557                 tckJacAD += toc();
2558 
2559                 WorkMatSp.AddTo(JacMatAD);
2560 
2561                 tic();
2562                 elements[i]->AssJac(WorkMatFull, dCoef);
2563                 tckJac += toc();
2564 
2565                 WorkMatFull.AddTo(JacMat);
2566             }
2567 
2568             for (int i = 0; i < JacMat.rows(); ++i) {
2569                 for (int j = 0; j < JacMat.cols(); ++j) {
2570                     const doublereal dTol = sqrt(std::numeric_limits<scalar_deriv_type>::epsilon()) * std::max(1., std::max(std::abs(JacMat(i, j)), std::abs(JacMatAD(i, j))));
2571                     if(std::abs(JacMat(i, j) - JacMatAD(i, j)) > dTol) {
2572                     	throw std::runtime_error("testAssembly(): incorrect result");
2573                     }
2574                 }
2575             }
2576         }
2577 
2578         if (loop == 0) {
2579             std::cout << "ResVec = [" << std::endl;
2580 
2581             for (int i = 0; i < iNumDof; ++i) {
2582                 std::cout << std::setw(10) << ResVec(i) << std::endl;
2583             }
2584 
2585             std::cout << "]" << std::endl;
2586 
2587             std::cout << "JacMatAD = [" << std::endl;
2588 
2589             for (int i = 0; i < iNumDof; ++i) {
2590                 for (int j = 0; j < iNumDof; ++j) {
2591                     std::cout << std::setw(10) << std::setprecision(16) << JacMatAD(i, j) << " ";
2592                 }
2593                 std::cout << std::endl;
2594             }
2595 
2596             std::cout << "]" << std::endl;
2597 
2598             std::cout << "JacMat = [" << std::endl;
2599 
2600             for (int i = 0; i < iNumDof; ++i) {
2601                 for (int j = 0; j < iNumDof; ++j) {
2602                     std::cout << std::setw(10) << std::setprecision(16) << JacMat(i, j) << " ";
2603                 }
2604                 std::cout << std::endl;
2605             }
2606 
2607             std::cout << "]" << std::endl;
2608         }
2609 
2610         for (int i = 0; i < iNumElem; ++i) {
2611             delete elements[i];
2612         }
2613 
2614         for (int i = 0; i < iNumNodes; ++i) {
2615             delete nodes[i];
2616         }
2617     }
2618 
2619     doublereal tckEnd;
2620     tic(tckEnd);
2621     std::cerr << "number of iterations:" << iIterCnt << std::endl;
2622     std::cerr << "AssRes:" << tckRes / iIterCnt << std::endl;
2623     std::cerr << "AssJacAD:" << tckJacAD / iIterCnt << std::endl;
2624     std::cerr << "AssJac:" << tckJac / iIterCnt << std::endl;
2625     std::cerr << "overhead:" << tckJacAD / tckJac << std::endl;
2626     std::cerr << "total time:" << (tckEnd - tckStart) / iIterCnt << std::endl;
2627 }
2628 
2629 #endif
2630 }
2631 
testMatVec3()2632 void testMatVec3() {
2633 	srand(0);
2634 
2635 	for (int n = 0; n < NLoops; ++n) {
2636 		Vector<doublereal, 3> g1, v1;
2637 		Vec3 g2, v2;
2638 		for (index_type i = 1; i <= 3; ++i) {
2639 			g2(i) = g1(i) = random1() * 1e-1;
2640 			v2(i) = v1(i) = random1() * 1e1;
2641 		}
2642 
2643 		Matrix<doublereal, 3, 3> R1(MatGVec(g1));
2644 		Mat3x3 R2(CGR_Rot::MatG, g2);
2645 		Matrix<doublereal, 3, 3> C1(MatCrossVec(g1));
2646 		Mat3x3 C2(MatCross, g2);
2647 		Matrix<doublereal, 3, 3> CC1(MatCrossCrossVec(g1));
2648 		Mat3x3 CC2(MatCrossCross, g2, g2);
2649 		Vector<doublereal, 3> CCv1 = CC1 * v1;
2650 		Vec3 CCv2 = CC2 * v2;
2651 		Vector<doublereal, 3> CCv3 = CC1 * v2;
2652 		Vector<doublereal, 3> CCv4 = CC2 * v1;
2653 		Matrix<doublereal, 3, 3> A1 = R1 * C1;
2654 		Mat3x3 A2 = R2 * C2;
2655 		Matrix<doublereal, 3, 3> A3 = R1 * C2;
2656 		Matrix<doublereal, 3, 3> A4 = R2 * C1;
2657 		Vector<doublereal, 3> v3(v1(1), v1(2), v1(3));
2658 		Vector<doublereal, 2> v4(v1(1), v1(2));
2659         Matrix<doublereal, 3, 3> X1(MatCrossVec(g1, 1.));
2660         Mat3x3 X2(1., g2);
2661 
2662 		if (n == 0) {
2663 			std::cout << "g1=" << std::endl << g1 << std::endl;
2664 			std::cout << "g2=" << std::endl << g2 << std::endl;
2665 			std::cout << "R1=" << std::endl << R1 << std::endl;
2666 			std::cout << "R2=" << std::endl << R2 << std::endl;
2667 			std::cout << "C1=" << std::endl << C1 << std::endl;
2668 			std::cout << "C2=" << std::endl << C2 << std::endl;
2669 			std::cout << "CC1=" << std::endl << CC1 << std::endl;
2670 			std::cout << "CC2=" << std::endl << CC2 << std::endl;
2671 			std::cout << "CCv1=" << std::endl << CCv1 << std::endl;
2672 			std::cout << "CCv2=" << std::endl << CCv2 << std::endl;
2673 			std::cout << "CCv3=" << std::endl << CCv3 << std::endl;
2674 			std::cout << "CCv4=" << std::endl << CCv4 << std::endl;
2675 			std::cout << "A1=" << std::endl << A1 << std::endl;
2676 			std::cout << "A2=" << std::endl << A2 << std::endl;
2677 			std::cout << "A3=" << std::endl << A3 << std::endl;
2678 			std::cout << "A4=" << std::endl << A4 << std::endl;
2679 		}
2680 
2681         const doublereal dTol = 10*std::numeric_limits<scalar_deriv_type>::epsilon();
2682 
2683 		for (index_type i = 1; i <= 3; ++i) {
2684 			for (index_type j = 1; j <= 3; ++j) {
2685 				assert(std::abs(R1(i, j) - R2(i, j)) < dTol);
2686 				assert(std::abs(C1(i, j) - C2(i, j)) < dTol);
2687 				assert(std::abs(CC1(i, j) - CC2(i, j)) < dTol);
2688 				assert(std::abs(A1(i, j) - A2(i, j)) < dTol);
2689 				assert(std::abs(A1(i, j) - A3(i, j)) < dTol);
2690 				assert(std::abs(A1(i, j) - A4(i, j)) < dTol);
2691                 assert(std::abs(X1(i, j) - X2(i, j)) < dTol);
2692 			}
2693 			assert(std::abs(CCv1(i) - CCv2(i)) < dTol);
2694 			assert(std::abs(CCv3(i) - CCv1(i)) < dTol);
2695 			assert(std::abs(CCv4(i) - CCv1(i)) < dTol);
2696 			assert(v3(i) == v1(i));
2697 
2698 			if (i < 3) {
2699 				assert(v4(i) == v1(i));
2700 			}
2701 		}
2702 	}
2703 }
2704 
testSubVecAss()2705 void testSubVecAss() {
2706 	LocalDofMap dof;
2707 	const integer N = 3;
2708 	MySubVectorHandler vh(N);
2709 
2710 	GradientAssVec<doublereal> WorkVec(vh);
2711 
2712 	for (integer i = 1; i <= N; ++i) {
2713 		WorkVec.AddItem(i, i * 10.);
2714 	}
2715 
2716 	SparseSubMatrixHandler mh(4*N);
2717 	GradientAssVec<Gradient<4> > WorkMat(mh);
2718 
2719 	for (integer i = 1; i <= N; ++i) {
2720 		Gradient<4> g;
2721 		g.SetValue(i * 10.);
2722 		g.DerivativeResizeReset(&dof, 1, 5, MapVectorBase::GLOBAL, 0.);
2723 		for (integer k = 1; k <= 4; ++k) {
2724 			g.SetDerivativeGlobal(k, i + k * 0.1);
2725 		}
2726 
2727 		WorkMat.AddItem(i, g);
2728 	}
2729 
2730 	std::cout << "WorkVec=" << std::endl;
2731 	for (integer i = 1; i <= vh.iGetSize(); ++i) {
2732 		std::cout << " " << vh.dGetCoef(i) << std::endl;
2733 	}
2734 
2735 	std::cout << std::endl;
2736 
2737 	std::cout << "WorkMat=" << std::endl;
2738 
2739 	for (integer i = 1; i <= mh.iGetNumRows(); ++i) {
2740 		std::cout << " " << mh.iGetRowIndex(i)
2741 				  << " " << mh.iGetColIndex(i)
2742 				  << " " << mh.dGetCoef(i, 0) << std::endl;
2743 	}
2744 
2745 	std::cout << std::endl;
2746 }
2747 
testSubVecAssMatVec()2748 void testSubVecAssMatVec() {
2749 	LocalDofMap dof;
2750 	const integer N = 3;
2751 	MySubVectorHandler vh(2*N);
2752 
2753 	Vector<doublereal, 3> v;
2754 
2755 	for (integer i = 1; i <= 3; ++i) {
2756 		v(i) = i * 10;
2757 	}
2758 
2759 	GradientAssVec<doublereal> WorkVec(vh);
2760 
2761 	WorkVec.AddItem(1, v);
2762 
2763 	SparseSubMatrixHandler mh(2*4*N);
2764 	GradientAssVec<Gradient<4> > WorkMat(mh);
2765 
2766 	Vector<Gradient<4>, 3> g;
2767 
2768 	for (integer i = 1; i <= 3; ++i) {
2769 		g(i).SetValue(i * 10.);
2770 		g(i).DerivativeResizeReset(&dof, 1, 5, MapVectorBase::GLOBAL, 0.);
2771 		for (integer k = 1; k <= 4; ++k) {
2772 			g(i).SetDerivativeGlobal(k, i + k * 0.1);
2773 		}
2774 	}
2775 
2776 	WorkMat.AddItem(1, g);
2777 
2778 	GradientAssVec<doublereal> WorkVec2(vh, GradientAssVecBase::APPEND);
2779 	Vector<doublereal, 3> v2 = v * 2.;
2780 	WorkVec2.AddItem(4, v2);
2781 
2782 	GradientAssVec<Gradient<4> > WorkMat2(mh, GradientAssVecBase::APPEND);
2783 	Vector<Gradient<4>, 3> g2 = g * 2.;
2784 	WorkMat2.AddItem(4, g2);
2785 
2786 	std::cout << "v=" << std::endl << v << std::endl;
2787 	std::cout << "WorkVec=" << std::endl;
2788 	for (integer i = 1; i <= vh.iGetSize(); ++i) {
2789 		std::cout << " " << vh.iGetRowIndex(i) << " " << vh.dGetCoef(i) << std::endl;
2790 	}
2791 
2792 	std::cout << std::endl;
2793 
2794 	std::cout << "g=" << std::endl << g << std::endl;
2795 
2796 	std::cout << "WorkMat=" << std::endl;
2797 
2798 	for (integer i = 1; i <= mh.iGetNumRows(); ++i) {
2799 		std::cout << " " << mh.iGetRowIndex(i)
2800 				  << " " << mh.iGetColIndex(i)
2801 				  << " " << mh.dGetCoef(i, 0) << std::endl;
2802 	}
2803 
2804 	std::cout << std::endl;
2805 }
2806 
testInv()2807 void testInv() {
2808 	Matrix<doublereal, 2, 2> A;
2809 
2810 	A(1, 1) =  0.658371336838182;
2811 	A(1, 2) = 0.733036075010795;
2812 	A(2, 1) = 0.483830962404444;
2813 	A(2, 2) = 0.395950513263802;
2814 
2815 	const doublereal detA = Det(A);
2816 	const Matrix<doublereal, 2, 2> invA = Inv(A);
2817 	const Matrix<doublereal, 2, 2> B1 = invA * A;
2818 	const Matrix<doublereal, 2, 2> B2 = A * invA;
2819 	std::cout << "A=" << Tabular(A) << std::endl;
2820 	std::cout << "Inv(A)=" << Tabular(invA) << std::endl;
2821 	std::cout << "Det(A)=" << detA << std::endl;
2822 	std::cout << "Inv(A)*A=" << Tabular(B1) << std::endl;
2823 	std::cout << "A * Inv(A)=" << Tabular(B2) << std::endl;
2824 
2825 	const doublereal dTol = sqrt(std::numeric_limits<doublereal>::epsilon());
2826 
2827 	assert(bCompare(B1(1, 1), 1., dTol));
2828 	assert(bCompare(B2(1, 1), 1., dTol));
2829 	assert(bCompare(B1(2, 2), 1., dTol));
2830 	assert(bCompare(B2(2, 2), 1., dTol));
2831 	assert(bCompare(B1(1, 2), 0., dTol));
2832 	assert(bCompare(B2(1, 2), 0., dTol));
2833 	assert(bCompare(B1(2, 1), 0., dTol));
2834 	assert(bCompare(B2(2, 1), 0., dTol));
2835 
2836 	assert(bCompare(invA(1, 1), -4.21299780160758, dTol));
2837 	assert(bCompare(invA(2, 2), -7.00521126207687, dTol));
2838 	assert(bCompare(invA(1, 2), 7.79965998039245, dTol));
2839 	assert(bCompare(invA(2, 1), 5.14806449967026, dTol));
2840 	assert(bCompare(detA, -0.0939830809103952, dTol));
2841 }
2842 
testSolve()2843 void testSolve() {
2844 	const Mat3x3 A(1.32972137393521,      0.61849905020148,     0.709385530146435,
2845 				   0.61849905020148,     0.290559435134808,     0.344471283014357,
2846 				   0.709385530146435,     0.344471283014357,     0.752776020323268);
2847 
2848 	const Vec3 b(0.815664130323409,
2849 		     	 0.255816061333836,
2850 		     	 0.416955203644826);
2851 
2852 	const doublereal dTol = sqrt(std::numeric_limits<doublereal>::epsilon());
2853 
2854 	const Vec3 x1 = A.Solve(b);
2855 	const Vec3 x2 = A.LDLSolve(b);
2856 	const doublereal f1 = (A * x1 - b).Norm();
2857 	const doublereal f2 = (A * x2 - b).Norm();
2858 	std::cout << "A=" << Tabular(Matrix<doublereal, 3, 3>(A)) << std::endl;
2859 	std::cout << "b=" << b << std::endl;
2860 	std::cout << "x1=" << x1 << std::endl;
2861 	std::cout << "x2=" << x2 << std::endl;
2862 	std::cout << "f1=" << f1 << std::endl;
2863 	std::cout << "f2=" << f2 << std::endl;
2864 	assert(f1 < dTol);
2865 	assert(f2 < dTol);
2866 }
2867 
2868 doublereal dStartTime;
2869 
tic(doublereal & dTime)2870 void tic(doublereal& dTime) {
2871 	dTime = mbdyn_clock_time();
2872 }
2873 
tic()2874 void tic() {
2875     dStartTime = mbdyn_clock_time();
2876 }
2877 
toc()2878 doublereal toc() {
2879 	return mbdyn_clock_time() - dStartTime;
2880 }
2881 
2882 template <index_type N>
testMatVec()2883 void testMatVec() {
2884 	doublereal c_MatVecGradient[N], cd_MatVecGradient[N][N];
2885 	doublereal c_MatVecDouble[N];
2886 
2887 	std::cerr << "---------------------------\ntestMatVecGradient<" << N << ">()\n";
2888     testMatVecGradient<N>(c_MatVecGradient, cd_MatVecGradient);
2889 
2890     std::cerr << "---------------------------\ntestMatVecDouble<" << N << ">()\n";
2891     testMatVecDouble<N>(c_MatVecDouble);
2892 
2893 #ifdef HAVE_BLITZ
2894 	doublereal c_MatVecDoubleBlitz[N];
2895 	doublereal c_MatVecGradientBlitz[N], cd_MatVecGradientBlitz[N][N];
2896     std::cerr << "---------------------------\ntestMatVecDoubleBlitz<" << N << ">()\n";
2897     testMatVecDoubleBlitz<N>(c_MatVecDoubleBlitz);
2898 
2899 	std::cerr << "---------------------------\ntestMatVecGradientBlitz<" << N << ">()\n";
2900     testMatVecGradientBlitz<N>(c_MatVecGradientBlitz, cd_MatVecGradientBlitz);
2901 #endif // HAVE_BLITZ
2902 
2903     const doublereal dTol = 10 * std::numeric_limits<scalar_deriv_type>::epsilon();
2904 
2905     for (int i = 0; i < N; ++i) {
2906     	assert(bCompare(c_MatVecGradient[i], c_MatVecDouble[i], dTol));
2907 #ifdef HAVE_BLITZ
2908     	assert(bCompare(c_MatVecGradient[i], c_MatVecDoubleBlitz[i], dTol));
2909     	assert(bCompare(c_MatVecGradient[i], c_MatVecGradientBlitz[i], dTol));
2910 
2911     	for (int j = 0; j < N; ++j) {
2912     		assert(bCompare(cd_MatVecGradient[i][j], cd_MatVecGradientBlitz[i][j]));
2913     	}
2914 #endif
2915     }
2916 }
2917 
2918 template <int iNumDofMax>
cppad_benchmark1(const int N)2919 void cppad_benchmark1(const int N) {
2920     const int iNumDof = 6;
2921 
2922     assert(iNumDofMax == 0 || iNumDofMax >= iNumDof);
2923 
2924     Matrix<Gradient<iNumDofMax>, 3, 3> R;
2925     Vector<Gradient<iNumDofMax>, 3> X, Y, Phi;
2926     Vector<doublereal, 3> o;
2927     LocalDofMap dof;
2928 
2929     o(1) = 1.;
2930     o(2) = 2.;
2931     o(3) = 3.;
2932 
2933     Matrix<doublereal, 3, iNumDof> jac;
2934 
2935     const double start = mbdyn_clock_time();
2936     double calc = 0.;
2937 
2938     for (int loop = 0; loop < N; ++loop) {
2939         for (int i = 0; i < 3; ++i) {
2940             X(i + 1).SetValuePreserve(0.);
2941             X(i + 1).DerivativeResizeReset(&dof, i, MapVectorBase::GLOBAL, 1.);
2942             Phi(i + 1).SetValuePreserve(0.);
2943             Phi(i + 1).DerivativeResizeReset(&dof, i + 3, MapVectorBase::GLOBAL, 1.);
2944         }
2945 
2946         const double start_calc = mbdyn_clock_time();
2947 
2948         gradVecAssTest::Euler123ToMatR(Phi, R);
2949 
2950         Y = X + R * o;
2951 
2952         calc += mbdyn_clock_time() - start_calc;
2953 
2954         for (int i = 0; i < 3; ++i) {
2955             for (int j = 0; j < iNumDof; ++j) {
2956                 jac(i + 1, j + 1) = Y(i + 1).dGetDerivativeGlobal(j);
2957             }
2958         }
2959     }
2960 
2961     std::cout << "calculation time: " << calc/N << "s\n";
2962     std::cout << "elapsed time: " << (mbdyn_clock_time() - start)/N << "s\n";
2963 
2964     std::cout << "x=" << X << std::endl;
2965     std::cout << "y=" << Y << std::endl;
2966     std::cout << "jac=\n";
2967 
2968     for (int i = 0; i < 3; ++i) {
2969         for (int j = 0; j < iNumDof; ++j) {
2970             std::cout << jac(i + 1, j + 1) << '\t';
2971         }
2972         std::cout << std::endl;
2973     }
2974 }
2975 
2976 template <int iNumDofMax>
cppad_benchmark2(const int N)2977 void cppad_benchmark2(const int N) {
2978     const int iNumDof = 12;
2979 
2980     assert(iNumDofMax == 0 || iNumDofMax >= iNumDof);
2981 
2982     Matrix<Gradient<iNumDofMax>, 3, 3> R1, R2;
2983     Vector<Gradient<iNumDofMax>, 3> X1, X2, Y, Phi1, Phi2;
2984     Vector<doublereal, 3> o1, o2;
2985     LocalDofMap dof;
2986 
2987     o1(1) = 1.;
2988     o1(2) = 2.;
2989     o1(3) = 3.;
2990 
2991     o2(1) = 1.;
2992     o2(2) = 2.;
2993     o2(3) = 3.;
2994 
2995     Matrix<doublereal, 3, iNumDof> jac;
2996 
2997     const double start = mbdyn_clock_time();
2998     double calc = 0.;
2999 
3000     for (int loop = 0; loop < N; ++loop) {
3001         for (int i = 0; i < 3; ++i) {
3002             X1(i + 1).SetValuePreserve(0.);
3003             X1(i + 1).DerivativeResizeReset(&dof, i, MapVectorBase::GLOBAL, 1.);
3004             Phi1(i + 1).SetValuePreserve(0.);
3005             Phi1(i + 1).DerivativeResizeReset(&dof, i + 3, MapVectorBase::GLOBAL, 1.);
3006             X2(i + 1).SetValuePreserve(0.);
3007             X2(i + 1).DerivativeResizeReset(&dof, i + 6, MapVectorBase::GLOBAL, 1.);
3008             Phi2(i + 1).SetValuePreserve(0.);
3009             Phi2(i + 1).DerivativeResizeReset(&dof, i + 9, MapVectorBase::GLOBAL, 1.);
3010         }
3011 
3012         const double start_calc = mbdyn_clock_time();
3013 
3014         gradVecAssTest::Euler123ToMatR(Phi1, R1);
3015         gradVecAssTest::Euler123ToMatR(Phi2, R2);
3016 
3017         Y = X1 + R1 * o1 - X2 - R2 * o2;
3018 
3019         calc += mbdyn_clock_time() - start_calc;
3020 
3021         for (int i = 0; i < 3; ++i) {
3022             for (int j = 0; j < iNumDof; ++j) {
3023                 jac(i + 1, j + 1) = Y(i + 1).dGetDerivativeGlobal(j);
3024             }
3025         }
3026     }
3027 
3028     std::cout << "calculation time: " << calc/N << "s\n";
3029     std::cout << "elapsed time: " << (mbdyn_clock_time() - start)/N << "s\n";
3030 
3031     std::cout << "X1=" << X1 << std::endl;
3032     std::cout << "X2=" << X2 << std::endl;
3033     std::cout << "Y=" << Y << std::endl;
3034     std::cout << "jac=\n";
3035 
3036     for (int i = 0; i < 3; ++i) {
3037         for (int j = 0; j < iNumDof; ++j) {
3038             std::cout << jac(i + 1, j + 1) << '\t';
3039         }
3040         std::cout << std::endl;
3041     }
3042 }
3043 
3044 template <int iNumDofMax>
cppad_benchmark3(const int N)3045 void cppad_benchmark3(const int N) {
3046     const int iNumDof = 12;
3047 
3048     assert(iNumDofMax == 0 || iNumDofMax >= iNumDof);
3049 
3050     typedef Matrix<Gradient<iNumDofMax>, 3, 3> Mat3x3;
3051     typedef Vector<Gradient<iNumDofMax>, 3> Vec3;
3052     typedef Vector<doublereal, 3> CVec3;
3053 
3054     Mat3x3 R1, R2;
3055     Vec3 X1, X2, Y, Phi1, Phi2;
3056     CVec3 o1, o2;
3057     LocalDofMap dof;
3058 
3059     o1(1) = 1.;
3060     o1(2) = 2.;
3061     o1(3) = 3.;
3062 
3063     o2(1) = 1.;
3064     o2(2) = 2.;
3065     o2(3) = 3.;
3066 
3067     Matrix<doublereal, 3, iNumDof> jac;
3068 
3069     const double start = mbdyn_clock_time();
3070     double calc = 0.;
3071 
3072     for (int loop = 0; loop < N; ++loop) {
3073         for (int i = 0; i < 3; ++i) {
3074             X1(i + 1).SetValuePreserve(0.);
3075             X1(i + 1).DerivativeResizeReset(&dof, i, MapVectorBase::GLOBAL, 1.);
3076             Phi1(i + 1).SetValuePreserve(0.);
3077             Phi1(i + 1).DerivativeResizeReset(&dof, i + 3, MapVectorBase::GLOBAL, 1.);
3078             X2(i + 1).SetValuePreserve(0.);
3079             X2(i + 1).DerivativeResizeReset(&dof, i + 6, MapVectorBase::GLOBAL, 1.);
3080             Phi2(i + 1).SetValuePreserve(0.);
3081             Phi2(i + 1).DerivativeResizeReset(&dof, i + 9, MapVectorBase::GLOBAL, 1.);
3082         }
3083 
3084         const double start_calc = mbdyn_clock_time();
3085 
3086         gradVecAssTest::Euler123ToMatR(Phi1, R1);
3087         gradVecAssTest::Euler123ToMatR(Phi2, R2);
3088 
3089         Y = Transpose(R2) * Vec3(X1 + R1 * o1 - X2) - o2;
3090 
3091         calc += mbdyn_clock_time() - start_calc;
3092 
3093         for (int i = 0; i < 3; ++i) {
3094             for (int j = 0; j < iNumDof; ++j) {
3095                 jac(i + 1, j + 1) = Y(i + 1).dGetDerivativeGlobal(j);
3096             }
3097         }
3098     }
3099 
3100     std::cout << "calculation time: " << calc/N << "s\n";
3101     std::cout << "elapsed time: " << (mbdyn_clock_time() - start)/N << "s\n";
3102 
3103     std::cout << "X1=" << X1 << std::endl;
3104     std::cout << "X2=" << X2 << std::endl;
3105     std::cout << "Y=" << Y << std::endl;
3106     std::cout << "jac=\n";
3107 
3108     for (int i = 0; i < 3; ++i) {
3109         for (int j = 0; j < iNumDof; ++j) {
3110             std::cout << jac(i + 1, j + 1) << '\t';
3111         }
3112         std::cout << std::endl;
3113     }
3114 }
3115 
Mat3xN_test(int N,int M)3116 void Mat3xN_test(int N, int M) {
3117     Mat3xN A(N, 0.);
3118 
3119     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3120         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3121             A(i, j) = 100 * i + j;
3122         }
3123     }
3124 
3125     Vector<doublereal, DYNAMIC_SIZE> x1(A.iGetNumCols()), x2(A.iGetNumCols()), x(A.iGetNumCols());
3126 
3127     for (index_type i = 1; i <= x.iGetNumRows(); ++i) {
3128         x1(i) = i;
3129         x2(i) = 10 * i;
3130     }
3131 
3132     Vector<doublereal, 3> b = A * x;
3133 
3134     const double start = mbdyn_clock_time();
3135 
3136     for (int i = 0; i < M; ++i) {
3137         x = x1 * 3. + x2 * 5.;
3138         b = A * x;
3139     }
3140 
3141     std::cerr << "Mat3xN: " << (mbdyn_clock_time() - start) / M << "s\n";
3142 
3143     const doublereal tol = sqrt(std::numeric_limits<doublereal>::epsilon());
3144 
3145     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3146         doublereal b_i = 0.;
3147 
3148         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3149             b_i += A(i, j) * (3. * x1(j) + 5. * x2(j));
3150         }
3151 
3152         assert(bCompare(b_i, b(i), tol));
3153     }
3154 }
3155 
MatNxN_test(int N,int M)3156 void MatNxN_test(int N, int M) {
3157     MatNxN A(N, 0.);
3158 
3159     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3160         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3161             A(i, j) = 100 * i + j;
3162         }
3163     }
3164 
3165     Vector<doublereal, DYNAMIC_SIZE> x1(A.iGetNumCols()), x2(A.iGetNumCols()), x(A.iGetNumCols());
3166 
3167     for (index_type i = 1; i <= x.iGetNumRows(); ++i) {
3168         x1(i) = i;
3169         x2(i) = 10 * i;
3170     }
3171 
3172     Vector<doublereal, DYNAMIC_SIZE> b = A * x;
3173 
3174     const double start = mbdyn_clock_time();
3175 
3176     for (int i = 0; i < M; ++i) {
3177         x = x1 * 3. + x2 * 5.;
3178         b = A * x;
3179     }
3180 
3181     std::cerr << "MatNxN: " << (mbdyn_clock_time() - start) / M << "s\n";
3182 
3183     const doublereal tol = sqrt(std::numeric_limits<doublereal>::epsilon());
3184 
3185     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3186         doublereal b_i = 0.;
3187 
3188         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3189             b_i += A(i, j) * (3. * x1(j) + 5. * x2(j));
3190         }
3191 
3192         assert(bCompare(b_i, b(i), tol));
3193     }
3194 }
3195 
MatDynamic_test(index_type iNumRows,index_type iNumCols,index_type iNumLoops)3196 void MatDynamic_test(index_type iNumRows, index_type iNumCols, index_type iNumLoops) {
3197     Matrix<doublereal, DYNAMIC_SIZE, DYNAMIC_SIZE> A(iNumRows, iNumCols);
3198 
3199     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3200         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3201             A(i, j) = 100 * i + j;
3202         }
3203     }
3204 
3205     Vector<doublereal, DYNAMIC_SIZE> x1(A.iGetNumCols()), x2(A.iGetNumCols()), x(A.iGetNumCols());
3206 
3207     for (index_type i = 1; i <= x.iGetNumRows(); ++i) {
3208         x1(i) = i;
3209         x2(i) = 10 * i;
3210     }
3211 
3212     Vector<doublereal, DYNAMIC_SIZE> b = A * x;
3213 
3214     const double start = mbdyn_clock_time();
3215 
3216     for (int i = 0; i < iNumLoops; ++i) {
3217         x = x1 * 3. + x2 * 5.;
3218         b = A * x;
3219     }
3220 
3221     std::cerr << "Matrix<DYNAMIC_SIZE, DYNAMIC_SIZE>: " << (mbdyn_clock_time() - start) / iNumLoops << "s\n";
3222 
3223     const doublereal tol = sqrt(std::numeric_limits<doublereal>::epsilon());
3224 
3225     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3226         doublereal b_i = 0.;
3227 
3228         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3229             b_i += A(i, j) * (3. * x1(j) + 5. * x2(j));
3230         }
3231 
3232         assert(bCompare(b_i, b(i), tol));
3233     }
3234 }
3235 
3236 
3237 template <index_type N_SIZE>
Mat3xN_test_grad(int iNumDeriv,int N,int M)3238 void Mat3xN_test_grad(int iNumDeriv, int N, int M) {
3239     assert((N_SIZE == 0) || (N_SIZE >= iNumDeriv));
3240 
3241     LocalDofMap dof;
3242 
3243     Mat3xN A(N, 0.);
3244 
3245     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3246         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3247             A(i, j) = 100 * i + j;
3248         }
3249     }
3250 
3251     Vector<Gradient<N_SIZE>, DYNAMIC_SIZE> x1(A.iGetNumCols()), x2(A.iGetNumCols()), x(A.iGetNumCols());
3252 
3253     for (index_type i = 1; i <= x.iGetNumRows(); ++i) {
3254         x1(i).SetValuePreserve(i);
3255         x1(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3256 
3257         for (index_type k = 0; k < iNumDeriv; ++k) {
3258             x1(i).SetDerivativeLocal(k, -1. * k);
3259         }
3260 
3261         x2(i).SetValuePreserve(10 * i);
3262         x2(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3263 
3264         for (index_type k = 0; k < iNumDeriv; ++k) {
3265             x2(i).SetDerivativeLocal(k, 10. * k);
3266         }
3267     }
3268 
3269     Vector<Gradient<N_SIZE>, 3> b = A * x;
3270 
3271     const double start = mbdyn_clock_time();
3272 
3273     for (int i = 0; i < M; ++i) {
3274         x = x1 * 3. + x2 * 5.;
3275         b = A * x;
3276     }
3277 
3278     std::cerr << "Mat3xN * Gradient: " << (mbdyn_clock_time() - start) / M << "s\n";
3279 
3280     const doublereal tol = sqrt(std::numeric_limits<doublereal>::epsilon());
3281 
3282     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3283         Gradient<N_SIZE> b_i;
3284 
3285         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3286             b_i += A(i, j) * (3. * x1(j) + 5. * x2(j));
3287         }
3288 
3289         assert(bCompare(b_i, b(i), tol));
3290     }
3291 }
3292 
3293 template <index_type N_SIZE>
Mat3xNT_test_grad(int iNumDeriv,int N,int M)3294 void Mat3xNT_test_grad(int iNumDeriv, int N, int M) {
3295     assert((N_SIZE == 0) || (N_SIZE >= iNumDeriv));
3296 
3297     LocalDofMap dof;
3298 
3299     Mat3xN A(N, 0.);
3300 
3301     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3302         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3303             A(i, j) = 100 * i + j;
3304         }
3305     }
3306 
3307     Vector<Gradient<N_SIZE>, 3> x1(A.iGetNumRows()), x2(A.iGetNumRows());
3308 
3309     for (index_type i = 1; i <= x1.iGetNumRows(); ++i) {
3310         x1(i).SetValuePreserve(i);
3311         x1(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3312 
3313         for (index_type k = 0; k < iNumDeriv; ++k) {
3314             x1(i).SetDerivativeLocal(k, -1. * k);
3315         }
3316 
3317         x2(i).SetValuePreserve(10 * i);
3318         x2(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3319 
3320         for (index_type k = 0; k < iNumDeriv; ++k) {
3321             x2(i).SetDerivativeLocal(k, 10. * k);
3322         }
3323     }
3324 
3325     Vector<Gradient<N_SIZE>, DYNAMIC_SIZE> b = Transpose(A) * x1;
3326 
3327     const double start = mbdyn_clock_time();
3328 
3329     for (int i = 0; i < M; ++i) {
3330         b = Transpose(A) * (x1 * 3. + x2 * 5.);
3331     }
3332 
3333     std::cerr << "Transpose(Mat3xN) * Gradient: " << (mbdyn_clock_time() - start) / M << "s\n";
3334 
3335     const doublereal tol = sqrt(std::numeric_limits<doublereal>::epsilon());
3336 
3337     for (index_type i = 1; i <= A.iGetNumCols(); ++i) {
3338         Gradient<N_SIZE> b_i;
3339 
3340         for (index_type j = 1; j <= A.iGetNumRows(); ++j) {
3341             b_i += A(j, i) * (3. * x1(j) + 5. * x2(j));
3342         }
3343 
3344         assert(bCompare(b_i, b(i), tol));
3345     }
3346 }
3347 
3348 template <index_type N_SIZE>
MatNxNT_test_grad(int iNumDeriv,int N,int M)3349 void MatNxNT_test_grad(int iNumDeriv, int N, int M) {
3350     assert((N_SIZE == 0) || (N_SIZE >= iNumDeriv));
3351 
3352     LocalDofMap dof;
3353 
3354     MatNxN A(N, 0.);
3355 
3356     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3357         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3358             A(i, j) = 100 * i + j;
3359         }
3360     }
3361 
3362     Vector<Gradient<N_SIZE>, DYNAMIC_SIZE> x1(A.iGetNumRows()), x2(A.iGetNumRows());
3363 
3364     for (index_type i = 1; i <= x1.iGetNumRows(); ++i) {
3365         x1(i).SetValuePreserve(i);
3366         x1(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3367 
3368         for (index_type k = 0; k < iNumDeriv; ++k) {
3369             x1(i).SetDerivativeLocal(k, -1. * k);
3370         }
3371 
3372         x2(i).SetValuePreserve(10 * i);
3373         x2(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3374 
3375         for (index_type k = 0; k < iNumDeriv; ++k) {
3376             x2(i).SetDerivativeLocal(k, 10. * k);
3377         }
3378     }
3379 
3380     Vector<Gradient<N_SIZE>, DYNAMIC_SIZE> b = Transpose(A) * x1;
3381 
3382     const double start = mbdyn_clock_time();
3383 
3384     for (int i = 0; i < M; ++i) {
3385         b = Transpose(A) * (x1 * 3. + x2 * 5.);
3386     }
3387 
3388     std::cerr << "Transpose(MatNxN) * Gradient: " << (mbdyn_clock_time() - start) / M << "s\n";
3389 
3390     const doublereal tol = sqrt(std::numeric_limits<doublereal>::epsilon());
3391 
3392     for (index_type i = 1; i <= A.iGetNumCols(); ++i) {
3393         Gradient<N_SIZE> b_i;
3394 
3395         for (index_type j = 1; j <= A.iGetNumRows(); ++j) {
3396             b_i += A(j, i) * (3. * x1(j) + 5. * x2(j));
3397         }
3398 
3399         assert(bCompare(b_i, b(i), tol));
3400     }
3401 }
3402 
3403 template <index_type N_SIZE>
MatNxN_test_grad(int iNumDeriv,int N,int M)3404 void MatNxN_test_grad(int iNumDeriv, int N, int M)
3405 {
3406     assert((N_SIZE == 0) || (N_SIZE >= iNumDeriv));
3407 
3408     LocalDofMap dof;
3409 
3410     MatNxN A(N, 0.);
3411 
3412     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3413         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3414             A(i, j) = 100 * i + j;
3415         }
3416     }
3417 
3418     Vector<Gradient<N_SIZE>, DYNAMIC_SIZE> x1(A.iGetNumCols()), x2(A.iGetNumCols()), x(A.iGetNumCols());
3419 
3420     for (index_type i = 1; i <= x.iGetNumRows(); ++i) {
3421         x1(i).SetValuePreserve(i);
3422         x1(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3423 
3424         for (index_type k = 0; k < iNumDeriv; ++k) {
3425             x1(i).SetDerivativeLocal(k, -1. * k);
3426         }
3427 
3428         x2(i).SetValuePreserve(10 * i);
3429         x2(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3430 
3431         for (index_type k = 0; k < iNumDeriv; ++k) {
3432             x2(i).SetDerivativeLocal(k, 10. * k);
3433         }
3434     }
3435 
3436     Vector<Gradient<N_SIZE>, DYNAMIC_SIZE> b = A * x;
3437 
3438     const double start = mbdyn_clock_time();
3439 
3440     for (int i = 0; i < M; ++i) {
3441         x = x1 * 3. + x2 * 5.;
3442         b = A * x;
3443     }
3444 
3445     std::cerr << "MatNxN * Gradient: " << (mbdyn_clock_time() - start) / M << "s\n";
3446 
3447     const doublereal tol = sqrt(std::numeric_limits<doublereal>::epsilon());
3448 
3449     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3450         Gradient<N_SIZE> b_i;
3451 
3452         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3453             b_i += A(i, j) * (3. * x1(j) + 5. * x2(j));
3454         }
3455 
3456         assert(bCompare(b_i, b(i), tol));
3457     }
3458 }
3459 
3460 template <index_type N_SIZE>
MatDynamic_test_grad(index_type iNumDeriv,index_type iNumRows,index_type iNumCols,int iNumLoops)3461 void MatDynamic_test_grad(index_type iNumDeriv, index_type iNumRows, index_type iNumCols, int iNumLoops)
3462 {
3463     assert((N_SIZE == 0) || (N_SIZE >= iNumDeriv));
3464 
3465     LocalDofMap dof;
3466 
3467     Matrix<Gradient<N_SIZE>, DYNAMIC_SIZE, DYNAMIC_SIZE> A(iNumRows, iNumCols);
3468 
3469     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3470         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3471             A(i, j).SetValuePreserve(100 * i + j);
3472             A(i, j).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3473 
3474             for (index_type k = 0; k < iNumDeriv; ++k) {
3475                 A(i, j).SetDerivativeLocal(k, 1000. * i + 100. * j + k + 1);
3476             }
3477         }
3478     }
3479 
3480     Vector<Gradient<N_SIZE>, DYNAMIC_SIZE> x1(A.iGetNumCols()), x2(A.iGetNumCols()), x(A.iGetNumCols());
3481 
3482     for (index_type i = 1; i <= x.iGetNumRows(); ++i) {
3483         x1(i).SetValuePreserve(i);
3484         x1(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3485 
3486         for (index_type k = 0; k < iNumDeriv; ++k) {
3487             x1(i).SetDerivativeLocal(k, -1. * k);
3488         }
3489 
3490         x2(i).SetValuePreserve(10 * i);
3491         x2(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3492 
3493         for (index_type k = 0; k < iNumDeriv; ++k) {
3494             x2(i).SetDerivativeLocal(k, 10. * k);
3495         }
3496     }
3497 
3498     Vector<Gradient<N_SIZE>, DYNAMIC_SIZE> b = A * x;
3499 
3500     const double start = mbdyn_clock_time();
3501 
3502     for (int i = 0; i < iNumLoops; ++i) {
3503         x = x1 * 3. + x2 * 5.;
3504         b = A * x;
3505     }
3506 
3507     std::cerr << "Matrix<Gradient,DYNAMIC_SIZE,DYNAMIC_SIZE> * Gradient: " << (mbdyn_clock_time() - start) / iNumLoops << "s\n";
3508 
3509     const doublereal tol = sqrt(std::numeric_limits<doublereal>::epsilon());
3510 
3511     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3512         Gradient<N_SIZE> b_i;
3513 
3514         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3515             b_i += A(i, j) * (3. * x1(j) + 5. * x2(j));
3516         }
3517 
3518         assert(bCompare(b_i, b(i), tol));
3519     }
3520 }
3521 
3522 template <index_type N_ROWS, index_type N_COLS>
MatDynamicT_test(index_type iNumRows,index_type iNumCols,int iNumLoops)3523 void MatDynamicT_test(index_type iNumRows, index_type iNumCols, int iNumLoops)
3524 {
3525     assert((N_ROWS == iNumRows) || (N_ROWS == DYNAMIC_SIZE));
3526     assert((N_COLS == iNumCols) || (N_COLS == DYNAMIC_SIZE));
3527 
3528     Matrix<doublereal, N_ROWS, N_COLS> A(iNumRows, iNumCols);
3529 
3530     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3531         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3532             A(i, j) = 100 * i + j;
3533         }
3534     }
3535 
3536     Vector<doublereal, N_ROWS> x1(A.iGetNumRows()), x2(A.iGetNumRows());
3537 
3538     for (index_type i = 1; i <= x1.iGetNumRows(); ++i) {
3539         x1(i) = i;
3540         x2(i) = 10 * i;
3541     }
3542 
3543     Vector<doublereal, N_COLS> b = Transpose(A) * x1;
3544 
3545     const double start = mbdyn_clock_time();
3546 
3547     for (int i = 0; i < iNumLoops; ++i) {
3548         b = Transpose(A) * (x1 * 3. + x2 * 5.);
3549     }
3550 
3551     std::cerr << "Transpose(Matrix<doublereal>,"
3552               << iNumRows << "(" << N_ROWS << ")," << iNumCols << "(" << N_COLS << ")"
3553               << ">) * Vector<doublereal>,"
3554               << iNumRows << "(" << N_ROWS << ")>: " << (mbdyn_clock_time() - start) / iNumLoops << "s\n";
3555 
3556     const doublereal tol = sqrt(std::numeric_limits<doublereal>::epsilon());
3557 
3558     for (index_type i = 1; i <= A.iGetNumCols(); ++i) {
3559         doublereal b_i = 0.;
3560 
3561         for (index_type j = 1; j <= A.iGetNumRows(); ++j) {
3562             b_i += A(j, i) * (3. * x1(j) + 5. * x2(j));
3563         }
3564 
3565         assert(bCompare(b_i, b(i), tol));
3566     }
3567 }
3568 
3569 template <index_type N_DERIV, index_type N_ROWS, index_type N_COLS>
MatDynamicT_test_grad(index_type iNumDeriv,index_type iNumRows,index_type iNumCols,int iNumLoops)3570 void MatDynamicT_test_grad(index_type iNumDeriv, index_type iNumRows, index_type iNumCols, int iNumLoops)
3571 {
3572     assert((N_DERIV == 0) || (N_DERIV >= iNumDeriv));
3573     assert((N_ROWS == iNumRows) || (N_ROWS == DYNAMIC_SIZE));
3574     assert((N_COLS == iNumCols) || (N_COLS == DYNAMIC_SIZE));
3575 
3576     LocalDofMap dof;
3577 
3578     Matrix<Gradient<N_DERIV>, N_ROWS, N_COLS> A(iNumRows, iNumCols);
3579 
3580     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3581         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3582             A(i, j).SetValuePreserve(100 * i + j);
3583             A(i, j).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3584 
3585             for (index_type k = 0; k < iNumDeriv; ++k) {
3586                 A(i, j).SetDerivativeLocal(k, 1000. * i + 100. * j + k + 1);
3587             }
3588         }
3589     }
3590 
3591     Vector<Gradient<N_DERIV>, N_ROWS> x1(A.iGetNumRows()), x2(A.iGetNumRows());
3592 
3593     for (index_type i = 1; i <= x1.iGetNumRows(); ++i) {
3594         x1(i).SetValuePreserve(i);
3595         x1(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3596 
3597         for (index_type k = 0; k < iNumDeriv; ++k) {
3598             x1(i).SetDerivativeLocal(k, -1. * k);
3599         }
3600 
3601         x2(i).SetValuePreserve(10 * i);
3602         x2(i).DerivativeResizeReset(&dof, 0, iNumDeriv, MapVectorBase::LOCAL, 0.);
3603 
3604         for (index_type k = 0; k < iNumDeriv; ++k) {
3605             x2(i).SetDerivativeLocal(k, 10. * k);
3606         }
3607     }
3608 
3609     Vector<Gradient<N_DERIV>, N_COLS> b = Transpose(A) * x1;
3610 
3611     const double start = mbdyn_clock_time();
3612 
3613     for (int i = 0; i < iNumLoops; ++i) {
3614         b = Transpose(A) * (x1 * 3. + x2 * 5.);
3615     }
3616 
3617     std::cerr << "Transpose(Matrix<Gradient<" << iNumDeriv << "(" << N_DERIV << ")"
3618               << ">," << iNumRows << "(" << N_ROWS << ")," << iNumCols << "(" << N_COLS << ")"
3619               << ">) * Vector<Gradient<" << iNumDeriv << "(" << N_DERIV << ")"
3620               << ">," << iNumRows << "(" << N_ROWS << ")>: " << (mbdyn_clock_time() - start) / iNumLoops << "s\n";
3621 
3622     const doublereal tol = sqrt(std::numeric_limits<doublereal>::epsilon());
3623 
3624     for (index_type i = 1; i <= A.iGetNumCols(); ++i) {
3625         Gradient<N_DERIV> b_i;
3626 
3627         for (index_type j = 1; j <= A.iGetNumRows(); ++j) {
3628             b_i += A(j, i) * (3. * x1(j) + 5. * x2(j));
3629         }
3630 
3631         assert(bCompare(b_i, b(i), tol));
3632     }
3633 
3634     for (index_type i = 1; i <= A.iGetNumRows(); ++i) {
3635         for (index_type j = 1; j <= A.iGetNumCols(); ++j) {
3636             assert(bCompare(A(i, j), A.GetRow(i)(j), 0.));
3637             assert(bCompare(A(i, j), A.GetCol(j)(i), 0.));
3638             assert(bCompare(A(i, j), Transpose(A)(j, i), 0.));
3639             assert(bCompare(A(i, j), Transpose(A).GetCol(i)(j), 0.));
3640             assert(bCompare(A(i, j), Transpose(A).GetRow(j)(i), 0.));
3641         }
3642     }
3643 }
3644 
MatManip_test(int NLoops)3645 void MatManip_test(int NLoops) {
3646     const doublereal tol1 = 10. * std::numeric_limits<doublereal>::epsilon();
3647     const doublereal tol2 = 1e-4 * sqrt(tol1);
3648     const doublereal alpha = 1. - sqrt(tol2);
3649 
3650     srand(0);
3651 
3652     for (int k = 0; k < 3 * NLoops; ++k) {
3653         Vector<doublereal, 3> g0;
3654         Vec3 g0_;
3655 
3656         for (int i = 1; i <= 3; ++i) {
3657             g0_(i) = (2. * rand() / RAND_MAX - 1.);
3658         }
3659 
3660         g0_ *= (2. * rand() / RAND_MAX - 1.) * alpha * M_PI / g0_.Norm();
3661 
3662         if (k >= NLoops && k < 2 * NLoops) {
3663             g0_ *= sqrt(std::numeric_limits<doublereal>::epsilon());
3664         } else if (k >= 2 * NLoops) {
3665             g0_ *= std::numeric_limits<doublereal>::epsilon();
3666         }
3667 
3668         g0 = g0_;
3669 
3670         const Mat3x3 G1(CGR_Rot::MatG, g0_);
3671         const Mat3x3 R1(CGR_Rot::MatR, g0_);
3672         const Mat3x3 X1(1., g0_);
3673         const Matrix<doublereal, 3, 3> G2(MatGVec(g0));
3674         Matrix<doublereal, 3, 3> R2(MatRVec(g0));
3675         const Matrix<doublereal, 3, 3> X2(MatCrossVec(g0, 1.));
3676 
3677         for (index_type i = 1; i <= 3; ++i) {
3678             for (index_type j = 1; j <= 3; ++j) {
3679                 assert(bCompare(G1(i, j), G2(i, j), tol1));
3680                 assert(bCompare(R1(i, j), R2(i, j), tol1));
3681                 assert(bCompare(X1(i, j), X2(i, j), tol1));
3682             }
3683         }
3684 
3685         if (k == 0) {
3686             R2 = ::Eye3;
3687         } else if (k == 1) {
3688             R2(1, 1) = -1;
3689             R2(1, 2) = 0;
3690             R2(1, 3) = 0;
3691             R2(2, 1) = 0;
3692             R2(2, 2) = -1;
3693             R2(2, 3) = 0;
3694             R2(3, 1) = 0;
3695             R2(3, 2) = 0;
3696             R2(3, 3) = 1;
3697         } else if (k == 2) {
3698             R2(1, 1) = -1;
3699             R2(1, 2) = 0;
3700             R2(1, 3) = 0;
3701             R2(2, 1) = 0;
3702             R2(2, 2) = 1;
3703             R2(2, 3) = 0;
3704             R2(3, 1) = 0;
3705             R2(3, 2) = 0;
3706             R2(3, 3) = -1;
3707         } else if (k == 3) {
3708             R2(1, 1) = 1;
3709             R2(1, 2) = 0;
3710             R2(1, 3) = 0;
3711             R2(2, 1) = 0;
3712             R2(2, 2) = -1;
3713             R2(2, 3) = 0;
3714             R2(3, 1) = 0;
3715             R2(3, 2) = 0;
3716             R2(3, 3) = -1;
3717         } else if (k == 4) {
3718             R2(1,1)=-2.2841125213377644e-01;
3719             R2(1,2)=9.5997603033895429e-01;
3720             R2(1,3)=1.6209355654480440e-01;
3721             R2(2,1)=9.5997603033895418e-01;
3722             R2(2,2)=1.9435901751267337e-01;
3723             R2(2,3)=2.0166951551033063e-01;
3724             R2(3,1)=1.6209355654480423e-01;
3725             R2(3,2)=2.0166951551033083e-01;
3726             R2(3,3)=-9.6594776537889704e-01;
3727         }
3728 
3729         Mat3x3 R2_;
3730 
3731         for (index_type i = 1; i <= 3; ++i) {
3732             for (index_type j = 1; j <= 3; ++j) {
3733                 R2_(i, j) = R2(i, j);
3734             }
3735         }
3736 
3737         const Vec3 g1(RotManip::VecRot(R2_));
3738         const Vector<doublereal, 3> g2(VecRotMat(R2));
3739 
3740         for (index_type i = 1; i <= 3; ++i) {
3741             assert(bCompare(g1(i), g2(i), std::numeric_limits<doublereal>::epsilon()) || bCompare(fabs(g1(i) - g2(i)), 2 * M_PI, std::numeric_limits<doublereal>::epsilon()));
3742         }
3743     }
3744 }
3745 
main(int argc,char * argv[])3746 int main(int argc, char* argv[]) {
3747 #ifdef HAVE_FEENABLEEXCEPT
3748 	feenableexcept(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW);
3749 #endif
3750 	if (argc > 1) {
3751 		NLoops = atol(argv[1]);
3752 	}
3753 
3754     if (argc > 2) {
3755         NLoopsAss = atol(argv[2]);
3756     }
3757 
3758     if (NLoops < 1) {
3759         NLoops = 1;
3760     }
3761 
3762     if (NLoopsAss < 1) {
3763         NLoopsAss = 1;
3764     }
3765 
3766     std::cerr << "MatManip_test()\n";
3767 
3768     MatManip_test(NLoops);
3769 
3770 	std::cerr << "---------------------------\ntestScalarTypeTraits()\n";
3771 	testScalarTypeTraits();
3772 
3773 	testMatVec<3>();
3774 	testMatVec<6>();
3775 	testMatVec<8>();
3776 	testMatVec<12>();
3777 	testMatVec<24>();
3778 
3779     std::cerr << "---------------------------\ntestMatVecGradient2<1>()\n";
3780 	testMatVecGradient2<1>();
3781 
3782     std::cerr << "---------------------------\ntestMatVecGradient2<2>()\n";
3783 	testMatVecGradient2<2>();
3784 
3785     std::cerr << "---------------------------\ntestMatVecGradient2<3>()\n";
3786 	testMatVecGradient2<3>();
3787 
3788     std::cerr << "---------------------------\ntestMatVecGradient2<4>()\n";
3789 	testMatVecGradient2<4>();
3790 
3791     std::cerr << "---------------------------\ntestMatVecGradient2<5>()\n";
3792 	testMatVecGradient2<5>();
3793 
3794     std::cerr << "---------------------------\ntestMatVecGradient2<6>()\n";
3795 	testMatVecGradient2<6>();
3796 
3797     std::cerr << "---------------------------\ntestMatVecGradient2<8>()\n";
3798 	testMatVecGradient2<8>();
3799 
3800     std::cerr << "---------------------------\ntestMatVecGradient2<10>()\n";
3801 	testMatVecGradient2<10>();
3802 
3803     std::cerr << "---------------------------\ntestMatVecGradient2<12>()\n";
3804 	testMatVecGradient2<12>();
3805 
3806 	std::cerr << "---------------------------\ntestMatVecDouble2()\n";
3807 	testMatVecDouble2();
3808 
3809     std::cerr << "---------------------------\ntestMatVecProduct()\n";
3810     testMatVecProduct();
3811     std::cerr << "---------------------------\ntestMatVecProductGradient()\n";
3812     testMatVecProductGradient();
3813 
3814     std::cerr << "---------------------------\ntestMatVecProductGradient2()\n";
3815     testMatVecProductGradient2<1>(1, NLoops);
3816     testMatVecProductGradient2<4>(4, NLoops);
3817     testMatVecProductGradient2<8>(8, NLoops);
3818     testMatVecProductGradient2<12>(12, NLoops);
3819     testMatVecProductGradient2<32>(32, NLoops);
3820     testMatVecProductGradient2<64>(64, NLoops);
3821     testMatVecProductGradient2<0>(256, NLoops);
3822 
3823     std::cerr << "---------------------------\ntestMatVecCopy()\n";
3824     testMatVecCopy<1>();
3825     testMatVecCopy<3>();
3826     testMatVecCopy<5>();
3827     testMatVecCopy<9>();
3828 
3829 #ifdef HAVE_BLITZ
3830     std::cerr << "---------------------------\ntestAssembly()\n";
3831     gradVecAssTest::testAssembly();
3832 #endif
3833 
3834     std::cerr << "---------------------------\ntestMatVec3()\n";
3835     testMatVec3();
3836 
3837     std::cerr << "---------------------------\ntestSubVecAss()\n";
3838     testSubVecAss();
3839 
3840     std::cerr << "---------------------------\ntestSubVecAssMatVec()\n";
3841     testSubVecAssMatVec();
3842 
3843     std::cerr << "---------------------------\ntestInv()\n";
3844     testInv();
3845 
3846     std::cerr << "----------------------------\ntestSolve()\n";
3847     testSolve();
3848 
3849       {
3850             const int N = argc > 3 ? atoi(argv[3]) : 1;
3851             const int nr = 4, nc = 4;
3852             Matrix<doublereal, nr, nc> A, B, C;
3853 
3854             for (int i = 1; i <= nr; ++i)
3855             {
3856                 for (int j = 1; j <= nc; ++j)
3857                 {
3858                     B(i, j) = rand();
3859                     C(i, j) = rand();
3860                 }
3861             }
3862 
3863             clock_t start = clock();
3864 
3865             for (int i = 0; i < N; ++i)
3866             {
3867                 A = B * C;
3868             }
3869 
3870             std::cerr << "time: " << double(clock()-start)/N/CLOCKS_PER_SEC << std::endl;
3871       }
3872 
3873     std::cerr << "----------------------------\ncppad_benchmark1<0>()\n";
3874     cppad_benchmark1<0>(NLoops);
3875 
3876     std::cerr << "----------------------------\ncppad_benchmark1<6>()\n";
3877     cppad_benchmark1<6>(NLoops);
3878 
3879     std::cerr << "----------------------------\ncppad_benchmark2<0>()\n";
3880     cppad_benchmark2<0>(NLoops);
3881 
3882     std::cerr << "----------------------------\ncppad_benchmark2<12>()\n";
3883     cppad_benchmark2<12>(NLoops);
3884 
3885     std::cerr << "----------------------------\ncppad_benchmark3<0>()\n";
3886     cppad_benchmark3<0>(NLoops);
3887 
3888     std::cerr << "----------------------------\ncppad_benchmark3<12>()\n";
3889     cppad_benchmark3<12>(NLoops);
3890 
3891 
3892     testVecOp<3, 2>(NLoops, 2, doVecAdd<Gradient<2>, 3>, __FC_DECL__(func2addad_dv), "add");
3893 	testVecOp<3, 4>(NLoops, 4, doVecAdd<Gradient<4>, 3>, __FC_DECL__(func2addad_dv), "add");
3894 	testVecOp<3, 8>(NLoops, 8, doVecAdd<Gradient<8>, 3>, __FC_DECL__(func2addad_dv), "add");
3895 	testVecOp<3, 16>(NLoops, 16, doVecAdd<Gradient<16>, 3>, __FC_DECL__(func2addad_dv), "add");
3896 
3897     testVecOp<12, 2>(NLoops, 2, doVecAdd<Gradient<2>, 12>, __FC_DECL__(func2addad_dv), "add");
3898 	testVecOp<12, 4>(NLoops, 4, doVecAdd<Gradient<4>, 12>, __FC_DECL__(func2addad_dv), "add");
3899 	testVecOp<12, 8>(NLoops, 8, doVecAdd<Gradient<8>, 12>, __FC_DECL__(func2addad_dv), "add");
3900 	testVecOp<12, 16>(NLoops, 16, doVecAdd<Gradient<16>, 12>, __FC_DECL__(func2addad_dv), "add");
3901 
3902     testVecOp<3, 2>(NLoops, 2, doVecMul<Gradient<2>, 3>, __FC_DECL__(func2mulad_dv), "mul");
3903 	testVecOp<3, 4>(NLoops, 4, doVecMul<Gradient<4>, 3>, __FC_DECL__(func2mulad_dv), "mul");
3904 	testVecOp<3, 8>(NLoops, 8, doVecMul<Gradient<8>, 3>, __FC_DECL__(func2mulad_dv), "mul");
3905 	testVecOp<3, 16>(NLoops, 16, doVecMul<Gradient<16>, 3>, __FC_DECL__(func2mulad_dv), "mul");
3906 
3907     testVecOp<12, 2>(NLoops, 2, doVecAdd<Gradient<2>, 12>, __FC_DECL__(func2addad_dv), "add");
3908 	testVecOp<12, 4>(NLoops, 4, doVecAdd<Gradient<4>, 12>, __FC_DECL__(func2addad_dv), "add");
3909 	testVecOp<12, 8>(NLoops, 8, doVecAdd<Gradient<8>, 12>, __FC_DECL__(func2addad_dv), "add");
3910 	testVecOp<12, 16>(NLoops, 16, doVecAdd<Gradient<16>, 12>, __FC_DECL__(func2addad_dv), "add");
3911 
3912     testVecOp<3, 0>(NLoops, 2, doVecAdd<Gradient<0>, 3>, __FC_DECL__(func2addad_dv), "add");
3913     testVecOp<3, 0>(NLoops, 4, doVecAdd<Gradient<0>, 3>, __FC_DECL__(func2addad_dv), "add");
3914     testVecOp<3, 0>(NLoops, 8, doVecAdd<Gradient<0>, 3>, __FC_DECL__(func2addad_dv), "add");
3915     testVecOp<3, 0>(NLoops, 16, doVecAdd<Gradient<0>, 3>, __FC_DECL__(func2addad_dv), "add");
3916     testVecOp<3, 0>(NLoops, 256, doVecAdd<Gradient<0>, 3>, __FC_DECL__(func2addad_dv), "add");
3917     testVecOp<3, 0>(NLoops, 512, doVecAdd<Gradient<0>, 3>, __FC_DECL__(func2addad_dv), "add");
3918     testVecOp<3, 0>(NLoops, 1024, doVecAdd<Gradient<0>, 3>, __FC_DECL__(func2addad_dv), "add");
3919 
3920     testVecOp<12, 0>(NLoops, 2, doVecAdd<Gradient<0>, 12>, __FC_DECL__(func2addad_dv), "add");
3921     testVecOp<12, 0>(NLoops, 4, doVecAdd<Gradient<0>, 12>, __FC_DECL__(func2addad_dv), "add");
3922     testVecOp<12, 0>(NLoops, 8, doVecAdd<Gradient<0>, 12>, __FC_DECL__(func2addad_dv), "add");
3923     testVecOp<12, 0>(NLoops, 16, doVecAdd<Gradient<0>, 12>, __FC_DECL__(func2addad_dv), "add");
3924     testVecOp<12, 0>(NLoops, 256, doVecAdd<Gradient<0>, 12>, __FC_DECL__(func2addad_dv), "add");
3925     testVecOp<12, 0>(NLoops, 512, doVecAdd<Gradient<0>, 12>, __FC_DECL__(func2addad_dv), "add");
3926     testVecOp<12, 0>(NLoops, 1024, doVecAdd<Gradient<0>, 12>, __FC_DECL__(func2addad_dv), "add");
3927 
3928     testVecOp<3, 0>(NLoops, 2, doVecMul<Gradient<0>, 3>, __FC_DECL__(func2mulad_dv), "mul");
3929     testVecOp<3, 0>(NLoops, 4, doVecMul<Gradient<0>, 3>, __FC_DECL__(func2mulad_dv), "mul");
3930     testVecOp<3, 0>(NLoops, 8, doVecMul<Gradient<0>, 3>, __FC_DECL__(func2mulad_dv), "mul");
3931     testVecOp<3, 0>(NLoops, 16, doVecMul<Gradient<0>, 3>, __FC_DECL__(func2mulad_dv), "mul");
3932     testVecOp<3, 0>(NLoops, 256, doVecMul<Gradient<0>, 3>, __FC_DECL__(func2mulad_dv), "mul");
3933     testVecOp<3, 0>(NLoops, 512, doVecMul<Gradient<0>, 3>, __FC_DECL__(func2mulad_dv), "mul");
3934     testVecOp<3, 0>(NLoops, 1024, doVecMul<Gradient<0>, 3>, __FC_DECL__(func2mulad_dv), "mul");
3935 
3936     testVecOp<12, 0>(NLoops, 2, doVecMul<Gradient<0>, 12>, __FC_DECL__(func2mulad_dv), "mul");
3937     testVecOp<12, 0>(NLoops, 4, doVecMul<Gradient<0>, 12>, __FC_DECL__(func2mulad_dv), "mul");
3938     testVecOp<12, 0>(NLoops, 8, doVecMul<Gradient<0>, 12>, __FC_DECL__(func2mulad_dv), "mul");
3939     testVecOp<12, 0>(NLoops, 16, doVecMul<Gradient<0>, 12>, __FC_DECL__(func2mulad_dv), "mul");
3940     testVecOp<12, 0>(NLoops, 256, doVecMul<Gradient<0>, 12>, __FC_DECL__(func2mulad_dv), "mul");
3941     testVecOp<12, 0>(NLoops, 512, doVecMul<Gradient<0>, 12>, __FC_DECL__(func2mulad_dv), "mul");
3942     testVecOp<12, 0>(NLoops, 1024, doVecMul<Gradient<0>, 12>, __FC_DECL__(func2mulad_dv), "mul");
3943 
3944     Mat3xN_test(3, NLoops);
3945     Mat3xN_test(10, NLoops);
3946     Mat3xN_test(100, NLoops);
3947 
3948     MatNxN_test(3, NLoops);
3949     MatNxN_test(10, NLoops);
3950     MatNxN_test(100, NLoops);
3951 
3952     Mat3xN_test_grad<4>(4, 3, NLoops);
3953     Mat3xN_test_grad<5>(5, 10, NLoops);
3954     Mat3xN_test_grad<0>(20, 100, NLoops);
3955 
3956     Mat3xNT_test_grad<4>(4, 3, NLoops);
3957     Mat3xNT_test_grad<5>(5, 10, NLoops);
3958     Mat3xNT_test_grad<0>(20, 100, NLoops);
3959 
3960     MatNxN_test_grad<4>(4, 3, NLoops);
3961     MatNxN_test_grad<5>(5, 10, NLoops);
3962     MatNxN_test_grad<0>(20, 100, NLoops);
3963 
3964     MatNxNT_test_grad<4>(4, 3, NLoops);
3965     MatNxNT_test_grad<5>(5, 10, NLoops);
3966     MatNxNT_test_grad<0>(20, 100, NLoops);
3967 
3968     MatDynamic_test(3, 4, NLoops);
3969     MatDynamic_test(10, 20, NLoops);
3970 
3971     MatDynamic_test_grad<3>(3, 4, 5, NLoops);
3972     MatDynamic_test_grad<5>(5, 10, 20, NLoops);
3973     MatDynamic_test_grad<0>(15, 20, 30, NLoops);
3974 
3975     MatDynamicT_test<10, 20>(10, 20, NLoops);
3976     MatDynamicT_test<DYNAMIC_SIZE, 20>(10, 20, NLoops);
3977     MatDynamicT_test<10, DYNAMIC_SIZE>(10, 20, NLoops);
3978     MatDynamicT_test<DYNAMIC_SIZE, DYNAMIC_SIZE>(10, 20, NLoops);
3979 
3980     MatDynamicT_test_grad<5, 10, 20>(5, 10, 20, NLoops);
3981     MatDynamicT_test_grad<5, DYNAMIC_SIZE, 20>(5, 10, 20, NLoops);
3982     MatDynamicT_test_grad<5, 10, DYNAMIC_SIZE>(5, 10, 20, NLoops);
3983     MatDynamicT_test_grad<5, DYNAMIC_SIZE, DYNAMIC_SIZE>(5, 10, 20, NLoops);
3984     MatDynamicT_test_grad<0, 10, 20>(5, 10, 20, NLoops);
3985     MatDynamicT_test_grad<0, DYNAMIC_SIZE, 20>(5, 10, 20, NLoops);
3986     MatDynamicT_test_grad<0, 10, DYNAMIC_SIZE>(5, 10, 20, NLoops);
3987     MatDynamicT_test_grad<0, DYNAMIC_SIZE, DYNAMIC_SIZE>(5, 10, 20, NLoops);
3988 
3989 #ifdef NDEBUG
3990     std::cerr << "\nNo tests have been done" << std::endl;
3991 #else
3992     std::cerr << "\nAll tests passed" << std::endl;
3993 #endif
3994 
3995     std::cerr << "MATVEC_DEBUG=" << MATVEC_DEBUG << std::endl;
3996     std::cerr << "GRADIENT_DEBUG=" << GRADIENT_DEBUG << std::endl;
3997 
3998     return 0;
3999 }
4000 
4001 /*
4002 %!test
4003 %! printf("\n\ntestMatVecProduct()\n");
4004 %!
4005 %! A=[11 12 13 14
4006 %! 21 22 23 24
4007 %! 31 32 33 34]
4008 %!
4009 %! B= [0.011e3 0.012e3
4010 %!    0.021e3 0.022e3
4011 %!	  0.031e3 0.032e3
4012 %!	  0.041e3 0.042e3];
4013 %!
4014 %! R=[11 12 13
4015 %! 21 22 23
4016 %! 31 32 33]
4017 %!
4018 %! x=[100
4019 %! 200
4020 %! 300
4021 %! 400]
4022 %!
4023 %! g = [523;
4024 %!      -786 ]
4025 %!
4026 %! b=A*x
4027 %!
4028 %! c = R * A * x
4029 %!
4030 %! C = A * B
4031 %!
4032 %! D = -A * A.'
4033 %!
4034 %! h = A * B * g
4035 %!
4036 %! d=A.'*b
4037 %!
4038 %! e=A.' * A * x
4039 %! f=A*A.'*A*(x+x)
4040 
4041 %!function print_matrix(x)
4042 %!  printf("{");
4043 %!  for i=1:rows(x)
4044 %! 		if (columns(x) > 1)
4045 %!			printf("{");
4046 %!		endif
4047 %!		for j=1:columns(x)
4048 %!			printf("%.16g", x(i, j));
4049 %!			if (j < columns(x))
4050 %!				printf(", ");
4051 %!			endif
4052 %!		endfor
4053 %!		if (columns(x) > 1)
4054 %!			printf("}");
4055 %!		endif
4056 %!		if (i < rows(x))
4057 %!			printf(",\n");
4058 %!		endif
4059 %!  endfor
4060 %!  printf("}");
4061 
4062 %!function print_gradient(g)
4063 %!  printf("\nstatic const struct test_%s {\ndouble val[%d];\n", inputname(1), length(g.x));
4064 %!  printf("doublereal der[%d][%d];\n}", rows(g.J), columns(g.J));
4065 %!  printf(" oct_%s = {\n", inputname(1));
4066 %!  print_matrix(g.x);
4067 %!  printf(",\n");
4068 %!  print_matrix(g.J);
4069 %!  printf("};\n");
4070 %!  % printf("\ntestGradient(oct_%s, %s, dTol);\n", inputname(1), inputname(1));
4071 %!
4072 
4073 %!test
4074 %! A = [11, 12, 13, 14;
4075 %!      21, 22, 23, 24;
4076 %! 		31, 32, 33, 34]
4077 %!
4078 %! h = [15.7; -7.3; 12.4]
4079 %!
4080 %! A = gradinit(A);
4081 %!
4082 %! x = (A(1, :) * 10).'
4083 %!
4084 %! b = A * x
4085 %! c = -A * x
4086 %! d = cross(c, b) * 5
4087 %! e = cross(c + d, b) / 2
4088 %! f = cross(e, d + c) - e
4089 %! g = cross(d + e, f - c) + b
4090 %! i = cross(d, h) * 5.7
4091 %! j = cross(h, g) / 3.5
4092 %! k = cross(h, h) + h * 3
4093 %! l = cross(h + h, j) / 2
4094 %! m = cross(h, j - i) * 5
4095 %! n = cross(h + h, h) + h
4096 %! o = cross(h, h + h) * 2
4097 %! p = cross(h * 2.5, h / 2) - h
4098 %! r = dot(h, g)
4099 %! s = dot(g, h)
4100 %! s1 = dot(g.x, h)
4101 %! t = dot(g, g)
4102 %! u = dot(h, h);
4103 %! norm_g = norm(g)
4104 %! printf("\n\ntestMatVecProductGradient()\n");
4105 %! print_gradient(b);
4106 %! print_gradient(c);
4107 %! print_gradient(d);
4108 %! print_gradient(e);
4109 %! print_gradient(f);
4110 %! print_gradient(g);
4111 %! print_gradient(i);
4112 %! print_gradient(j);
4113 %! print_gradient(l);
4114 %! print_gradient(m);
4115 %! print_gradient(r);
4116 %! print_gradient(s);
4117 %! print_gradient(t);
4118 %! print_gradient(norm_g);
4119 */
4120 
4121