1 ///////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (c) 2002-2012, Industrial Light & Magic, a division of Lucas
4 // Digital Ltd. LLC
5 //
6 // All rights reserved.
7 //
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
10 // met:
11 // *       Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 // *       Redistributions in binary form must reproduce the above
14 // copyright notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other materials provided with the
16 // distribution.
17 // *       Neither the name of Industrial Light & Magic nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 //
33 ///////////////////////////////////////////////////////////////////////////
34 
35 
36 
37 
38 
39 //----------------------------------------------------------------------------
40 //
41 //	Implementation of non-template items declared in ImathMatrixAlgo.h
42 //
43 //----------------------------------------------------------------------------
44 
45 #include "ImathMatrixAlgo.h"
46 #include <cmath>
47 #include <algorithm>
48 
49 #if defined(OPENEXR_DLL)
50     #define EXPORT_CONST __declspec(dllexport)
51 #else
52     #define EXPORT_CONST const
53 #endif
54 
55 IMATH_INTERNAL_NAMESPACE_SOURCE_ENTER
56 
57 EXPORT_CONST M33f identity33f ( 1, 0, 0,
58 				0, 1, 0,
59 				0, 0, 1);
60 
61 EXPORT_CONST M33d identity33d ( 1, 0, 0,
62 				0, 1, 0,
63 				0, 0, 1);
64 
65 EXPORT_CONST M44f identity44f ( 1, 0, 0, 0,
66 				0, 1, 0, 0,
67 				0, 0, 1, 0,
68 				0, 0, 0, 1);
69 
70 EXPORT_CONST M44d identity44d ( 1, 0, 0, 0,
71 				0, 1, 0, 0,
72 				0, 0, 1, 0,
73 				0, 0, 0, 1);
74 
75 namespace
76 {
77 
78 class KahanSum
79 {
80 public:
KahanSum()81     KahanSum() : _total(0), _correction(0) {}
82 
83     void
operator +=(const double val)84     operator+= (const double val)
85     {
86         const double y = val - _correction;
87         const double t = _total + y;
88         _correction = (t - _total) - y;
89         _total = t;
90     }
91 
get() const92     double get() const
93     {
94         return _total;
95     }
96 
97 private:
98     double _total;
99     double _correction;
100 };
101 
102 }
103 
104 template <typename T>
105 M44d
procrustesRotationAndTranslation(const Vec3<T> * A,const Vec3<T> * B,const T * weights,const size_t numPoints,const bool doScale)106 procrustesRotationAndTranslation (const Vec3<T>* A, const Vec3<T>* B, const T* weights, const size_t numPoints, const bool doScale)
107 {
108     if (numPoints == 0)
109         return M44d();
110 
111     // Always do the accumulation in double precision:
112     V3d Acenter (0.0);
113     V3d Bcenter (0.0);
114     double weightsSum = 0.0;
115 
116     if (weights == 0)
117     {
118         for (int i = 0; i < numPoints; ++i)
119         {
120             Acenter += (V3d) A[i];
121             Bcenter += (V3d) B[i];
122         }
123         weightsSum = (double) numPoints;
124     }
125     else
126     {
127         for (int i = 0; i < numPoints; ++i)
128         {
129             const double w = weights[i];
130             weightsSum += w;
131 
132             Acenter += w * (V3d) A[i];
133             Bcenter += w * (V3d) B[i];
134         }
135     }
136 
137     if (weightsSum == 0)
138         return M44d();
139 
140     Acenter /= weightsSum;
141     Bcenter /= weightsSum;
142 
143     //
144     // Find Q such that |Q*A - B|  (actually A-Acenter and B-Bcenter, weighted)
145     // is minimized in the least squares sense.
146     // From Golub/Van Loan, p.601
147     //
148     // A,B are 3xn
149     // Let C = B A^T   (where A is 3xn and B^T is nx3, so C is 3x3)
150     // Compute the SVD: C = U D V^T  (U,V rotations, D diagonal).
151     // Throw away the D part, and return Q = U V^T
152     M33d C (0.0);
153     if (weights == 0)
154     {
155         for (int i = 0; i < numPoints; ++i)
156             C += outerProduct ((V3d) B[i] - Bcenter, (V3d) A[i] - Acenter);
157     }
158     else
159     {
160         for (int i = 0; i < numPoints; ++i)
161         {
162             const double w = weights[i];
163             C += outerProduct (w * ((V3d) B[i] - Bcenter), (V3d) A[i] - Acenter);
164         }
165     }
166 
167     M33d U, V;
168     V3d S;
169     jacobiSVD (C, U, S, V, IMATH_INTERNAL_NAMESPACE::limits<double>::epsilon(), true);
170 
171     // We want Q.transposed() here since we are going to be using it in the
172     // Imath style (multiplying vectors on the right, v' = v*A^T):
173     const M33d Qt = V * U.transposed();
174 
175     double s = 1.0;
176     if (doScale && numPoints > 1)
177     {
178         // Finding a uniform scale: let us assume the Q is completely fixed
179         // at this point (solving for both simultaneously seems much harder).
180         // We are trying to compute (again, per Golub and van Loan)
181         //    min || s*A*Q - B ||_F
182         // Notice that we've jammed a uniform scale in front of the Q.
183         // Now, the Frobenius norm (the least squares norm over matrices)
184         // has the neat property that it is equivalent to minimizing the trace
185         // of M^T*M (see your friendly neighborhood linear algebra text for a
186         // derivation).  Thus, we can expand this out as
187         //   min tr (s*A*Q - B)^T*(s*A*Q - B)
188         // = min tr(Q^T*A^T*s*s*A*Q) + tr(B^T*B) - 2*tr(Q^T*A^T*s*B)  by linearity of the trace
189         // = min s^2 tr(A^T*A) + tr(B^T*B) - 2*s*tr(Q^T*A^T*B)        using the fact that the trace is invariant
190         //                                                            under similarity transforms Q*M*Q^T
191         // If we differentiate w.r.t. s and set this to 0, we get
192         // 0 = 2*s*tr(A^T*A) - 2*tr(Q^T*A^T*B)
193         // so
194         // 2*s*tr(A^T*A) = 2*s*tr(Q^T*A^T*B)
195         // s = tr(Q^T*A^T*B) / tr(A^T*A)
196 
197         KahanSum traceATA;
198         if (weights == 0)
199         {
200             for (int i = 0; i < numPoints; ++i)
201                 traceATA += ((V3d) A[i] - Acenter).length2();
202         }
203         else
204         {
205             for (int i = 0; i < numPoints; ++i)
206                 traceATA += ((double) weights[i]) * ((V3d) A[i] - Acenter).length2();
207         }
208 
209         KahanSum traceBATQ;
210         for (int i = 0; i < 3; ++i)
211             for (int j = 0; j < 3; ++j)
212                 traceBATQ += Qt[j][i] * C[i][j];
213 
214         s = traceBATQ.get() / traceATA.get();
215     }
216 
217     // Q is the rotation part of what we want to return.
218     // The entire transform is:
219     //    (translate origin to Bcenter) * Q * (translate Acenter to origin)
220     //                last                                first
221     // The effect of this on a point is:
222     //    (translate origin to Bcenter) * Q * (translate Acenter to origin) * point
223     //  = (translate origin to Bcenter) * Q * (-Acenter + point)
224     //  = (translate origin to Bcenter) * (-Q*Acenter + Q*point)
225     //  = (translate origin to Bcenter) * (translate Q*Acenter to origin) * Q*point
226     //  = (translate Q*Acenter to Bcenter) * Q*point
227     // So what we want to return is:
228     //    (translate Q*Acenter to Bcenter) * Q
229     //
230     // In block form, this is:
231     //   [ 1 0 0  | ] [       0 ] [ 1 0 0  |  ]   [ 1 0 0  | ] [           |   ]   [                 ]
232     //   [ 0 1 0 tb ] [  s*Q  0 ] [ 0 1 0 -ta ] = [ 0 1 0 tb ] [  s*Q  -s*Q*ta ] = [   Q   tb-s*Q*ta ]
233     //   [ 0 0 1  | ] [       0 ] [ 0 0 1  |  ]   [ 0 0 1  | ] [           |   ]   [                 ]
234     //   [ 0 0 0  1 ] [ 0 0 0 1 ] [ 0 0 0  1  ]   [ 0 0 0  1 ] [ 0 0 0     1   ]   [ 0 0 0    1      ]
235     // (ofc the whole thing is transposed for Imath).
236     const V3d translate = Bcenter - s*Acenter*Qt;
237 
238     return M44d (s*Qt.x[0][0], s*Qt.x[0][1], s*Qt.x[0][2], T(0),
239                  s*Qt.x[1][0], s*Qt.x[1][1], s*Qt.x[1][2], T(0),
240                  s*Qt.x[2][0], s*Qt.x[2][1], s*Qt.x[2][2], T(0),
241                  translate.x, translate.y, translate.z, T(1));
242 } // procrustesRotationAndTranslation
243 
244 template <typename T>
245 M44d
procrustesRotationAndTranslation(const Vec3<T> * A,const Vec3<T> * B,const size_t numPoints,const bool doScale)246 procrustesRotationAndTranslation (const Vec3<T>* A, const Vec3<T>* B, const size_t numPoints, const bool doScale)
247 {
248     return procrustesRotationAndTranslation (A, B, (const T*) 0, numPoints, doScale);
249 } // procrustesRotationAndTranslation
250 
251 
252 template IMATH_EXPORT M44d procrustesRotationAndTranslation (const V3d* from, const V3d* to, const size_t numPoints, const bool doScale);
253 template IMATH_EXPORT M44d procrustesRotationAndTranslation (const V3f* from, const V3f* to, const size_t numPoints, const bool doScale);
254 template IMATH_EXPORT M44d procrustesRotationAndTranslation (const V3d* from, const V3d* to, const double* weights, const size_t numPoints, const bool doScale);
255 template IMATH_EXPORT M44d procrustesRotationAndTranslation (const V3f* from, const V3f* to, const float* weights, const size_t numPoints, const bool doScale);
256 
257 
258 namespace
259 {
260 
261 // Applies the 2x2 Jacobi rotation
262 //   [  c s 0 ]    [ 1  0 0 ]    [  c 0 s ]
263 //   [ -s c 0 ] or [ 0  c s ] or [  0 1 0 ]
264 //   [  0 0 1 ]    [ 0 -s c ]    [ -s 0 c ]
265 // from the right; that is, computes
266 //   J * A
267 // for the Jacobi rotation J and the matrix A.  This is efficient because we
268 // only need to touch exactly the 2 columns that are affected, so we never
269 // need to explicitly construct the J matrix.
270 template <typename T, int j, int k>
271 void
jacobiRotateRight(IMATH_INTERNAL_NAMESPACE::Matrix33<T> & A,const T c,const T s)272 jacobiRotateRight (IMATH_INTERNAL_NAMESPACE::Matrix33<T>& A,
273                    const T c,
274                    const T s)
275 {
276     for (int i = 0; i < 3; ++i)
277     {
278         const T tau1 = A[i][j];
279         const T tau2 = A[i][k];
280         A[i][j] = c * tau1 - s * tau2;
281         A[i][k] = s * tau1 + c * tau2;
282     }
283 }
284 
285 template <typename T>
286 void
jacobiRotateRight(IMATH_INTERNAL_NAMESPACE::Matrix44<T> & A,const int j,const int k,const T c,const T s)287 jacobiRotateRight (IMATH_INTERNAL_NAMESPACE::Matrix44<T>& A,
288                    const int j,
289                    const int k,
290                    const T c,
291                    const T s)
292 {
293     for (int i = 0; i < 4; ++i)
294     {
295         const T tau1 = A[i][j];
296         const T tau2 = A[i][k];
297         A[i][j] = c * tau1 - s * tau2;
298         A[i][k] = s * tau1 + c * tau2;
299     }
300 }
301 
302 // This routine solves the 2x2 SVD:
303 //     [  c1   s1 ] [ w   x ] [  c2  s2 ]   [ d1    0 ]
304 //     [          ] [       ] [         ] = [         ]
305 //     [ -s1   c1 ] [ y   z ] [ -s2  c2 ]   [  0   d2 ]
306 // where
307 //      [ w   x ]
308 //  A = [       ]
309 //      [ y   z ]
310 // is the subset of A consisting of the [j,k] entries, A([j k], [j k]) in
311 // Matlab parlance.  The method is the 'USVD' algorithm described in the
312 // following paper:
313 //    'Computation of the Singular Value Decomposition using Mesh-Connected Processors'
314 //    by Richard P. Brent, Franklin T. Luk, and Charles Van Loan
315 // It breaks the computation into two steps: the first symmetrizes the matrix,
316 // and the second diagonalizes the symmetric matrix.
317 template <typename T, int j, int k, int l>
318 bool
twoSidedJacobiRotation(IMATH_INTERNAL_NAMESPACE::Matrix33<T> & A,IMATH_INTERNAL_NAMESPACE::Matrix33<T> & U,IMATH_INTERNAL_NAMESPACE::Matrix33<T> & V,const T tol)319 twoSidedJacobiRotation (IMATH_INTERNAL_NAMESPACE::Matrix33<T>& A,
320                         IMATH_INTERNAL_NAMESPACE::Matrix33<T>& U,
321                         IMATH_INTERNAL_NAMESPACE::Matrix33<T>& V,
322                         const T tol)
323 {
324     // Load everything into local variables to make things easier on the
325     // optimizer:
326     const T w = A[j][j];
327     const T x = A[j][k];
328     const T y = A[k][j];
329     const T z = A[k][k];
330 
331     // We will keep track of whether we're actually performing any rotations,
332     // since if the matrix is already diagonal we'll end up with the identity
333     // as our Jacobi rotation and we can short-circuit.
334     bool changed = false;
335 
336     // The first step is to symmetrize the 2x2 matrix,
337     //   [ c  s ]^T [ w x ] = [ p q ]
338     //   [ -s c ]   [ y z ]   [ q r ]
339     T mu_1 = w + z;
340     T mu_2 = x - y;
341 
342     T c, s;
343     if (std::abs(mu_2) <= tol*std::abs(mu_1))  // Already symmetric (to tolerance)
344     {                                          // Note that the <= is important here
345         c = T(1);                              // because we want to bypass the computation
346         s = T(0);                              // of rho if mu_1 = mu_2 = 0.
347 
348         const T p = w;
349         const T r = z;
350         mu_1 = r - p;
351         mu_2 = x + y;
352     }
353     else
354     {
355         const T rho = mu_1 / mu_2;
356         s = T(1) / std::sqrt (T(1) + rho*rho);  // TODO is there a native inverse square root function?
357         if (rho < 0)
358             s = -s;
359         c = s * rho;
360 
361         mu_1 = s * (x + y) + c * (z - w);   // = r - p
362         mu_2 = T(2) * (c * x - s * z);      // = 2*q
363 
364         changed = true;
365     }
366 
367     // The second stage diagonalizes,
368     //   [ c2   s2 ]^T [ p q ] [ c2  s2 ]  = [ d1   0 ]
369     //   [ -s2  c2 ]   [ q r ] [ -s2 c2 ]    [  0  d2 ]
370     T c_2, s_2;
371     if (std::abs(mu_2) <= tol*std::abs(mu_1))
372     {
373        c_2 = T(1);
374        s_2 = T(0);
375     }
376     else
377     {
378         const T rho_2 = mu_1 / mu_2;
379         T t_2 = T(1) / (std::abs(rho_2) + std::sqrt(1 + rho_2*rho_2));
380         if (rho_2 < 0)
381             t_2 = -t_2;
382         c_2 = T(1) / std::sqrt (T(1) + t_2*t_2);
383         s_2 = c_2 * t_2;
384 
385         changed = true;
386     }
387 
388     const T c_1 = c_2 * c - s_2 * s;
389     const T s_1 = s_2 * c + c_2 * s;
390 
391     if (!changed)
392     {
393         // We've decided that the off-diagonal entries are already small
394         // enough, so we'll set them to zero.  This actually appears to result
395         // in smaller errors than leaving them be, possibly because it prevents
396         // us from trying to do extra rotations later that we don't need.
397         A[k][j] = 0;
398         A[j][k] = 0;
399         return false;
400     }
401 
402     const T d_1 = c_1*(w*c_2 - x*s_2) - s_1*(y*c_2 - z*s_2);
403     const T d_2 = s_1*(w*s_2 + x*c_2) + c_1*(y*s_2 + z*c_2);
404 
405     // For the entries we just zeroed out, we'll just set them to 0, since
406     // they should be 0 up to machine precision.
407     A[j][j] = d_1;
408     A[k][k] = d_2;
409     A[k][j] = 0;
410     A[j][k] = 0;
411 
412     // Rotate the entries that _weren't_ involved in the 2x2 SVD:
413     {
414         // Rotate on the left by
415         //    [  c1 s1 0 ]^T      [  c1 0 s1 ]^T      [ 1   0  0 ]^T
416         //    [ -s1 c1 0 ]    or  [   0 1  0 ]    or  [ 0  c1 s1 ]
417         //    [   0  0 1 ]        [ -s1 0 c1 ]        [ 0 -s1 c1 ]
418         // This has the effect of adding the (weighted) ith and jth _rows_ to
419         // each other.
420         const T tau1 = A[j][l];
421         const T tau2 = A[k][l];
422         A[j][l] = c_1 * tau1 - s_1 * tau2;
423         A[k][l] = s_1 * tau1 + c_1 * tau2;
424     }
425 
426     {
427         // Rotate on the right by
428         //    [  c2 s2 0 ]      [  c2 0 s2 ]      [ 1   0  0 ]
429         //    [ -s2 c2 0 ]  or  [   0 1  0 ]  or  [ 0  c2 s2 ]
430         //    [   0  0 1 ]      [ -s2 0 c2 ]      [ 0 -s2 c2 ]
431         // This has the effect of adding the (weighted) ith and jth _columns_ to
432         // each other.
433         const T tau1 = A[l][j];
434         const T tau2 = A[l][k];
435         A[l][j] = c_2 * tau1 - s_2 * tau2;
436         A[l][k] = s_2 * tau1 + c_2 * tau2;
437     }
438 
439     // Now apply the rotations to U and V:
440     // Remember that we have
441     //    R1^T * A * R2 = D
442     // This is in the 2x2 case, but after doing a bunch of these
443     // we will get something like this for the 3x3 case:
444     //   ... R1b^T * R1a^T * A * R2a * R2b * ... = D
445     //   -----------------       ---------------
446     //        = U^T                    = V
447     // So,
448     //   U = R1a * R1b * ...
449     //   V = R2a * R2b * ...
450     jacobiRotateRight<T, j, k> (U, c_1, s_1);
451     jacobiRotateRight<T, j, k> (V, c_2, s_2);
452 
453     return true;
454 }
455 
456 template <typename T>
457 bool
twoSidedJacobiRotation(IMATH_INTERNAL_NAMESPACE::Matrix44<T> & A,int j,int k,IMATH_INTERNAL_NAMESPACE::Matrix44<T> & U,IMATH_INTERNAL_NAMESPACE::Matrix44<T> & V,const T tol)458 twoSidedJacobiRotation (IMATH_INTERNAL_NAMESPACE::Matrix44<T>& A,
459                         int j,
460                         int k,
461                         IMATH_INTERNAL_NAMESPACE::Matrix44<T>& U,
462                         IMATH_INTERNAL_NAMESPACE::Matrix44<T>& V,
463                         const T tol)
464 {
465     // Load everything into local variables to make things easier on the
466     // optimizer:
467     const T w = A[j][j];
468     const T x = A[j][k];
469     const T y = A[k][j];
470     const T z = A[k][k];
471 
472     // We will keep track of whether we're actually performing any rotations,
473     // since if the matrix is already diagonal we'll end up with the identity
474     // as our Jacobi rotation and we can short-circuit.
475     bool changed = false;
476 
477     // The first step is to symmetrize the 2x2 matrix,
478     //   [ c  s ]^T [ w x ] = [ p q ]
479     //   [ -s c ]   [ y z ]   [ q r ]
480     T mu_1 = w + z;
481     T mu_2 = x - y;
482 
483     T c, s;
484     if (std::abs(mu_2) <= tol*std::abs(mu_1))  // Already symmetric (to tolerance)
485     {                                          // Note that the <= is important here
486         c = T(1);                              // because we want to bypass the computation
487         s = T(0);                              // of rho if mu_1 = mu_2 = 0.
488 
489         const T p = w;
490         const T r = z;
491         mu_1 = r - p;
492         mu_2 = x + y;
493     }
494     else
495     {
496         const T rho = mu_1 / mu_2;
497         s = T(1) / std::sqrt (T(1) + rho*rho);  // TODO is there a native inverse square root function?
498         if (rho < 0)
499             s = -s;
500         c = s * rho;
501 
502         mu_1 = s * (x + y) + c * (z - w);   // = r - p
503         mu_2 = T(2) * (c * x - s * z);      // = 2*q
504 
505         changed = true;
506     }
507 
508     // The second stage diagonalizes,
509     //   [ c2   s2 ]^T [ p q ] [ c2  s2 ]  = [ d1   0 ]
510     //   [ -s2  c2 ]   [ q r ] [ -s2 c2 ]    [  0  d2 ]
511     T c_2, s_2;
512     if (std::abs(mu_2) <= tol*std::abs(mu_1))
513     {
514        c_2 = T(1);
515        s_2 = T(0);
516     }
517     else
518     {
519         const T rho_2 = mu_1 / mu_2;
520         T t_2 = T(1) / (std::abs(rho_2) + std::sqrt(1 + rho_2*rho_2));
521         if (rho_2 < 0)
522             t_2 = -t_2;
523         c_2 = T(1) / std::sqrt (T(1) + t_2*t_2);
524         s_2 = c_2 * t_2;
525 
526         changed = true;
527     }
528 
529     const T c_1 = c_2 * c - s_2 * s;
530     const T s_1 = s_2 * c + c_2 * s;
531 
532     if (!changed)
533     {
534         // We've decided that the off-diagonal entries are already small
535         // enough, so we'll set them to zero.  This actually appears to result
536         // in smaller errors than leaving them be, possibly because it prevents
537         // us from trying to do extra rotations later that we don't need.
538         A[k][j] = 0;
539         A[j][k] = 0;
540         return false;
541     }
542 
543     const T d_1 = c_1*(w*c_2 - x*s_2) - s_1*(y*c_2 - z*s_2);
544     const T d_2 = s_1*(w*s_2 + x*c_2) + c_1*(y*s_2 + z*c_2);
545 
546     // For the entries we just zeroed out, we'll just set them to 0, since
547     // they should be 0 up to machine precision.
548     A[j][j] = d_1;
549     A[k][k] = d_2;
550     A[k][j] = 0;
551     A[j][k] = 0;
552 
553     // Rotate the entries that _weren't_ involved in the 2x2 SVD:
554     for (int l = 0; l < 4; ++l)
555     {
556         if (l == j || l == k)
557             continue;
558 
559         // Rotate on the left by
560         //    [ 1               ]
561         //    [   .             ]
562         //    [     c2   s2     ]  j
563         //    [        1        ]
564         //    [    -s2   c2     ]  k
565         //    [             .   ]
566         //    [               1 ]
567         //          j    k
568         //
569         // This has the effect of adding the (weighted) ith and jth _rows_ to
570         // each other.
571         const T tau1 = A[j][l];
572         const T tau2 = A[k][l];
573         A[j][l] = c_1 * tau1 - s_1 * tau2;
574         A[k][l] = s_1 * tau1 + c_1 * tau2;
575     }
576 
577     for (int l = 0; l < 4; ++l)
578     {
579         // We set the A[j/k][j/k] entries already
580         if (l == j || l == k)
581             continue;
582 
583         // Rotate on the right by
584         //    [ 1               ]
585         //    [   .             ]
586         //    [     c2   s2     ]  j
587         //    [        1        ]
588         //    [    -s2   c2     ]  k
589         //    [             .   ]
590         //    [               1 ]
591         //          j    k
592         //
593         // This has the effect of adding the (weighted) ith and jth _columns_ to
594         // each other.
595         const T tau1 = A[l][j];
596         const T tau2 = A[l][k];
597         A[l][j] = c_2 * tau1 - s_2 * tau2;
598         A[l][k] = s_2 * tau1 + c_2 * tau2;
599     }
600 
601     // Now apply the rotations to U and V:
602     // Remember that we have
603     //    R1^T * A * R2 = D
604     // This is in the 2x2 case, but after doing a bunch of these
605     // we will get something like this for the 3x3 case:
606     //   ... R1b^T * R1a^T * A * R2a * R2b * ... = D
607     //   -----------------       ---------------
608     //        = U^T                    = V
609     // So,
610     //   U = R1a * R1b * ...
611     //   V = R2a * R2b * ...
612     jacobiRotateRight (U, j, k, c_1, s_1);
613     jacobiRotateRight (V, j, k, c_2, s_2);
614 
615     return true;
616 }
617 
618 template <typename T>
619 void
swapColumns(IMATH_INTERNAL_NAMESPACE::Matrix33<T> & A,int j,int k)620 swapColumns (IMATH_INTERNAL_NAMESPACE::Matrix33<T>& A, int j, int k)
621 {
622     for (int i = 0; i < 3; ++i)
623         std::swap (A[i][j], A[i][k]);
624 }
625 
626 template <typename T>
627 T
maxOffDiag(const IMATH_INTERNAL_NAMESPACE::Matrix33<T> & A)628 maxOffDiag (const IMATH_INTERNAL_NAMESPACE::Matrix33<T>& A)
629 {
630     T result = 0;
631     result = std::max (result, std::abs (A[0][1]));
632     result = std::max (result, std::abs (A[0][2]));
633     result = std::max (result, std::abs (A[1][0]));
634     result = std::max (result, std::abs (A[1][2]));
635     result = std::max (result, std::abs (A[2][0]));
636     result = std::max (result, std::abs (A[2][1]));
637     return result;
638 }
639 
640 template <typename T>
641 T
maxOffDiag(const IMATH_INTERNAL_NAMESPACE::Matrix44<T> & A)642 maxOffDiag (const IMATH_INTERNAL_NAMESPACE::Matrix44<T>& A)
643 {
644     T result = 0;
645     for (int i = 0; i < 4; ++i)
646     {
647         for (int j = 0; j < 4; ++j)
648         {
649             if (i != j)
650                 result = std::max (result, std::abs (A[i][j]));
651         }
652     }
653 
654     return result;
655 }
656 
657 template <typename T>
658 void
twoSidedJacobiSVD(IMATH_INTERNAL_NAMESPACE::Matrix33<T> A,IMATH_INTERNAL_NAMESPACE::Matrix33<T> & U,IMATH_INTERNAL_NAMESPACE::Vec3<T> & S,IMATH_INTERNAL_NAMESPACE::Matrix33<T> & V,const T tol,const bool forcePositiveDeterminant)659 twoSidedJacobiSVD (IMATH_INTERNAL_NAMESPACE::Matrix33<T> A,
660                    IMATH_INTERNAL_NAMESPACE::Matrix33<T>& U,
661                    IMATH_INTERNAL_NAMESPACE::Vec3<T>& S,
662                    IMATH_INTERNAL_NAMESPACE::Matrix33<T>& V,
663                    const T tol,
664                    const bool forcePositiveDeterminant)
665 {
666     // The two-sided Jacobi SVD works by repeatedly zeroing out
667     // off-diagonal entries of the matrix, 2 at a time.  Basically,
668     // we can take our 3x3 matrix,
669     //    [* * *]
670     //    [* * *]
671     //    [* * *]
672     // and use a pair of orthogonal transforms to zero out, say, the
673     // pair of entries (0, 1) and (1, 0):
674     //  [ c1 s1  ] [* * *] [ c2 s2  ]   [*   *]
675     //  [-s1 c1  ] [* * *] [-s2 c2  ] = [  * *]
676     //  [       1] [* * *] [       1]   [* * *]
677     // When we go to zero out the next pair of entries (say, (0, 2) and (2, 0))
678     // then we don't expect those entries to stay 0:
679     //  [ c1 s1  ] [*   *] [ c2 s2  ]   [* *  ]
680     //  [-s1 c1  ] [  * *] [-s2 c2  ] = [* * *]
681     //  [       1] [* * *] [       1]   [  * *]
682     // However, if we keep doing this, we'll find that the off-diagonal entries
683     // converge to 0 fairly quickly (convergence should be roughly cubic).  The
684     // result is a diagonal A matrix and a bunch of orthogonal transforms:
685     //               [* * *]                [*    ]
686     //  L1 L2 ... Ln [* * *] Rn ... R2 R1 = [  *  ]
687     //               [* * *]                [    *]
688     //  ------------ ------- ------------   -------
689     //      U^T         A         V            S
690     // This turns out to be highly accurate because (1) orthogonal transforms
691     // are extremely stable to compute and apply (this is why QR factorization
692     // works so well, FWIW) and because (2) by applying everything to the original
693     // matrix A instead of computing (A^T * A) we avoid any precision loss that
694     // would result from that.
695     U.makeIdentity();
696     V.makeIdentity();
697 
698     const int maxIter = 20;  // In case we get really unlucky, prevents infinite loops
699     const T absTol = tol * maxOffDiag (A);  // Tolerance is in terms of the maximum
700     if (absTol != 0)                        // _off-diagonal_ entry.
701     {
702         int numIter = 0;
703         do
704         {
705             ++numIter;
706             bool changed = twoSidedJacobiRotation<T, 0, 1, 2> (A, U, V, tol);
707             changed = twoSidedJacobiRotation<T, 0, 2, 1> (A, U, V, tol) || changed;
708             changed = twoSidedJacobiRotation<T, 1, 2, 0> (A, U, V, tol) || changed;
709             if (!changed)
710                 break;
711         } while (maxOffDiag(A) > absTol && numIter < maxIter);
712     }
713 
714     // The off-diagonal entries are (effectively) 0, so whatever's left on the
715     // diagonal are the singular values:
716     S.x = A[0][0];
717     S.y = A[1][1];
718     S.z = A[2][2];
719 
720     // Nothing thus far has guaranteed that the singular values are positive,
721     // so let's go back through and flip them if not (since by contract we are
722     // supposed to return all positive SVs):
723     for (int i = 0; i < 3; ++i)
724     {
725         if (S[i] < 0)
726         {
727             // If we flip S[i], we need to flip the corresponding column of U
728             // (we could also pick V if we wanted; it doesn't really matter):
729             S[i] = -S[i];
730             for (int j = 0; j < 3; ++j)
731                 U[j][i] = -U[j][i];
732         }
733     }
734 
735     // Order the singular values from largest to smallest; this requires
736     // exactly two passes through the data using bubble sort:
737     for (int i = 0; i < 2; ++i)
738     {
739         for (int j = 0; j < (2 - i); ++j)
740         {
741             // No absolute values necessary since we already ensured that
742             // they're positive:
743             if (S[j] < S[j+1])
744             {
745                 // If we swap singular values we also have to swap
746                 // corresponding columns in U and V:
747                 std::swap (S[j], S[j+1]);
748                 swapColumns (U, j, j+1);
749                 swapColumns (V, j, j+1);
750             }
751         }
752     }
753 
754     if (forcePositiveDeterminant)
755     {
756         // We want to guarantee that the returned matrices always have positive
757         // determinant.  We can do this by adding the appropriate number of
758         // matrices of the form:
759         //       [ 1       ]
760         //  L =  [    1    ]
761         //       [      -1 ]
762         // Note that L' = L and L*L = Identity.  Thus we can add:
763         //   U*L*L*S*V = (U*L)*(L*S)*V
764         // if U has a negative determinant, and
765         //   U*S*L*L*V = U*(S*L)*(L*V)
766         // if V has a neg. determinant.
767         if (U.determinant() < 0)
768         {
769             for (int i = 0; i < 3; ++i)
770                 U[i][2] = -U[i][2];
771             S.z = -S.z;
772         }
773 
774         if (V.determinant() < 0)
775         {
776             for (int i = 0; i < 3; ++i)
777                 V[i][2] = -V[i][2];
778             S.z = -S.z;
779         }
780     }
781 }
782 
783 template <typename T>
784 void
twoSidedJacobiSVD(IMATH_INTERNAL_NAMESPACE::Matrix44<T> A,IMATH_INTERNAL_NAMESPACE::Matrix44<T> & U,IMATH_INTERNAL_NAMESPACE::Vec4<T> & S,IMATH_INTERNAL_NAMESPACE::Matrix44<T> & V,const T tol,const bool forcePositiveDeterminant)785 twoSidedJacobiSVD (IMATH_INTERNAL_NAMESPACE::Matrix44<T> A,
786                    IMATH_INTERNAL_NAMESPACE::Matrix44<T>& U,
787                    IMATH_INTERNAL_NAMESPACE::Vec4<T>& S,
788                    IMATH_INTERNAL_NAMESPACE::Matrix44<T>& V,
789                    const T tol,
790                    const bool forcePositiveDeterminant)
791 {
792     // Please see the Matrix33 version for a detailed description of the algorithm.
793     U.makeIdentity();
794     V.makeIdentity();
795 
796     const int maxIter = 20;  // In case we get really unlucky, prevents infinite loops
797     const T absTol = tol * maxOffDiag (A);  // Tolerance is in terms of the maximum
798     if (absTol != 0)                        // _off-diagonal_ entry.
799     {
800         int numIter = 0;
801         do
802         {
803             ++numIter;
804             bool changed = twoSidedJacobiRotation (A, 0, 1, U, V, tol);
805             changed = twoSidedJacobiRotation (A, 0, 2, U, V, tol) || changed;
806             changed = twoSidedJacobiRotation (A, 0, 3, U, V, tol) || changed;
807             changed = twoSidedJacobiRotation (A, 1, 2, U, V, tol) || changed;
808             changed = twoSidedJacobiRotation (A, 1, 3, U, V, tol) || changed;
809             changed = twoSidedJacobiRotation (A, 2, 3, U, V, tol) || changed;
810             if (!changed)
811                 break;
812         } while (maxOffDiag(A) > absTol && numIter < maxIter);
813     }
814 
815     // The off-diagonal entries are (effectively) 0, so whatever's left on the
816     // diagonal are the singular values:
817     S[0] = A[0][0];
818     S[1] = A[1][1];
819     S[2] = A[2][2];
820     S[3] = A[3][3];
821 
822     // Nothing thus far has guaranteed that the singular values are positive,
823     // so let's go back through and flip them if not (since by contract we are
824     // supposed to return all positive SVs):
825     for (int i = 0; i < 4; ++i)
826     {
827         if (S[i] < 0)
828         {
829             // If we flip S[i], we need to flip the corresponding column of U
830             // (we could also pick V if we wanted; it doesn't really matter):
831             S[i] = -S[i];
832             for (int j = 0; j < 4; ++j)
833                 U[j][i] = -U[j][i];
834         }
835     }
836 
837     // Order the singular values from largest to smallest using insertion sort:
838     for (int i = 1; i < 4; ++i)
839     {
840         const IMATH_INTERNAL_NAMESPACE::Vec4<T> uCol (U[0][i], U[1][i], U[2][i], U[3][i]);
841         const IMATH_INTERNAL_NAMESPACE::Vec4<T> vCol (V[0][i], V[1][i], V[2][i], V[3][i]);
842         const T sVal = S[i];
843 
844         int j = i - 1;
845         while (std::abs (S[j]) < std::abs (sVal))
846         {
847             for (int k = 0; k < 4; ++k)
848                 U[k][j+1] = U[k][j];
849             for (int k = 0; k < 4; ++k)
850                 V[k][j+1] = V[k][j];
851             S[j+1] = S[j];
852 
853             --j;
854             if (j < 0)
855                 break;
856         }
857 
858         for (int k = 0; k < 4; ++k)
859             U[k][j+1] = uCol[k];
860         for (int k = 0; k < 4; ++k)
861             V[k][j+1] = vCol[k];
862         S[j+1] = sVal;
863     }
864 
865     if (forcePositiveDeterminant)
866     {
867         // We want to guarantee that the returned matrices always have positive
868         // determinant.  We can do this by adding the appropriate number of
869         // matrices of the form:
870         //       [ 1          ]
871         //  L =  [    1       ]
872         //       [       1    ]
873         //       [         -1 ]
874         // Note that L' = L and L*L = Identity.  Thus we can add:
875         //   U*L*L*S*V = (U*L)*(L*S)*V
876         // if U has a negative determinant, and
877         //   U*S*L*L*V = U*(S*L)*(L*V)
878         // if V has a neg. determinant.
879         if (U.determinant() < 0)
880         {
881             for (int i = 0; i < 4; ++i)
882                 U[i][3] = -U[i][3];
883             S[3] = -S[3];
884         }
885 
886         if (V.determinant() < 0)
887         {
888             for (int i = 0; i < 4; ++i)
889                 V[i][3] = -V[i][3];
890             S[3] = -S[3];
891         }
892     }
893 }
894 
895 }
896 
897 template <typename T>
898 void
jacobiSVD(const IMATH_INTERNAL_NAMESPACE::Matrix33<T> & A,IMATH_INTERNAL_NAMESPACE::Matrix33<T> & U,IMATH_INTERNAL_NAMESPACE::Vec3<T> & S,IMATH_INTERNAL_NAMESPACE::Matrix33<T> & V,const T tol,const bool forcePositiveDeterminant)899 jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix33<T>& A,
900            IMATH_INTERNAL_NAMESPACE::Matrix33<T>& U,
901            IMATH_INTERNAL_NAMESPACE::Vec3<T>& S,
902            IMATH_INTERNAL_NAMESPACE::Matrix33<T>& V,
903            const T tol,
904            const bool forcePositiveDeterminant)
905 {
906     twoSidedJacobiSVD (A, U, S, V, tol, forcePositiveDeterminant);
907 }
908 
909 template <typename T>
910 void
jacobiSVD(const IMATH_INTERNAL_NAMESPACE::Matrix44<T> & A,IMATH_INTERNAL_NAMESPACE::Matrix44<T> & U,IMATH_INTERNAL_NAMESPACE::Vec4<T> & S,IMATH_INTERNAL_NAMESPACE::Matrix44<T> & V,const T tol,const bool forcePositiveDeterminant)911 jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix44<T>& A,
912            IMATH_INTERNAL_NAMESPACE::Matrix44<T>& U,
913            IMATH_INTERNAL_NAMESPACE::Vec4<T>& S,
914            IMATH_INTERNAL_NAMESPACE::Matrix44<T>& V,
915            const T tol,
916            const bool forcePositiveDeterminant)
917 {
918     twoSidedJacobiSVD (A, U, S, V, tol, forcePositiveDeterminant);
919 }
920 
921 template IMATH_EXPORT void jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix33<float>& A,
922                                       IMATH_INTERNAL_NAMESPACE::Matrix33<float>& U,
923                                       IMATH_INTERNAL_NAMESPACE::Vec3<float>& S,
924                                       IMATH_INTERNAL_NAMESPACE::Matrix33<float>& V,
925                                       const float tol,
926                                       const bool forcePositiveDeterminant);
927 template IMATH_EXPORT void jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix33<double>& A,
928                                       IMATH_INTERNAL_NAMESPACE::Matrix33<double>& U,
929                                       IMATH_INTERNAL_NAMESPACE::Vec3<double>& S,
930                                       IMATH_INTERNAL_NAMESPACE::Matrix33<double>& V,
931                                       const double tol,
932                                       const bool forcePositiveDeterminant);
933 template IMATH_EXPORT void jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix44<float>& A,
934                                       IMATH_INTERNAL_NAMESPACE::Matrix44<float>& U,
935                                       IMATH_INTERNAL_NAMESPACE::Vec4<float>& S,
936                                       IMATH_INTERNAL_NAMESPACE::Matrix44<float>& V,
937                                       const float tol,
938                                       const bool forcePositiveDeterminant);
939 template IMATH_EXPORT void jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix44<double>& A,
940                                       IMATH_INTERNAL_NAMESPACE::Matrix44<double>& U,
941                                       IMATH_INTERNAL_NAMESPACE::Vec4<double>& S,
942                                       IMATH_INTERNAL_NAMESPACE::Matrix44<double>& V,
943                                       const double tol,
944                                       const bool forcePositiveDeterminant);
945 
946 namespace
947 {
948 
949 template <int j, int k, typename TM>
950 inline
951 void
jacobiRotateRight(TM & A,const typename TM::BaseType s,const typename TM::BaseType tau)952 jacobiRotateRight (TM& A,
953                    const typename TM::BaseType s,
954                    const typename TM::BaseType tau)
955 {
956     typedef typename TM::BaseType T;
957 
958     for (unsigned int i = 0; i < TM::dimensions(); ++i)
959     {
960         const T nu1 = A[i][j];
961         const T nu2 = A[i][k];
962         A[i][j] -= s * (nu2 + tau * nu1);
963         A[i][k] += s * (nu1 - tau * nu2);
964    }
965 }
966 
967 template <int j, int k, int l, typename T>
968 bool
jacobiRotation(Matrix33<T> & A,Matrix33<T> & V,Vec3<T> & Z,const T tol)969 jacobiRotation (Matrix33<T>& A,
970                 Matrix33<T>& V,
971                 Vec3<T>& Z,
972                 const T tol)
973 {
974     // Load everything into local variables to make things easier on the
975     // optimizer:
976     const T x = A[j][j];
977     const T y = A[j][k];
978     const T z = A[k][k];
979 
980     // The first stage diagonalizes,
981     //   [ c  s ]^T [ x y ] [ c -s ]  = [ d1   0 ]
982     //   [ -s c ]   [ y z ] [ s  c ]    [  0  d2 ]
983     const T mu1 = z - x;
984     const T mu2 = 2 * y;
985 
986     if (std::abs(mu2) <= tol*std::abs(mu1))
987     {
988         // We've decided that the off-diagonal entries are already small
989         // enough, so we'll set them to zero.  This actually appears to result
990         // in smaller errors than leaving them be, possibly because it prevents
991         // us from trying to do extra rotations later that we don't need.
992         A[j][k] = 0;
993         return false;
994     }
995     const T rho = mu1 / mu2;
996     const T t = (rho < 0 ? T(-1) : T(1)) / (std::abs(rho) + std::sqrt(1 + rho*rho));
997     const T c = T(1) / std::sqrt (T(1) + t*t);
998     const T s = t * c;
999     const T tau = s / (T(1) + c);
1000     const T h = t * y;
1001 
1002     // Update diagonal elements.
1003     Z[j] -= h;
1004     Z[k] += h;
1005     A[j][j] -= h;
1006     A[k][k] += h;
1007 
1008     // For the entries we just zeroed out, we'll just set them to 0, since
1009     // they should be 0 up to machine precision.
1010     A[j][k] = 0;
1011 
1012     // We only update upper triagnular elements of A, since
1013     // A is supposed to be symmetric.
1014     T& offd1 = l < j ? A[l][j] : A[j][l];
1015     T& offd2 = l < k ? A[l][k] : A[k][l];
1016     const T nu1 = offd1;
1017     const T nu2 = offd2;
1018     offd1 = nu1 - s * (nu2 + tau * nu1);
1019     offd2 = nu2 + s * (nu1 - tau * nu2);
1020 
1021     // Apply rotation to V
1022     jacobiRotateRight<j, k> (V, s, tau);
1023 
1024     return true;
1025 }
1026 
1027 template <int j, int k, int l1, int l2, typename T>
1028 bool
jacobiRotation(Matrix44<T> & A,Matrix44<T> & V,Vec4<T> & Z,const T tol)1029 jacobiRotation (Matrix44<T>& A,
1030                 Matrix44<T>& V,
1031                 Vec4<T>& Z,
1032                 const T tol)
1033 {
1034     const T x = A[j][j];
1035     const T y = A[j][k];
1036     const T z = A[k][k];
1037 
1038     const T mu1 = z - x;
1039     const T mu2 = T(2) * y;
1040 
1041     // Let's see if rho^(-1) = mu2 / mu1 is less than tol
1042     // This test also checks if rho^2 will overflow
1043     // when tol^(-1) < sqrt(limits<T>::max()).
1044     if (std::abs(mu2) <= tol*std::abs(mu1))
1045     {
1046         A[j][k] = 0;
1047         return true;
1048     }
1049 
1050     const T rho = mu1 / mu2;
1051     const T t = (rho < 0 ? T(-1) : T(1)) / (std::abs(rho) + std::sqrt(1 + rho*rho));
1052     const T c = T(1) / std::sqrt (T(1) + t*t);
1053     const T s = c * t;
1054     const T tau = s / (T(1) + c);
1055     const T h = t * y;
1056 
1057     Z[j] -= h;
1058     Z[k] += h;
1059     A[j][j] -= h;
1060     A[k][k] += h;
1061     A[j][k] = 0;
1062 
1063     {
1064         T& offd1 = l1 < j ? A[l1][j] : A[j][l1];
1065         T& offd2 = l1 < k ? A[l1][k] : A[k][l1];
1066         const T nu1 = offd1;
1067         const T nu2 = offd2;
1068         offd1 -= s * (nu2 + tau * nu1);
1069         offd2 += s * (nu1 - tau * nu2);
1070     }
1071 
1072     {
1073         T& offd1 = l2 < j ? A[l2][j] : A[j][l2];
1074         T& offd2 = l2 < k ? A[l2][k] : A[k][l2];
1075         const T nu1 = offd1;
1076         const T nu2 = offd2;
1077         offd1 -= s * (nu2 + tau * nu1);
1078         offd2 += s * (nu1 - tau * nu2);
1079     }
1080 
1081     jacobiRotateRight<j, k> (V, s, tau);
1082 
1083     return true;
1084 }
1085 
1086 template <typename TM>
1087 inline
1088 typename TM::BaseType
maxOffDiagSymm(const TM & A)1089 maxOffDiagSymm (const TM& A)
1090 {
1091     typedef typename TM::BaseType T;
1092     T result = 0;
1093     for (unsigned int i = 0; i < TM::dimensions(); ++i)
1094         for (unsigned int j = i+1; j < TM::dimensions(); ++j)
1095             result = std::max (result, std::abs (A[i][j]));
1096 
1097    return result;
1098 }
1099 
1100 } // namespace
1101 
1102 template <typename T>
1103 void
jacobiEigenSolver(Matrix33<T> & A,Vec3<T> & S,Matrix33<T> & V,const T tol)1104 jacobiEigenSolver (Matrix33<T>& A,
1105                    Vec3<T>& S,
1106                    Matrix33<T>& V,
1107                    const T tol)
1108 {
1109     V.makeIdentity();
1110     for(int i = 0; i < 3; ++i) {
1111         S[i] = A[i][i];
1112     }
1113 
1114     const int maxIter = 20;  // In case we get really unlucky, prevents infinite loops
1115     const T absTol = tol * maxOffDiagSymm (A);  // Tolerance is in terms of the maximum
1116     if (absTol != 0)                        // _off-diagonal_ entry.
1117     {
1118         int numIter = 0;
1119         do
1120         {
1121             // Z is for accumulating small changes (h) to diagonal entries
1122             // of A for one sweep. Adding h's directly to A might cause
1123             // a cancellation effect when h is relatively very small to
1124             // the corresponding diagonal entry of A and
1125             // this will increase numerical errors
1126             Vec3<T> Z(0, 0, 0);
1127             ++numIter;
1128             bool changed = jacobiRotation<0, 1, 2> (A, V, Z, tol);
1129             changed = jacobiRotation<0, 2, 1> (A, V, Z, tol) || changed;
1130             changed = jacobiRotation<1, 2, 0> (A, V, Z, tol) || changed;
1131             // One sweep passed. Add accumulated changes (Z) to singular values (S)
1132             // Update diagonal elements of A for better accuracy as well.
1133             for(int i = 0; i < 3; ++i) {
1134                 A[i][i] = S[i] += Z[i];
1135             }
1136             if (!changed)
1137                 break;
1138         } while (maxOffDiagSymm(A) > absTol && numIter < maxIter);
1139     }
1140 }
1141 
1142 template <typename T>
1143 void
jacobiEigenSolver(Matrix44<T> & A,Vec4<T> & S,Matrix44<T> & V,const T tol)1144 jacobiEigenSolver (Matrix44<T>& A,
1145                    Vec4<T>& S,
1146                    Matrix44<T>& V,
1147                    const T tol)
1148 {
1149     V.makeIdentity();
1150 
1151     for(int i = 0; i < 4; ++i) {
1152         S[i] = A[i][i];
1153     }
1154 
1155     const int maxIter = 20;  // In case we get really unlucky, prevents infinite loops
1156     const T absTol = tol * maxOffDiagSymm (A);  // Tolerance is in terms of the maximum
1157     if (absTol != 0)                        // _off-diagonal_ entry.
1158     {
1159         int numIter = 0;
1160         do
1161         {
1162             ++numIter;
1163             Vec4<T> Z(0, 0, 0, 0);
1164             bool changed = jacobiRotation<0, 1, 2, 3> (A, V, Z, tol);
1165             changed = jacobiRotation<0, 2, 1, 3> (A, V, Z, tol) || changed;
1166             changed = jacobiRotation<0, 3, 1, 2> (A, V, Z, tol) || changed;
1167             changed = jacobiRotation<1, 2, 0, 3> (A, V, Z, tol) || changed;
1168             changed = jacobiRotation<1, 3, 0, 2> (A, V, Z, tol) || changed;
1169             changed = jacobiRotation<2, 3, 0, 1> (A, V, Z, tol) || changed;
1170             for(int i = 0; i < 4; ++i) {
1171                 A[i][i] = S[i] += Z[i];
1172             }
1173            if (!changed)
1174                 break;
1175         } while (maxOffDiagSymm(A) > absTol && numIter < maxIter);
1176     }
1177 }
1178 
1179 template <typename TM, typename TV>
1180 void
maxEigenVector(TM & A,TV & V)1181 maxEigenVector (TM& A, TV& V)
1182 {
1183     TV S;
1184     TM MV;
1185     jacobiEigenSolver(A, S, MV);
1186 
1187     int maxIdx(0);
1188     for(unsigned int i = 1; i < TV::dimensions(); ++i)
1189     {
1190         if(std::abs(S[i]) > std::abs(S[maxIdx]))
1191             maxIdx = i;
1192     }
1193 
1194     for(unsigned int i = 0; i < TV::dimensions(); ++i)
1195         V[i] = MV[i][maxIdx];
1196 }
1197 
1198 template <typename TM, typename TV>
1199 void
minEigenVector(TM & A,TV & V)1200 minEigenVector (TM& A, TV& V)
1201 {
1202     TV S;
1203     TM MV;
1204     jacobiEigenSolver(A, S, MV);
1205 
1206     int minIdx(0);
1207     for(unsigned int i = 1; i < TV::dimensions(); ++i)
1208     {
1209         if(std::abs(S[i]) < std::abs(S[minIdx]))
1210             minIdx = i;
1211     }
1212 
1213    for(unsigned int i = 0; i < TV::dimensions(); ++i)
1214         V[i] = MV[i][minIdx];
1215 }
1216 
1217 template IMATH_EXPORT void jacobiEigenSolver (Matrix33<float>& A,
1218                                               Vec3<float>& S,
1219                                               Matrix33<float>& V,
1220                                               const float tol);
1221 template IMATH_EXPORT void jacobiEigenSolver (Matrix33<double>& A,
1222                                               Vec3<double>& S,
1223                                               Matrix33<double>& V,
1224                                               const double tol);
1225 template IMATH_EXPORT void jacobiEigenSolver (Matrix44<float>& A,
1226                                               Vec4<float>& S,
1227                                               Matrix44<float>& V,
1228                                               const float tol);
1229 template IMATH_EXPORT void jacobiEigenSolver (Matrix44<double>& A,
1230                                               Vec4<double>& S,
1231                                               Matrix44<double>& V,
1232                                               const double tol);
1233 
1234 template IMATH_EXPORT void maxEigenVector (Matrix33<float>& A,
1235                                            Vec3<float>& S);
1236 template IMATH_EXPORT void maxEigenVector (Matrix44<float>& A,
1237                                            Vec4<float>& S);
1238 template IMATH_EXPORT void maxEigenVector (Matrix33<double>& A,
1239                                            Vec3<double>& S);
1240 template IMATH_EXPORT void maxEigenVector (Matrix44<double>& A,
1241                                            Vec4<double>& S);
1242 
1243 template IMATH_EXPORT void minEigenVector (Matrix33<float>& A,
1244                                            Vec3<float>& S);
1245 template IMATH_EXPORT void minEigenVector (Matrix44<float>& A,
1246                                            Vec4<float>& S);
1247 template IMATH_EXPORT void minEigenVector (Matrix33<double>& A,
1248                                            Vec3<double>& S);
1249 template IMATH_EXPORT void minEigenVector (Matrix44<double>& A,
1250                                            Vec4<double>& S);
1251 
1252 IMATH_INTERNAL_NAMESPACE_SOURCE_EXIT
1253