1 /*
2 
3  HyPhy - Hypothesis Testing Using Phylogenies.
4 
5  Copyright (C) 1997-now
6  Core Developers:
7  Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
8  Art FY Poon    (apoon42@uwo.ca)
9  Steven Weaver (sweaver@temple.edu)
10 
11  Module Developers:
12  Lance Hepler (nlhepler@gmail.com)
13  Martin Smith (martin.audacis@gmail.com)
14 
15  Significant contributions from:
16  Spencer V Muse (muse@stat.ncsu.edu)
17  Simon DW Frost (sdf22@cam.ac.uk)
18 
19  Permission is hereby granted, free of charge, to any person obtaining a
20  copy of this software and associated documentation files (the
21  "Software"), to deal in the Software without restriction, including
22  without limitation the rights to use, copy, modify, merge, publish,
23  distribute, sublicense, and/or sell copies of the Software, and to
24  permit persons to whom the Software is furnished to do so, subject to
25  the following conditions:
26 
27  The above copyright notice and this permission notice shall be included
28  in all copies or substantial portions of the Software.
29 
30  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37 
38  */
39 
40 #include <string.h>
41 #include <stdio.h>
42 #include <time.h>
43 #include <float.h>
44 #include <math.h>
45 #include <limits.h>
46 
47 #include "matrix.h"
48 #include "polynoml.h"
49 #include "associative_list.h"
50 #include "batchlan.h"
51 #include "likefunc.h"
52 
53 #include "function_templates.h"
54 #include "mersenne_twister.h"
55 #include "global_things.h"
56 #include "string_file_wrapper.h"
57 
58 
59 //#include "profiler.h"
60 
61 using namespace hy_global;
62 
63 #define MEMORYERROR "Out of Memory"
64 #define ZEROOBJECT  0.0
65 #define ZEROPOINTER nil
66 
67 
68 _String     MATRIX_AGREEMENT            = "CONVERT_TO_POLYNOMIALS",
69             ANAL_COMP_FLAG              = "ANALYTIC_COMPUTATIONS",
70             ANAL_MATRIX_TOLERANCE      = "ANAL_MATRIX_TOLERANCE",
71             CACHE_FORMULA_DEPENDANCY  = "CACHE_FORMULA_DEPENDANCY";
72 
73 
74 int _Matrix::precisionArg = 0;
75 int _Matrix::storageIncrement = 16;
76 int _Matrix::switchThreshold = 35;
77 
78 hyFloat  _Matrix::truncPrecision = 1e-16;
79 #define     MatrixMemAllocate(X) MemAllocate(X, false, 64)
80 #define     MatrixMemFree(X)     free(X)
81 #define     MX_ACCESS(a,b) theData[(a)*hDim+(b)]
82 
83 
84 hyFloat     analMatrixTolerance = 1e-6,
85             zero = 0,
86             AUTO_PAD_DIAGONAL = 1,
87             toPolyOrNot=0.0,
88             toMorNot2M=1.0,
89             _log2 = log (2.0);
90 
91 long        ANALYTIC_COMPUTATION_FLAG = 0;
92 
93 _Trie       _HY_MatrixRandomValidPDFs;
94 
95 
96 
97 //__________________________________________________________________________________
98 
99 int         fexact_                 (long , long , double *, double , double , double , double *, double *);
100 void        MatrixIndexError        (long, long, long, long);
101 
102 
103 // function prototypes
104 
105 
106 #ifdef _SLKP_USE_AVX_INTRINSICS
echo_avx_sum_4(__m256d const x)107   void echo_avx_sum_4 (__m256d const x) {
108     double a[4];
109     _mm256_storeu_pd(a, x);
110     printf ("%g|%g|%g|%g\n", a[0], a[1], a[2], a[3]);
111   }
112 #endif
113 
114 
115 
116 
117 //__________________________________________________________________________________
118 
MatrixIndexError(long hPos,long vPos,long hDim,long vDim)119 void    MatrixIndexError (long hPos, long vPos, long hDim, long vDim) {
120     HandleApplicationError (
121                             kErrorStringInvalidMatrixIndex & _String ((long)hPos).Enquote('[',']') & _String ((long)vPos).Enquote('[',']') &
122                             " in an " &_String (hDim) & " by " &_String (vDim) & " matrix.");
123 }
124 
125 //_____________________________________________________________________________________________
126 
_Matrix()127 _Matrix::_Matrix () {                             // default constructor, doesn't do much
128   Initialize();
129 }
130 //_____________________________________________________________________________________________
131 
Initialize(bool)132 void _Matrix::Initialize (bool) {                            // default constructor, doesn't do much
133   theData         = nil;
134   theIndex        = nil;
135   vDim = hDim = lDim  = bufferPerRow = overflowBuffer = 0L;
136   storageType     = _NUMERICAL_TYPE;
137   allocationBlock = 1;
138   theValue        = nil;
139   compressedIndex = nil;
140 
141 }
142 
143 //_____________________________________________________________________________________________
144 
_Matrix(_String const & s,bool isNumeric,_FormulaParsingContext & fpc,bool use_square_brackets)145 _Matrix::_Matrix (_String const& s, bool isNumeric, _FormulaParsingContext & fpc, bool use_square_brackets) {
146   // takes two separate formats
147   // 1st : {{i11,...,i1n}{i21,...,i2n}....{in1,...,inn}} // all elements must be explicitly specified
148   // 2st : {hor dim, <vert dim>,{hor index, vert index, value or formula}{...}...}
149 
150   bool                doErrors            = fpc.errMsg() == nil,
151                       compute_keys_values = fpc.buildComplexObjects();
152 
153   _VariableContainer const* theP = fpc.formulaScope();
154 
155   Initialize();
156 
157   bool    isAConstant = true; // is this a matrix of numbers, or formulas
158   char    cc;
159 
160 
161   long    i=s.FirstNonSpaceIndex(),
162           j=s.FirstNonSpaceIndex(i+1),
163   k=0,
164   hPos = 0,
165   vPos = 0;
166 
167   char open_terminator  = use_square_brackets ? '[' : '{',
168        close_terminator = use_square_brackets ? ']' : '}';
169 
170   bool    terminators [256] {false};
171   terminators [(unsigned char)','] = true;
172   terminators [(unsigned char)close_terminator] = true;
173 
174 
175   try {
176      auto  handle_numeric_parameter = [&] (_String& term) -> long {
177        if (compute_keys_values) {
178         return round (ProcessNumericArgument(&term, theP));
179        }
180        else {
181          _String   err_msg;
182          _Formula  exp (term,theP, &err_msg);
183          if (exp.IsConstant()) {
184            return round(exp.Compute()->Value());
185          }
186          return 1;
187        }
188      };
189 
190     if (j>i && s.length()>4) { // non-empty string
191       _String term;
192       if (s.char_at (i) == open_terminator && s.char_at (j) == open_terminator) { // first type
193         i = j+1;
194         // read the dimensions first
195 
196         while (i<s.length()) {
197           long i2 = s.FindTerminator (i, terminators);
198           if (i2 == kNotFound) {
199             HandleApplicationError (kErrorStringUnterminatedMatrix & PrepareErrorContext(s, i));
200           }
201           i = i2;
202           cc = s.char_at (i);
203 
204           if (cc==close_terminator) {
205             break;
206           }
207 
208           if (cc==',') {
209             vDim++;
210           }
211           i++;
212         }
213 
214         vDim++;
215         hDim = 1;
216 
217         for (i = i + 1L; i<s.length()-1; i++) {
218           i = s.ExtractEnclosedExpression (i,open_terminator,close_terminator,fExtractRespectQuote | fExtractRespectEscape);
219           if (i < 0) {
220             break;
221           }
222           hDim ++;
223         }
224 
225         if ( hDim<=0 || vDim<=0) {
226           return;
227         }
228 
229         if (isNumeric) {
230           CreateMatrix (this, hDim, vDim, false, true, false);
231         } else {
232           CreateMatrix (this, hDim, vDim, false, false, true);
233         }
234 
235         // scan the elements one-by-one
236 
237         for (i=1; i<s.length()-1; i++) {
238           if (s.char_at(i) == open_terminator) {
239             while (s.char_at(i) != close_terminator) {
240               i++;
241               j = s.FindTerminator (i, terminators);
242 
243               if (j<0) {
244                 if (doErrors) {
245                   HandleApplicationError (kErrorStringUnterminatedMatrix & PrepareErrorContext(s, i));
246                 }
247                 return;
248               }
249 
250               _String lterm (s,s.FirstNonSpaceIndex(i,j-1,kStringDirectionForward),j-1); // store the term in a string
251 
252               //printf ("%s\n", lterm.sData);
253 
254               if (isNumeric) {
255                 if (lterm.length() == 1 && lterm.char_at(0) =='*') {
256                   lterm = kEmptyString;    // dummy element in probability matrix
257                 }
258 
259                 theData[vDim*hPos+vPos] = lterm.to_float ();
260               } else {
261                 if (lterm.length() == 1 && lterm.char_at(0) =='*') {
262                   lterm = kEmptyString;    // dummy element in probability matrix
263                 }
264 
265                 _String errMsg;
266                 _Formula*  theTerm;
267 
268                  if (doErrors) {
269                    theTerm = new _Formula (lterm, theP);
270                  } else {
271                    theTerm = new _Formula (lterm, theP, fpc.errMsg());
272                  }
273 
274                  isAConstant = isAConstant && theTerm->IsAConstant() && theTerm->ObjectClass() == NUMBER;
275                 ((_Formula**)theData)[vDim*hPos+vPos] = theTerm;
276               }
277 
278               vPos++;
279               if (vPos>vDim) {
280                 throw _String ("Rows of unequal lengths in matrix definition in ") & PrepareErrorContext(lterm, 0);
281               }
282 
283               i=j;
284             }
285           }
286           if (s[i]==close_terminator) {
287             if (vPos!=vDim) {
288               throw  kErrorStringBadMatrixDefinition & PrepareErrorContext (s,i-16);
289             }
290             hPos++;
291             vPos = 0;
292             if (hPos>hDim) {
293               throw kErrorStringBadMatrixDefinition & PrepareErrorContext (s,i-16);
294             }
295           }
296         }
297         if (hPos!=hDim) {
298           throw kErrorStringBadMatrixDefinition & PrepareErrorContext (s,i-16);
299         }
300       } else { // second type of input
301         for (i=j,j=0; s.char_at (i) !=open_terminator && s.char_at (i) !=close_terminator && i<s.length(); i++) {
302           if (s.char_at(i)==',') { // neither hDim nore vDim have been specified
303             if (j > 0) {
304               break;
305             }
306             term = s.Cut(1,i-1);
307             hDim = handle_numeric_parameter (term);
308             j    = i+1;
309           }
310         }
311 
312         if (j) { // both hDim and vDim specified
313           term = s.Cut(j,i-1);
314           vDim = handle_numeric_parameter (term);
315         } else { // only one dim specified, matrix assumed to be square
316           term = s.Cut(1,i-1);
317           hDim = handle_numeric_parameter (term);
318           vDim = hDim;
319         }
320 
321         if (hDim<=0 || vDim<=0) {
322           return;
323         }
324 
325         if (isNumeric) {
326           CreateMatrix (this, hDim, vDim, true, true, false);
327         } else {
328           CreateMatrix (this, hDim, vDim, true, false, true);
329         }
330 
331         // read the terms now
332 
333         for (; i<s.length(); i++) {
334           if (s.char_at (i) ==open_terminator) {
335             hPos = -1;
336             vPos = -1;
337             k    = i+1;
338 
339             for (j=i+1; j<s.length () && s.char_at (j) !=close_terminator; j++) {
340               long j2 = s.FindTerminator (j, terminators);
341 
342               if (j2<0) {
343                 throw (kErrorStringUnterminatedMatrix & PrepareErrorContext(s,j));
344               }
345               j = j2;
346 
347               if (s.char_at (j) ==',') {
348                 term = s.Cut (s.FirstNonSpaceIndex(k,j-1,kStringDirectionForward),j-1);
349                 _Formula coordF (term,theP);
350                 hyFloat coordV = coordF.Compute()->Value();
351                 if (hPos == -1) {
352                   hPos = coordV;
353                 } else {
354                   vPos = coordV;
355                 }
356                 k = j+1;
357               } else {
358                 j--;
359               }
360             }
361 
362             if (hPos <0 || vPos<0 || hPos>=hDim || vPos>=vDim)
363               // bad index
364             {
365               MatrixIndexError (hPos,vPos,hDim,vDim);
366               return;
367             }
368 
369             term = s.Cut(k,j-1); // read the element
370 
371             if (isNumeric) {
372               if (term.length() == 1UL && term.get_char (0)=='*') {
373                 term = kEmptyString;    // dummy element in probability matrix
374               }
375 
376               (*this)[vDim*hPos+vPos];
377               k = Hash (hPos,vPos);
378               theData[k]=term.to_float ();
379             } else {
380               if (term.length() == 1UL && term.get_char (0)=='*') {
381                 term = kEmptyString;    // dummy element in probability matrix
382               }
383 
384               _Formula * theTerm = new _Formula (term,theP);
385               isAConstant = isAConstant && theTerm->IsAConstant();
386 
387               (*this)[vDim*hPos+vPos];
388               k = Hash (hPos,vPos);
389               ((_Formula**)theData)[k]=theTerm;
390             }
391             i = j;
392           }
393         }
394       } // end else
395 
396       if (!isNumeric) {
397         storageType = 2; // formula elements
398         checkParameter (ANAL_COMP_FLAG, ANALYTIC_COMPUTATION_FLAG, 0L);
399         if ((ANALYTIC_COMPUTATION_FLAG)&&!isAConstant) {
400           _Matrix::ConvertFormulas2Poly (false);
401         }
402 
403         if (isAConstant) { // a matrix of numbers - store as such
404           _Matrix::Evaluate ();
405         }
406         _Matrix::AmISparse();
407       }
408     }
409   } catch (const _String& err) {
410     if (doErrors) {
411       HandleApplicationError(err);
412     }
413   }
414 }
415 
416 
417 //_____________________________________________________________________________________________
418 
_Matrix(_Matrix const & m)419 _Matrix::_Matrix (_Matrix const& m) {
420   DuplicateMatrix (this, &m);
421 }
422 
423 //_____________________________________________________________________________________________
424 
_Matrix(_SimpleList const & sl,long colArg)425 _Matrix::_Matrix (_SimpleList const& sl, long colArg) {
426   if (sl.lLength) {
427     if (colArg > 0 && colArg < sl.lLength) {
428       CreateMatrix (this, sl.lLength/colArg + colArg*(sl.lLength%colArg > 0), colArg,     false, true, false);
429     } else {
430       CreateMatrix (this, 1, sl.lLength,  false, true, false);
431     }
432     for (long k=0; k<sl.lLength; k++) {
433       theData[k] = sl.list_data[k];
434     }
435   } else {
436     Initialize();
437   }
438 }
439 
440 //_____________________________________________________________________________________________
441 
_Matrix(hyFloat const * inList,unsigned long rows,unsigned long columns)442 _Matrix::_Matrix (hyFloat const* inList, unsigned long rows, unsigned long columns) {
443   CreateMatrix (this, rows, columns, false, true, false);
444   for (unsigned long k = 0; k < rows*columns; k++) {
445     theData[k] = inList[k];
446   }
447 }
448 
449 //_____________________________________________________________________________________________
450 
_Matrix(hyFloat constant,unsigned long rows,unsigned long columns)451 _Matrix::_Matrix (hyFloat constant, unsigned long rows, unsigned long columns) {
452   CreateMatrix (this, rows, columns, false, true, false);
453   for (unsigned long k = 0; k < rows*columns; k++) {
454     theData[k] = constant;
455   }
456 }
457 
458 
459 //_____________________________________________________________________________________________
460 
_Matrix(_List const & sl,bool parse_escapes)461 _Matrix::_Matrix (_List const& sl, bool parse_escapes)
462 // list of strings
463 {
464   if (sl.nonempty()) {
465     CreateMatrix     (this, 1, sl.lLength,  false, false, true);
466     this->storageType = _FORMULA_TYPE;
467 
468     if (parse_escapes) {
469       for (unsigned long k=0UL; k<sl.lLength; k++) {
470         StoreFormula (0L,k,*new _Formula (new _FString (*(_String*) sl.GetItem(k))), false, false);
471       }
472     } else {
473       for (unsigned long k=0UL; k<sl.lLength; k++) {
474         _String* entry_k = (_String*) sl.GetItem(k);
475         entry_k -> AddAReference();
476         StoreFormula (0L,k,*new _Formula (new _FString (entry_k)), false, false);
477       }
478 
479     }
480   } else {
481     Initialize();
482   }
483 }
484 
485 //_____________________________________________________________________________________________
CreateMatrix(_Matrix * populate_me,long rows,long columns,bool sparse,bool allocateStorage,bool expression_matrix)486 void    _Matrix::CreateMatrix    (_Matrix* populate_me, long rows, long columns,  bool sparse, bool allocateStorage, bool expression_matrix) {
487 
488     populate_me->theValue     = nil;
489     populate_me->storageType  = allocateStorage ? _NUMERICAL_TYPE : _POLYNOMIAL_TYPE;
490 
491     if (rows && columns) {
492         if (sparse) { // store matrix as sparse
493             populate_me->lDim = rows*columns*populate_me->storageIncrement/100+1; // size of storage in elements
494             if (populate_me->lDim-1L<rows) {
495                 // either the matrix or the allocation block are too small
496                 // to sensibly store the matrix as sparse.
497                 CreateMatrix (populate_me, rows, columns, false, allocateStorage, expression_matrix);
498                 return;
499             }
500             populate_me->theIndex = (long*)MatrixMemAllocate(sizeof(long)*populate_me->lDim);
501             InitializeArray(populate_me->theIndex, populate_me->lDim, -1L);
502 
503         } else {
504             populate_me->lDim = rows*columns;
505             populate_me->theIndex = nil; // no index storage needed
506         }
507 
508         if (!allocateStorage) {
509             // matrix will store pointers to elements
510             populate_me->theData =(hyFloat*)MatrixMemAllocate(populate_me->lDim*sizeof(void*));
511             if (expression_matrix) {
512                 InitializeArray ((_Formula**)populate_me->theData,    populate_me->lDim, (_Formula*)ZEROPOINTER);
513             } else {
514                 InitializeArray ((_MathObject**)populate_me->theData, populate_me->lDim, (HBLObjectRef)ZEROPOINTER);
515             }
516 
517         } else {
518             populate_me->theData =(hyFloat*)MatrixMemAllocate (sizeof(hyFloat)*populate_me->lDim);
519             memset (populate_me->theData, 0, populate_me->lDim*sizeof(hyFloat));
520         }
521         populate_me->hDim = rows;
522         populate_me->vDim = columns;
523         populate_me->SetupSparseMatrixAllocations ();
524         populate_me->compressedIndex = nil;
525     } else {
526         populate_me->lDim      = 0L;
527         populate_me->theIndex  = nil;
528         populate_me->theData   = nil;
529         populate_me->compressedIndex = nil;
530         populate_me->hDim = 0UL;
531         populate_me->vDim = 0UL;
532     }
533 
534 }
535 
536 
537 //_____________________________________________________________________________________________
DuplicateMatrix(_Matrix * targetMatrix,_Matrix const * sourceMatrix)538 void    DuplicateMatrix (_Matrix* targetMatrix, _Matrix const* sourceMatrix) {
539   if (targetMatrix==sourceMatrix) {
540     return;
541   }
542   targetMatrix->lDim = sourceMatrix->lDim;
543   targetMatrix->hDim = sourceMatrix->hDim;
544   targetMatrix->vDim = sourceMatrix->vDim;
545   targetMatrix->storageType = sourceMatrix->storageType;
546   targetMatrix->bufferPerRow =sourceMatrix->bufferPerRow;
547   targetMatrix->overflowBuffer = sourceMatrix->overflowBuffer;
548   targetMatrix->allocationBlock = sourceMatrix->allocationBlock;
549   targetMatrix->theValue = nil;
550 
551   targetMatrix->compressedIndex = nil;
552 
553   if (! sourceMatrix->is_dense()) {
554     if (!(targetMatrix->theIndex = (long*)MatrixMemAllocate(sizeof(long) *sourceMatrix->lDim))) { // allocate element index storage
555       HandleApplicationError ( kErrorStringMemoryFail );
556     } else {
557       memcpy ((void*)targetMatrix->theIndex,(void*)sourceMatrix->theIndex,sourceMatrix->lDim*sizeof(long));
558     }
559     if (sourceMatrix->compressedIndex) {
560         targetMatrix->compressedIndex = (long*)MatrixMemAllocate(sizeof(long) *(sourceMatrix->lDim+sourceMatrix->hDim));
561         memcpy ((void*)targetMatrix->compressedIndex,(void*)sourceMatrix->compressedIndex,sizeof(long) *(sourceMatrix->lDim+sourceMatrix->hDim));
562     }
563   } else {
564     targetMatrix->theIndex = nil;
565   }
566 
567 
568   targetMatrix->theData = nil;
569 
570   if (sourceMatrix->lDim) {
571     if (sourceMatrix->is_polynomial())
572       // matrix will store pointers to elements
573     {
574       if (targetMatrix->lDim) {
575         if (!(targetMatrix->theData = (hyFloat*)MatrixMemAllocate(sizeof( char)*sourceMatrix->lDim*sizeof(void*)))) { // allocate element index storage
576           HandleApplicationError ( kErrorStringMemoryFail );
577         } else {
578           memcpy ((void*)targetMatrix->theData,(void*)sourceMatrix->theData,sourceMatrix->lDim*sizeof(void*));
579           if (sourceMatrix->is_dense()) { // non-sparse matrix
580             for (long i=0; i<sourceMatrix->lDim; i++)
581               if (sourceMatrix->GetMatrixObject(i)) {
582                 (sourceMatrix->GetMatrixObject(i))->AddAReference();
583               }
584           } else
585             for (long i=0; i<sourceMatrix->lDim; i++) {
586               _MathObject* theO = (sourceMatrix->GetMatrixObject(i));
587               if (theO!=ZEROPOINTER) {
588                 theO->AddAReference();
589               }
590             }
591 
592         }
593       }
594     } else if (sourceMatrix->is_expression_based()) {
595       if (targetMatrix->lDim) {
596         targetMatrix->theData = (hyFloat*)MatrixMemAllocate(sourceMatrix->lDim*sizeof(void*));
597         _Formula ** theFormulas = (_Formula**)(sourceMatrix->theData), **newFormulas =
598         (_Formula**)(targetMatrix->theData);
599         if (sourceMatrix->is_dense() == false) {
600           for (long i = 0; i<sourceMatrix->lDim; i++)
601             if (sourceMatrix->IsNonEmpty(i)) {
602               newFormulas[i] = (_Formula*)theFormulas[i]->makeDynamic();
603             }
604         } else
605           for (long i = 0; i<sourceMatrix->lDim; i++)
606             if(theFormulas[i]!=(_Formula*)ZEROPOINTER) {
607               newFormulas[i] = (_Formula*)theFormulas[i]->makeDynamic();
608             } else {
609               newFormulas[i]=ZEROPOINTER;
610             }
611       }
612     } else {
613       if (targetMatrix->lDim) {
614         if (!(targetMatrix->theData =(hyFloat*)MatrixMemAllocate(sizeof( hyFloat)*targetMatrix->lDim))) { // allocate element index storage
615           HandleApplicationError ( kErrorStringMemoryFail );
616         } else {
617           memcpy ((hyPointer)targetMatrix->theData,(hyPointer)sourceMatrix->theData,sizeof(hyFloat)*sourceMatrix->lDim);
618         }
619       }
620     }
621   } else {
622     targetMatrix->theData = nil;
623     targetMatrix->lDim    = 0;
624   }
625 
626 }
627 //_____________________________________________________________________________________________
makeDynamic(void) const628 BaseRef _Matrix::makeDynamic (void) const {
629   _Matrix * result = new _Matrix;
630   DuplicateMatrix (result, this);
631 
632   return result;
633 }
634 
635 //_____________________________________________________________________________________________
Duplicate(BaseRefConst obj)636 void _Matrix::Duplicate (BaseRefConst obj) {
637   Clear();
638   DuplicateMatrix (this,(_Matrix const*)obj);
639 }
640 
641 
642 //_____________________________________________________________________________________________
643 
_Matrix(long theHDim,long theVDim,bool sparse,bool allocateStorage)644 _Matrix::_Matrix (long theHDim, long theVDim, bool sparse, bool allocateStorage)    // create an kEmptyString matrix of given dimensions;
645                                                                                     // the flag specifies whether it is sparse or not
646 
647 {
648   CreateMatrix (this, theHDim, theVDim, sparse, allocateStorage);
649 }
650 
651 
652 
653 //_____________________________________________________________________________________________
654 
IsNonEmpty(long logicalIndex) const655 inline  bool    _Matrix::IsNonEmpty  (long logicalIndex) const {
656     if (is_dense() == false) {
657         return theIndex [logicalIndex] != -1;
658     }
659     if (is_numeric()) {
660         return true;
661     }
662     return GetMatrixObject(logicalIndex)!=ZEROPOINTER;
663 }
664 
665 //__________________________________________________________________________________
666 
HasChanged(bool)667 bool        _Matrix::HasChanged(bool) {
668 
669     switch (storageType) {
670         case _POLYNOMIAL_TYPE: {
671           return Any ([&] (_MathObject * f, unsigned long) -> bool {if (f) return f->HasChanged(); return false;},
672                       [&] (unsigned long i) -> _MathObject * {return ((_MathObject**)theData)[i];});
673         }
674         break;
675 
676         case _FORMULA_TYPE: {
677             return Any ([&] (_Formula * f, unsigned long) -> bool {if (f) return f->HasChanged(); return false;},
678                         [&] (unsigned long i) -> _Formula * {return ((_Formula**)theData)[i];});
679         }
680         break;
681 
682         case _SIMPLE_FORMULA_TYPE: {
683             if (cmd->has_volatile_entries) return true;
684             return cmd->varIndex.Any ([&] (long value, unsigned long) -> bool {
685                 return LocateVar (value)->HasChanged();
686             });
687         }
688         break;
689     }
690 
691     return false;
692 }
693 //__________________________________________________________________________________
694 
695 
ROTATE(hyFloat * a,long i,long j,long k,long l,hyFloat & g,hyFloat & h,hyFloat s,hyFloat tau,long hDim)696 inline static void ROTATE(hyFloat * a, long i, long j, long k, long l, hyFloat & g, hyFloat & h, hyFloat s, hyFloat tau, long hDim) {
697     // this is from NR
698     g = a[i*hDim + j];
699     h = a[k*hDim + l];
700     a[i*hDim + j] = g - s*(h + g*tau);
701     a[k*hDim + l] = h + s*(g - h*tau);
702 }
703 
704 //__________________________________________________________________________________
705 
is_square_numeric(bool dense) const706 bool        _Matrix::is_square_numeric(bool dense) const {
707     if (storageType!=_NUMERICAL_TYPE || hDim != vDim || hDim==0L){
708         HandleApplicationError ("Square numerical matrix required");
709     }
710     if (dense && !is_dense()) {
711         HandleApplicationError ("Square dense numerical matrix required");
712     }
713     return true;
714 }
715 
716 //__________________________________________________________________________________
Balance(void)717 void        _Matrix::Balance (void) {
718     if (!is_square_numeric (true)) {
719         return;
720     }
721 
722     hyFloat       Squared_Radix = 2.0 * 2.0;
723     bool          done = false;
724 
725     while (!done) {
726         done = true;
727 
728         for (long i = 0L; i < hDim; i++) {
729             hyFloat r = 0.0,
730                        c = 0.0;
731 
732             for (long j = 0L; j < vDim; j++)
733                 if (i!=j) {
734                     r += fabs (theData[i*vDim+j]);
735                     c += fabs (theData[j*vDim+i]);
736                 }
737 
738             if (r > 0.0 && c > 0.0) {
739                 hyFloat g = r / Squared_Radix,
740                            f = 1.,
741                            s = c+r;
742 
743                 while (c<g) {
744                     f *= 2.0;
745                     c *= Squared_Radix;
746                 }
747 
748                 g = r * 2.0;
749 
750                 while (c>g) {
751                     f /= 2.0;
752                     c /= Squared_Radix;
753                 }
754 
755                 if ((c+r)/f < 0.95*s) {
756                     done = false;
757                     g = 1. / f;
758                     for (long j = 0; j < vDim; j++) {
759                         theData[i*vDim+j] *= g;
760                         theData[j*vDim+i] *= f;
761                     }
762                 }
763 
764             }
765         }
766     }
767 }
768 
769 //__________________________________________________________________________________
Schur(void)770 void        _Matrix::Schur (void) {
771     if (!is_square_numeric (true)) {
772         return;
773     }
774 
775     for (long m = 1L; m < hDim-1; m++) {
776         hyFloat x = 0.0;
777         long       i = m;
778 
779         for (long j = m; j < hDim; j++)
780             if (fabs (theData[j*vDim + m-1]) > x) {
781                 x = theData[j*vDim + m-1];
782                 i = j;
783             }
784 
785         if (i!=m) {
786             for (long j=m-1; j<hDim; j++) {
787                 hyFloat t = theData[i*vDim + j];
788                 theData[i*vDim + j] = theData[m*vDim + j];
789                 theData[m*vDim + j] = t;
790             }
791             {
792                 for (long j=0; j<hDim; j++) {
793                     hyFloat t = theData[j*vDim + i];
794                     theData[j*vDim + i] = theData[j*vDim + m];
795                     theData[j*vDim + m] = t;
796                 }
797             }
798         }
799 
800         if (x)
801             for (long i = m+1; i < hDim; i++) {
802                 hyFloat y = theData[i*vDim + m -1];
803                 if (y != 0.0) {
804                     y /= x;
805                     theData[i*vDim + m -1] = y;
806                     for (long j = m; j < hDim; j++) {
807                         theData[i*vDim+j] -= y*theData[m*vDim+j];
808                     }
809                     {
810                         for (long j = 0; j < hDim; j++) {
811                             theData[j*vDim+m] += y*theData[j*vDim+i];
812                         }
813                     }
814                 }
815             }
816     }
817 
818     for (long r = 2L; r < hDim; r++)
819         InitializeArray(theData + r*hDim, r-1, 0.0);
820 }
821 
822 
823 //__________________________________________________________________________________
EigenDecomp(_Matrix & real,_Matrix & imag) const824 void        _Matrix::EigenDecomp (_Matrix& real, _Matrix & imag) const {
825     if (!is_square_numeric()) {
826         return;
827     }
828 
829     hyFloat anorm = 0.0;
830 
831     for (long k = 0L; k < hDim; k++) {
832         for (long k2 = k?k-1:0; k2 < hDim; k2++) {
833             anorm += fabs (MX_ACCESS(k,k2));
834         }
835     }
836 
837     long        nn = hDim - 1L;
838     hyFloat  t  = 0;
839 
840     CreateMatrix (&real, hDim, 1, false, true, false);
841     CreateMatrix (&imag, hDim, 1, false, true, false);
842 
843     while (nn >= 0) {
844         long its = 0,
845              l   = 0;
846         do {
847             for (l = nn; l>=1; l--) {
848                 hyFloat s = fabs (MX_ACCESS(l-1,l-1)) + fabs (MX_ACCESS(l,l));
849                 if (s == 0.0) {
850                     s = anorm;
851                 }
852 
853                 if (fabs (MX_ACCESS(l,l-1)) + s == s) {
854                     break;
855                 }
856             }
857 
858             hyFloat x = MX_ACCESS(nn,nn);
859             if (l  == nn) { // one root
860                 real.theData[nn]   = x + t;
861                 imag.theData[nn--] = 0.0;
862             } else {
863                 hyFloat y = MX_ACCESS(nn-1,nn-1),
864                            w = MX_ACCESS(nn,nn-1)*MX_ACCESS(nn-1,nn);
865 
866                 if ( l == nn - 1) { // two roots
867                     hyFloat p = 0.5 * (y-x),
868                                q = p*p + w,
869                                z = sqrt (fabs(q));
870 
871                     x += t;
872 
873                     if (q >= 0.0) { // real pair
874                         z = p + (p>0.0?z:-z);
875                         real.theData[nn] = real.theData[nn-1] = x+z;
876                         if (z) {
877                             real.theData[nn] = x-w/z;
878                         }
879                         imag.theData[nn] = imag.theData[nn-1] = 0.0;
880                     } else { // complex pair
881                         real.theData[nn]   = real.theData [nn-1] = x+p;
882                         imag.theData[nn-1] = -(imag.theData[nn] = z);
883                     }
884                     nn -= 2;
885                 } else { // no roots; continue iteration
886 
887                     hyFloat p,q,r,z,s;
888 
889                     if (its == 30) {
890                         HandleApplicationError ("Too many QR iterations in EigenDecomp");
891                         return;
892                     }
893 
894                     if (its == 10 || its == 20) {
895                         t += x;
896                         for (long i=0; i<hDim; i++) {
897                             MX_ACCESS(i,i) -= x;
898                         }
899                         hyFloat s = fabs(MX_ACCESS(nn,nn-1)) + fabs (MX_ACCESS(nn-1,nn-2));
900                         y = x = 0.75 * s;
901                         w = -0.4375*s*s;
902                     }
903                     its++;
904 
905                     long m = nn-2;
906 
907                     for (; m>=l; m--) {
908                         z = MX_ACCESS(m,m);
909                         r = x-z;
910                         s = y-z;
911                         p = (r*s - w)/MX_ACCESS(m+1,m) + MX_ACCESS(m,m+1);
912                         q = MX_ACCESS(m+1,m+1)-z-r-s;
913                         r = MX_ACCESS(m+2,m+1);
914                         s = fabs (p) + fabs (q) + fabs (r);
915 
916                         p/=s;
917                         q/=s;
918                         r/=s;
919 
920                         if (m == l) {
921                             break;
922                         }
923 
924                         hyFloat u = fabs (MX_ACCESS(m,m-1)) * (fabs (q) + fabs (r)),
925                                    v = fabs (p) * (fabs (MX_ACCESS(m-1,m-1)) + fabs(z) + fabs(MX_ACCESS(m+1,m+1)));
926 
927                         if (u+v == v) {
928                             break;
929                         }
930 
931                     }
932 
933                     for (long i = m+2; i<hDim; i++) {
934                         MX_ACCESS(i,i-2) = 0.0;
935                         if (i!=m+2) {
936                             MX_ACCESS(i,i-3) = 0.0;
937                         }
938                     }
939 
940                     for (long k = m; k <= nn-1; k++) {
941                         if (k!=m) {
942                             p = MX_ACCESS(k,k-1),
943                             q = MX_ACCESS(k+1,k-1),
944                             r = 0.0;
945                             if (k != nn-1) {
946                                 r = MX_ACCESS(k+2,k-1);
947                             }
948 
949                             if ((x = fabs(p) + fabs(q) + fabs(r)) != 0.0) {
950                                 p /= x;
951                                 q /= x;
952                                 r /= x;
953                             }
954                         }
955 
956                         s = sqrt (p*p+q*q+r*r);
957 
958                         if (s != 0.0) {
959                             if (p < 0.0) {
960                                 s = -s;
961                             }
962 
963                             if (k == m) {
964                                 if (l!=m) {
965                                     MX_ACCESS(k,k-1) = -MX_ACCESS(k,k-1);
966                                 }
967                             } else {
968                                 MX_ACCESS(k,k-1) = -s*x;
969                             }
970 
971                             p += s;
972                             x = p/s;
973                             y = q/s;
974                             z=r/s;
975                             q/=p;
976                             r/=p;
977                             for (long j=k; j<=nn; j++) {
978                                 p = MX_ACCESS(k,j)+q*MX_ACCESS(k+1,j);
979                                 if (k!=nn-1) {
980                                     p += r*MX_ACCESS(k+2,j);
981                                     MX_ACCESS(k+2,j) -= p*z;
982                                 }
983                                 MX_ACCESS(k+1,j) -= p*y;
984                                 MX_ACCESS(k,j)   -= p*x;
985                             }
986 
987                             long mmin = nn < k+3 ? nn: k+3;
988                             for (long i = 0; i<=mmin; i++) {
989                                 p = x*MX_ACCESS(i,k) + y*MX_ACCESS(i,k+1);
990                                 if (k!=nn-1) {
991                                     p += z*MX_ACCESS(i,k+2);
992                                     MX_ACCESS(i,k+2) -= p*r;
993                                 }
994                                 MX_ACCESS(i,k+1) -= p*q;
995                                 MX_ACCESS(i,k) -= p;
996                             }
997                         }
998                     }
999                 }
1000             }
1001 
1002         } while (l < nn - 1);
1003     }
1004 
1005 }
1006 
1007 //_____________________________________________________________________________________________
ValidateFormulaEntries(bool callback (long,long,_Formula *))1008 bool        _Matrix::ValidateFormulaEntries (bool callback (long, long, _Formula*)) {
1009     if (storageType == _FORMULA_TYPE) {
1010         _Formula ** formula_entires = (_Formula**)theData;
1011 
1012         long direct_index = 0L;
1013         for (unsigned long row = 0UL; row < hDim ; row++) {
1014             for (unsigned long col = 0UL; col < vDim ; col++) {
1015                 _Formula * this_cell;
1016                 if (is_dense()) {
1017                     this_cell = formula_entires[direct_index++];
1018                 } else {
1019                     direct_index = Hash (row,col);
1020                     if (direct_index >= 0) {
1021                         this_cell = formula_entires[direct_index];
1022                     } else {
1023                         this_cell = nil;
1024                     }
1025                 }
1026                 if (! callback (row, col, this_cell)) {
1027                     return false;
1028                 }
1029             }
1030         }
1031         return true;
1032     }
1033     return false;
1034 }
1035 
1036 
1037 //__________________________________________________________________________________
Eigensystem(HBLObjectRef cache) const1038 HBLObjectRef   _Matrix::Eigensystem (HBLObjectRef cache) const {
1039     // find the eigenvectors of a symmetric matrix using Jacobi rotations
1040     // The original matrix is preserved.
1041     // returns an associative list with a sorted vector of eigenvalues and
1042     // a square matrix where columns are the corresponding eigenvalues
1043     if (!is_square_numeric()) {
1044         return    new _AssociativeList();
1045     }
1046 
1047     // check for symmetry
1048 
1049     for (long k=0; k<hDim; k++) {
1050         for (long v=k+1; v<hDim; v++) {
1051             if (!CheckEqual((*this)(k,v), (*this)(v,k))) {
1052                 //_String errorMsg ("Eigensystem presently only works on symmetric matrices");
1053                 //WarnError (errorMsg);
1054                 //return      new _AssociativeList();
1055 
1056                 //_String nonSym = _String("Failed symmetry check:" ) & k & ":" & v;
1057                 //WarnError (nonSym);
1058 
1059 
1060                 _Matrix            cpy (*this),
1061                 *rl  = new _Matrix,
1062                 *im  = new _Matrix;
1063 
1064                 cpy.CheckIfSparseEnough(true);
1065                 cpy.Balance ();
1066                 cpy.Schur   ();
1067                 cpy.EigenDecomp (*rl,*im);
1068 
1069                 return & ((*new _AssociativeList) < _associative_list_key_value {"0", rl}
1070                        < _associative_list_key_value {"1", im});
1071             }
1072         }
1073     }
1074     _Matrix a (*this);
1075     a.CheckIfSparseEnough (true);
1076 
1077     hyFloat* b = new hyFloat[hDim],
1078     *   z = new hyFloat[hDim];
1079 
1080     _Matrix * d = new _Matrix(hDim,1,false, true),
1081     * v = new _Matrix(hDim,hDim,false,true);
1082 
1083     for (long cnt = 0, diagIndex=0; cnt < hDim; cnt ++, diagIndex+=hDim+1) {
1084         v->theData[diagIndex] = 1.;
1085         b[cnt] = (d->theData[cnt] = a.theData [diagIndex]);
1086         z[cnt] = 0.0;
1087     }
1088 
1089     for (int pass = 0; pass < 50; pass ++) {
1090         hyFloat sm = 0.,
1091                    tresh = 0.;
1092 
1093         for (long ec = 0; ec < hDim-1; ec ++)
1094             for (long ec2 = ec+1; ec2 < hDim; ec2++) {
1095                 sm += fabs(a.theData[ec*hDim+ec2]);
1096             }
1097 
1098         if (sm == 0.0) {
1099             break;
1100         }
1101 
1102         if (pass < 3) {
1103             tresh = 0.2 * sm / (hDim*hDim);
1104         }
1105 
1106         for (long ec=0; ec < hDim-1; ec++) {
1107             for (long ec2=ec+1; ec2 < hDim; ec2++) {
1108                 long       midx = ec*hDim+ec2;
1109 
1110                 hyFloat mel = a.theData[midx],
1111                            g   = 100. * fabs (mel),
1112                            t   = fabs (d->theData[ec]),
1113                            c   = fabs (d->theData[ec2]);
1114 
1115                 if (pass>3 && t+g == t && c+g == c) {
1116                     a.theData[midx] = 0.;
1117                 } else if (fabs(mel) > tresh) {
1118                     hyFloat h = d->theData[ec2]-d->theData[ec];
1119                     if (fabs (h) + g == fabs (h)) {
1120                         t = mel/h;
1121                     } else {
1122                         hyFloat theta = 0.5*h/mel;
1123                         t = 1./(fabs(theta)+sqrt(1.+theta*theta));
1124                         if (theta<0.0) {
1125                             t = -t;
1126                         }
1127                     }
1128 
1129                     c = 1.0/sqrt(1.0+t*t);
1130 
1131                     hyFloat s    = t*c;
1132                     hyFloat tau  = s/(1.0+c);
1133 
1134                     h = t*mel;
1135 
1136                     z[ec]           -= h;
1137                     z[ec2]          += h;
1138                     d->theData[ec]  -= h;
1139                     d->theData[ec2] += h;
1140 
1141 
1142                     a.theData[midx] = 0.;
1143 
1144                     for (long j=0; j<ec; j++) {
1145                         ROTATE (a.theData, j, ec, j, ec2, g, h, s, tau, hDim);
1146                     }
1147 
1148                     for (long j=ec+1; j<ec2; j++) {
1149                         ROTATE (a.theData, ec, j, j, ec2, g, h, s, tau, hDim);
1150 
1151                     }
1152                     for (long j=ec2+1; j<hDim; j++) {
1153                         ROTATE (a.theData, ec, j, ec2, j, g, h, s, tau, hDim);
1154                     }
1155 
1156                     for (long j=0; j<hDim; j++) {
1157                         ROTATE (v->theData, j, ec, j, ec2, g, h, s, tau, hDim);
1158                     }
1159                 }
1160             }
1161         }
1162         for (long ec=0; ec<hDim; ec++) {
1163             b[ec] += z[ec];
1164             d->theData[ec] = b[ec];
1165             z[ec] = 0.;
1166         }
1167     }
1168 
1169 
1170 
1171     _Matrix ds (hDim,2,false, true),
1172     * vs = new _Matrix(hDim,hDim,false,true),
1173     * dss;
1174 
1175     for (long r=0; r<hDim; r++) {
1176         ds.theData[2*r]   = -d->theData[r];
1177         ds.theData[2*r+1] = r;
1178     }
1179 
1180     _Constant sc (0.0);
1181     dss = (_Matrix*)ds.SortMatrixOnColumn (&sc, nil);
1182 
1183     for (long r=0; r<hDim; r++) {
1184         d->theData[r] = -dss->theData[2*r];
1185         for (long c1 = r, c2 = dss->theData[2*r+1]; c1<hDim*hDim; c1+=hDim, c2+=hDim) {
1186             vs->theData[c1] = v->theData[c2];
1187         }
1188     }
1189 
1190     DeleteObject (v);
1191     DeleteObject (dss);
1192 
1193     delete [] b;
1194     delete [] z;
1195 
1196     return & ((*new _AssociativeList) << _associative_list_key_value {"0", d}
1197             << _associative_list_key_value {"1", vs});
1198 
1199 }
1200 
1201 //__________________________________________________________________________________
LUDecompose(void) const1202 HBLObjectRef   _Matrix::LUDecompose (void) const {
1203     // perform the LU decomposition using Crout's algorithm with partial pivoting
1204     // The original matrix is preserved.
1205     // after performing this decomposition, the routine LUSolve can be called with an arbitrary vector
1206     // the return object is an nx(n+1) matrix which contains the LU decomposition followed
1207     // by a vector of row interchanges
1208     if (!is_square_numeric(false)) { // only works for numerical matrices at this stage
1209         return    new _Matrix();
1210     }
1211 
1212     hyFloat *        scalings = new hyFloat[hDim];
1213 
1214     long per_row = vDim+1;
1215     _Matrix * result = new _Matrix (hDim,per_row,false,true);
1216     // result is a dense matrix
1217 
1218     // duplicate the original matrix into result
1219 
1220 
1221     if (is_dense()) {//matrix is sparse
1222       for (long i=0L; i<hDim; i++) {
1223         long row_start = i*vDim;
1224         for (long j=0; j<vDim; j++) {
1225           result->theData[row_start+i+j]=theData[row_start+j];
1226         }
1227       }
1228     }
1229     else {
1230       for (long i=0; i<lDim; i++) {
1231         if (theIndex[i] != -1) {
1232           long cell_coord = theIndex[i];
1233           long r = cell_coord / hDim;
1234           result->Store(r,cell_coord-r*vDim,theData[i]);
1235         }
1236       }
1237     }
1238 
1239     // produce the scaling vector used in interchanging the rows
1240     for (long i=0L; i<vDim; i++) {
1241 
1242         hyFloat row_max = 0.0;
1243 
1244         for (long j=i*per_row; j<(i+1)*per_row-1; j++) {
1245             StoreIfGreater(row_max, fabs(result->theData[j]));
1246         }
1247 
1248         if (row_max==0.0) {
1249             HandleApplicationError (_String("LUDecompose doesn't work on singular matrices (row ") & i & ')');
1250             delete [] scalings;
1251             return    new _MathObject;
1252         }
1253         scalings[i]=1.0/row_max;
1254     }
1255     // main loop for finding L and U
1256 
1257     for (long j=0L; j<vDim; j++) {
1258         for (long i=0L; i<j; i++) {
1259             // fill in superdiagonal elements (U) in column j
1260             hyFloat sum = result->get(i,j);
1261             for (long k=0L; k<i; k++) {
1262                 sum -= result->get(i,k) * result->get (k,j);
1263             }
1264             result->set (i,j) = sum;
1265         }
1266         long       max_row_index   = 0;
1267         hyFloat    max_row_value     = 0.0;
1268 
1269         for (long i=j; i<hDim; i++) {
1270             // calculate the unscaled version of elements of L and the diagonal
1271             hyFloat sum = result->get(i,j);
1272 
1273             for (long k=0L; k<j; k++) {
1274                sum -= result->get(i,k) * result->get (k,j);
1275             }
1276             result->set (i,j) = sum;
1277 
1278             if (StoreIfGreater(max_row_value, scalings[i]*fabs(sum))) { // find max under the diagonal in column j
1279                 max_row_index = i;
1280             }
1281         }
1282 
1283         if (j!=max_row_index) { // interchange rows
1284             for (long k=0L; k<hDim; k++) {
1285                 Exchange(result->set(max_row_index,k), result->set(j,k));
1286             }
1287             scalings[max_row_index]=scalings[j];
1288         }
1289         // store the index permutation
1290         result->theData[j*per_row+vDim] = max_row_index;
1291 
1292         if (result->get(j,j) == 0.0) {
1293             result->set(j,j) = 1.0e-25;
1294         }
1295 
1296         // divide by the pivoting element
1297 
1298         if (j!=hDim-1) {
1299             hyFloat scaler = 1.0/result->get(j,j);
1300             for (long i=j+1L; i<hDim; i++) {
1301               result->set(i,j) *= scaler;
1302             }
1303         }
1304     }
1305     delete [] scalings;
1306     return result;
1307 }
1308 //__________________________________________________________________________________
LUSolve(HBLObjectRef p) const1309 HBLObjectRef   _Matrix::LUSolve (HBLObjectRef p) const {
1310 // takes a matrix in LU decomposed state and a vector of row permutation returned by LU
1311 // returns a vector of solutions
1312 
1313     if (storageType != _NUMERICAL_TYPE || hDim+1!=vDim || vDim<=0 ) { // only works for numerical matrices at this stage
1314         HandleApplicationError ("LUSolve only works with numerical non-empty matrices of dimension nx(n+1) returned by LUDecompose.");
1315         return  nil;
1316     }
1317     if (p->ObjectClass()==MATRIX) {
1318       _Matrix *b=(_Matrix*)p;
1319       if (!((b->hDim!=hDim)||(b->vDim!=1)||(b->storageType!=1))) {
1320         hyFloat sum;
1321         _Matrix * result = new _Matrix (*b);
1322         result->CheckIfSparseEnough(true);
1323         long i = 0L,
1324              first_index = -1L;
1325 
1326         for (; i<hDim; i++) {
1327           long row_index = get (i, vDim - 1L);
1328           if (row_index<0 || row_index>=hDim) {
1329             break;
1330           }
1331           hyFloat sum = result->theData[row_index];
1332           result->theData[row_index]=result->theData[i];
1333           if (first_index>=0)
1334             for (long j=first_index; j<i; j++) {
1335               sum -= get (i,j) *result->theData[j];
1336             }
1337           else if (sum != 0.0) {
1338             first_index = i;
1339           }
1340           result->theData[i]=sum;
1341         }
1342         if (i==hDim) {
1343           for (i=hDim-1; i>-1; i--) {
1344             hyFloat sum = result->theData[i];
1345             for (long j=i+1L; j<hDim; j++) {
1346               sum -= get (i,j) *result->theData[j];
1347             }
1348             result->theData[i]=sum/get(i,i);
1349           }
1350           return result;
1351         }
1352       }
1353     }
1354     HandleApplicationError ("LUSolve expects the 2nd parameter to be a column vector defining the right hand side of LUx=b");
1355     return new _Matrix(1,1,false,true);
1356 }
1357 
1358 
1359 
1360 //__________________________________________________________________________________
CholeskyDecompose(void) const1361 HBLObjectRef   _Matrix::CholeskyDecompose (void) const
1362 {
1363     /* ---------------------------------------------------
1364         CholeskyDecompose()
1365             Constrcts lower triangular matrix L such that
1366             its own transpose can serve as upper part in
1367             LU decomposition.
1368             Requires that matrix is symmetric and positive
1369             definite.
1370         * algorithm based on Numerical Recipes
1371        --------------------------------------------------- */
1372 
1373     if (!is_square_numeric()) { // only works for numerical square matrices at this stage
1374         return    new _Matrix();
1375     }
1376 
1377     long        n           = GetHDim();
1378     hyFloat  sum;
1379     _Matrix *   lower_triangular    = new _Matrix (*this);   // duplication constructor
1380 
1381     for (long i = 0; i < n; i++) {
1382         for (long j = i; j < n; j++) {
1383             sum = lower_triangular->get (i,j);
1384 
1385             for (long k = i-1L; k >= 0L; k--) {
1386                 sum -= lower_triangular->get (i,k) * lower_triangular->get (j,k);
1387             }
1388 
1389             if (i==j) {
1390                 if (sum <= 0.0) {   // matrix is not positive-definite
1391                     HandleApplicationError (_String("In CholeskyDecompose(): matrix not positive definite, (row ") & i & ')');
1392                     return new _MathObject;
1393                 }
1394 
1395                 lower_triangular->set (i, i) = sqrt(sum);
1396             }
1397             else {
1398                 lower_triangular->set (j, i) =  sum / lower_triangular->get(i,i);
1399             }
1400         }
1401     }
1402 
1403     /* zero upper triagonal entries */
1404     for (long i = 0L; i < n; i++) {
1405         for (long j = i+1L; j < n; j++) {
1406             lower_triangular->set (i, j) = 0.;
1407         }
1408     }
1409 
1410     return lower_triangular;
1411 }
1412 
1413 
1414 
1415 //__________________________________________________________________________________
ApplyScalarOperation(CALLBACK && functor,HBLObjectRef cache) const1416 template <typename CALLBACK> HBLObjectRef   _Matrix::ApplyScalarOperation (CALLBACK && functor, HBLObjectRef cache) const {
1417     if (storageType==_NUMERICAL_TYPE) {
1418         _Matrix* res;
1419 
1420         if (cache && cache->ObjectClass() == MATRIX) {
1421             res = (_Matrix*)cache;
1422             *res = *this;
1423             res->AddAReference();
1424         } else {
1425             res = new _Matrix (*this);
1426         }
1427 
1428         res->ForEach ([&] (hyFloat&& value, unsigned long index, long hashed) -> void {res->theData[hashed] = functor(value);},
1429                       [&] (unsigned long index) -> hyFloat {return theData[index];});
1430 
1431         return res;
1432     }
1433     HandleApplicationError ("Can't apply scalar opetarations to non-numeric matrices.");
1434     return new _Matrix(1,1,false,true);
1435 }
1436 
1437 //__________________________________________________________________________________
Inverse(HBLObjectRef cache) const1438 HBLObjectRef   _Matrix::Inverse (HBLObjectRef cache) const {
1439     if (!is_square_numeric(false)) {
1440         return    new _MathObject;
1441     }
1442 
1443     _Matrix * LUdec = (_Matrix*)LUDecompose();
1444     if (LUdec) {
1445         _Matrix b      (hDim,1,false,true),
1446                 * result = (_Matrix*)_returnMatrixOrUseCache(hDim,vDim,_NUMERICAL_TYPE,false, cache);
1447         b.theData[0]=1.0;
1448       for (long i=0L; i<hDim; i++) {
1449             if (i) {
1450                 b.theData[i]=1.0;
1451                 b.theData[i-1L]=0.0;
1452             }
1453             _Matrix* invVector = (_Matrix*)LUdec->LUSolve(&b);
1454             _Matrix corrTerm (GetHDim(),1, false, true);
1455              Multiply(corrTerm, *invVector);
1456             corrTerm -= b;
1457             //_Matrix* corrTerm = (_Matrix*)(*this*(*invVector)-b).makeDynamic();
1458             _Matrix* corrX =  (_Matrix*)LUdec->LUSolve(&corrTerm);
1459             *invVector-=*corrX;
1460             DeleteObject (corrX);
1461             for (long j=0; j<hDim; j++) {
1462                 result->set (j,i) = invVector->theData[j];
1463             }
1464             DeleteObject (invVector);
1465         }
1466         DeleteObject (LUdec);
1467         return result;
1468     }
1469     return new _Matrix (1,1,false,true);
1470 
1471 }
1472 
1473 //__________________________________________________________________________________
MultByFreqs(long freqID,bool reuse_value_object)1474 HBLObjectRef   _Matrix::MultByFreqs (long freqID, bool reuse_value_object) {
1475 // multiply this transition probs matrix by frequencies
1476     HBLObjectRef value = ComputeNumeric(true);//!reuse_value_object);
1477 
1478     //printf ("\n%s\n", _String ((_String*)toStr()).get_str());
1479 
1480     if (freqID>=0) {
1481         _Matrix* freq_matrix = nil;
1482         freqID = modelFrequenciesIndices.list_data[freqID];
1483         if (freqID>=0) {
1484             freq_matrix = (_Matrix*)LocateVar(freqID)->GetValue();
1485             if (freq_matrix->storageType != _NUMERICAL_TYPE) {
1486                 if (freq_matrix->theValue) {
1487                     freq_matrix = (_Matrix*)freq_matrix->theValue;
1488                 } else {
1489                     freq_matrix = (_Matrix*)freq_matrix->ComputeNumeric();
1490                 }
1491             }
1492         }
1493 
1494         if (theIndex) {
1495             _Matrix*    vm = (_Matrix*) value;
1496             hyFloat * __restrict dp = vm ->theData;
1497 
1498             if (vm->compressedIndex) {
1499                 //vm->_validateCompressedStorage();
1500                 long from = 0L;
1501                 if (freq_matrix) {
1502                     for (long r = 0; r < hDim; r++) {
1503                         long diagEntry = -1;
1504                         hyFloat diagAccumulator = 0.;
1505                         for (long c = from; c < vm->compressedIndex[r]; c++) {
1506                             long col_index = vm->compressedIndex[c+hDim];
1507                             if (col_index != r) {
1508                                 dp[c] *= freq_matrix->theData[col_index];
1509                                 diagAccumulator -= dp[c];
1510                             } else {
1511                                 diagEntry = c;
1512                             }
1513                         }
1514                         from = vm->compressedIndex[r];
1515                         dp[diagEntry] = diagAccumulator;
1516                     }
1517                 } else {
1518                     for (long r = 0; r < hDim; r++) {
1519                         long diagEntry = -1;
1520                         hyFloat diagAccumulator = 0.;
1521                         for (long c = from; c < vm->compressedIndex[r]; c++) {
1522                             //printf ("%ld\n", vm->theIndex[c]);
1523                             if (vm->compressedIndex[c+hDim] != r) {
1524                                 diagAccumulator -= dp[c];
1525                             } else {
1526                                 diagEntry = c;
1527                             }
1528                         }
1529                         //printf ("%ld %ld %g\n", r, diagEntry, diagAccumulator);
1530                         from = vm->compressedIndex[r];
1531                         dp[diagEntry] = diagAccumulator;
1532                     }
1533                 }
1534                 //vm->_validateCompressedStorage();
1535             } else {
1536                 hyFloat *tempDiags = (hyFloat*) alloca (sizeof(hyFloat) * hDim);
1537                 InitializeArray(tempDiags, hDim, 0.0);
1538 
1539                 if (freq_matrix) {
1540                       for (long i=0; i<lDim; i++) {
1541                           long p = theIndex[i];
1542                           if (p != -1) {
1543                               long h = p / vDim;
1544                                    p = p - h*vDim;
1545                               if (h!=p) {
1546                                   tempDiags[h] += (dp[i] *= freq_matrix->theData[p]);
1547                               }
1548                           }
1549                       }
1550                 }
1551                 else {
1552                       for (long i=0; i<lDim; i++) {
1553                           long p = theIndex[i];
1554                           if (p != -1) {
1555                               long h = p / vDim;
1556                                    p = p - h*vDim;
1557                               if (h!=p) {
1558                                   tempDiags[h] += dp[i];
1559                               }
1560                           }
1561                       }
1562                 }
1563 
1564                 for (long j=0L; j<hDim; j++) {
1565                     vm->Store (j,j,-tempDiags[j]);
1566                 }
1567             }
1568 
1569         } else {
1570             hyFloat * theMatrix = ((_Matrix*)value)->theData;
1571 
1572             if (freq_matrix) {
1573                 if (freq_matrix->theIndex) {
1574                     HandleApplicationError(_String("Sparse frequency matrices are not supported"));
1575                 } else {
1576                     for (unsigned long column=0UL; column<vDim; column++) {
1577                       const hyFloat freq_i = freq_matrix->theData[column];
1578                       unsigned long entry = column;
1579                       for (;entry < lDim - vDim; entry += vDim) {
1580                         theMatrix[entry] *= freq_i;
1581                         theMatrix[entry+=vDim] *= freq_i;
1582                       }
1583                       if (entry < lDim) {
1584                         theMatrix[entry] *= freq_i;
1585                       }
1586                     }
1587                 }
1588             }
1589 
1590             for (unsigned long row_start = 0UL, row = 0UL; row_start < lDim; row_start+=vDim, row++) {
1591               unsigned long diag = row_start + row;
1592               theMatrix [diag] = 0.;
1593               for (unsigned long col = 0UL; col < row; col++) {
1594                 theMatrix[diag] -= theMatrix[row_start + col];
1595               }
1596               for (unsigned long col = row+1; col < vDim; col++) {
1597                 theMatrix[diag] -= theMatrix[row_start + col];
1598               }
1599             }
1600         }
1601 
1602     }
1603     return value;
1604 }
1605 
1606 
1607 //__________________________________________________________________________________
Compute(void)1608 HBLObjectRef   _Matrix::Compute (void) {
1609   //if ((storageType != 1)&&(storageType != 2))
1610   if (storageType != _NUMERICAL_TYPE) {
1611     if (storageType == _POLYNOMIAL_TYPE) {
1612       if (ANALYTIC_COMPUTATION_FLAG) {
1613         return this;
1614       }
1615     }
1616 
1617     if (IsAStringMatrix()) {
1618       return this;
1619     }
1620 
1621     if (theValue) {
1622       DeleteObject (theValue);
1623     }
1624 
1625     if (storageType != _SIMPLE_FORMULA_TYPE) {
1626       theValue  = Evaluate(false);
1627     } else {
1628       theValue  = EvaluateSimple ();
1629     }
1630     return theValue;
1631   }
1632   return this;
1633 }
1634 
1635 //__________________________________________________________________________________
ComputeNumeric(bool copy)1636 HBLObjectRef   _Matrix::ComputeNumeric (bool copy) {
1637     if (storageType != _NUMERICAL_TYPE) {
1638         if (storageType == 0 && ANALYTIC_COMPUTATION_FLAG) {
1639             return this;
1640         }
1641         if (storageType != _SIMPLE_FORMULA_TYPE) {
1642             if (theValue) {
1643                 DeleteObject (theValue);
1644             }
1645             theValue  = Evaluate(false);
1646         } else {
1647             if (copy) {
1648                 if (theValue) {
1649                     DeleteObject (theValue);
1650                 }
1651                 theValue = EvaluateSimple();
1652             } else {
1653                 theValue = EvaluateSimple((_Matrix*)theValue);
1654             }
1655         }
1656         return theValue;
1657     }
1658     if (copy) {
1659         if (theValue) {
1660             DeleteObject (theValue);
1661         }
1662         return (theValue = (_Matrix*)makeDynamic());
1663     }
1664     return this;
1665 }
1666 
1667 //__________________________________________________________________________________
RetrieveNumeric(void)1668 HBLObjectRef   _Matrix::RetrieveNumeric (void) {
1669     if (storageType != _NUMERICAL_TYPE) {
1670         if (theValue) {
1671             return theValue;
1672         }
1673 
1674         return ComputeNumeric();
1675     }
1676     return this;
1677 }
1678 
1679 //__________________________________________________________________________________
Sum(HBLObjectRef cache)1680 HBLObjectRef   _Matrix::Sum (HBLObjectRef cache) {
1681     return _returnConstantOrUseCache(MaxElement (1), cache);
1682 }
1683 
1684 //__________________________________________________________________________________
1685 
1686 
ExecuteSingleOp(long opCode,_List * arguments,_hyExecutionContext * context,HBLObjectRef cache)1687 HBLObjectRef _Matrix::ExecuteSingleOp (long opCode, _List* arguments, _hyExecutionContext* context, HBLObjectRef cache)  {
1688 
1689 
1690     switch (opCode) { // first check operations without arguments
1691       case HY_OP_CODE_ABS: // Abs
1692         return Abs(cache);
1693       case HY_OP_CODE_COLUMNS:  //Columns
1694         return _returnConstantOrUseCache(vDim, cache);
1695       case HY_OP_CODE_INVERSE: //Inverse
1696         return Inverse(cache);
1697       case HY_OP_CODE_EIGENSYSTEM: //Eigensystem
1698         return Eigensystem(cache);
1699       case HY_OP_CODE_EVAL: //Eval
1700         return (HBLObjectRef)ComputeNumeric()->makeDynamic();
1701       case HY_OP_CODE_EXP: //Exp
1702         return Exponentiate();
1703       case HY_OP_CODE_LUDECOMPOSE: // LUDecompose
1704         return LUDecompose();
1705       case HY_OP_CODE_LOG: // Log
1706         return ApplyScalarOperation ([] (hyFloat h) -> hyFloat {return log (h);}, cache);
1707       case HY_OP_CODE_ROWS: // Rows
1708         return _returnConstantOrUseCache (hDim, cache);
1709       case HY_OP_CODE_SIMPLEX: // Simplex
1710         return SimplexSolve();
1711       case HY_OP_CODE_TRANSPOSE: { // Transpose
1712         _Matrix* result = new _Matrix (*this);
1713         result->Transpose();
1714         return result;
1715       }
1716       case HY_OP_CODE_TYPE: // Type
1717         return Type(cache);
1718    }
1719 
1720   _MathObject * arg0 = _extract_argument (arguments, 0UL, false);
1721 
1722   switch (opCode) { // next check operations without arguments or with one argument
1723     case HY_OP_CODE_ADD: // +
1724       if (arg0) {
1725         return AddObj (arg0, cache);
1726       } else {
1727         return Sum (cache);
1728       }
1729       break;
1730     case HY_OP_CODE_SUB: // -
1731       if (arg0) {
1732         return SubObj(arg0, cache);
1733       } else {
1734         return ApplyScalarOperation ([] (hyFloat h) -> hyFloat {return -h;}, cache);
1735 
1736         //return (HBLObjectRef)((*this)*(-1.0)).makeDynamic();
1737       }
1738       break;
1739   }
1740 
1741   if (arg0) {
1742     switch (opCode) { // operations that require exactly one argument
1743       case HY_OP_CODE_IDIV: // $
1744       case HY_OP_CODE_DIV:  // /
1745         return MultElements(arg0,opCode == HY_OP_CODE_DIV, cache);
1746       case HY_OP_CODE_MOD: // %
1747         return SortMatrixOnColumn (arg0, cache);
1748       case HY_OP_CODE_AND: // &&
1749         return pFDR (arg0, cache);
1750       case HY_OP_CODE_MUL: // *
1751         return MultObj(arg0, cache);
1752       case HY_OP_CODE_LESS: // <
1753         return PathLogLikelihood(arg0, cache);
1754       case HY_OP_CODE_LEQ: // <=
1755         return K_Means(arg0, cache);
1756       case HY_OP_CODE_EQ: // ==
1757         return _returnConstantOrUseCache(Equal (arg0), cache);
1758         //return ProfileMeanFit(arg0);
1759       case HY_OP_CODE_GREATER: // >
1760         return NeighborJoin (!CheckEqual(arg0->Value(),0.0), cache);
1761       case HY_OP_CODE_GEQ: // >=
1762         return MakeTreeFromParent (arg0->Value(), cache);
1763       case HY_OP_CODE_CCHI2: //CChi2
1764         if (arg0->ObjectClass()==NUMBER && arg0->Value()>0.999 ) {
1765           return _returnConstantOrUseCache (FisherExact(5.,80.,1.), cache);
1766         } else {
1767           return _returnConstantOrUseCache (FisherExact(0.,0.,0.), cache);
1768         }
1769       case HY_OP_CODE_LUSOLVE: // LUSolve
1770         return LUSolve (arg0);
1771       case HY_OP_CODE_RANDOM: // Random
1772         return Random (arg0, cache);
1773       case HY_OP_CODE_POWER: // ^ (Poisson log-likelihood)
1774           return  PoissonLL (arg0, cache);
1775       case HY_OP_CODE_MAX: // Max
1776       case HY_OP_CODE_MIN: // Max
1777         if (arg0->ObjectClass()==NUMBER) {
1778           if (CheckEqual (arg0->Value(), 1)) {
1779             long index = 0L;
1780             hyFloat v[2] = {opCode == HY_OP_CODE_MAX?MaxElement (0,&index):MinElement(0,&index),0.0};
1781             v[1] = index;
1782             return new _Matrix (v,1,2);
1783           }
1784         }
1785         return _returnConstantOrUseCache (opCode == HY_OP_CODE_MAX?MaxElement (0):MinElement (0), cache);
1786    }
1787     _MathObject * arg1 = _extract_argument (arguments, 1UL, false);
1788 
1789      switch (opCode) {
1790 
1791       case HY_OP_CODE_MACCESS: // MAccess
1792         return MAccess (arg0,arg1, cache);
1793 
1794       case HY_OP_CODE_MCOORD: // MCoord
1795         return MCoord (arg0, arg1, cache);
1796     }
1797 
1798   }
1799 
1800   switch (opCode) {
1801     case HY_OP_CODE_ADD: // +
1802     case HY_OP_CODE_SUB: // -
1803     case HY_OP_CODE_IDIV: // $
1804     case HY_OP_CODE_DIV:  // /
1805     case HY_OP_CODE_MOD: // %
1806     case HY_OP_CODE_AND: // &&
1807     case HY_OP_CODE_MUL: // *
1808     case HY_OP_CODE_LESS: // <
1809     case HY_OP_CODE_LEQ: // <=
1810     case HY_OP_CODE_EQ: // ==
1811     case HY_OP_CODE_GREATER: // >
1812     case HY_OP_CODE_GEQ: // >=
1813     case HY_OP_CODE_CCHI2: //CChi2
1814     case HY_OP_CODE_LUSOLVE: // LUSolve
1815     case HY_OP_CODE_RANDOM: // Random
1816     case HY_OP_CODE_POWER: // ^ (Poisson log-likelihood)
1817     case HY_OP_CODE_MAX: // Max
1818     case HY_OP_CODE_MIN: // Max
1819     case HY_OP_CODE_MACCESS: // MAccess
1820     case HY_OP_CODE_MCOORD: // MCoord
1821       WarnWrongNumberOfArguments (this, opCode,context, arguments);
1822       break;
1823     default:
1824       WarnNotDefined (this, opCode,context);
1825   }
1826 
1827    return new _MathObject;
1828 }
1829 
1830 //_____________________________________________________________________________________________
AmISparse(void)1831 bool    _Matrix::AmISparse(void) {
1832     if (theIndex) {
1833         return true;    // duh!
1834     }
1835 
1836     if (storageType == _FORMULA_TYPE || storageType == _SIMPLE_FORMULA_TYPE) {
1837         return false;
1838     }
1839 
1840     long const threshhold = lDim * _Matrix::switchThreshold / 100 + 1;
1841     long k = 0L;
1842 
1843     if (storageType==_NUMERICAL_TYPE) {
1844       for (long i=0; i<lDim; i++) {
1845           if (theData[i]!=ZEROOBJECT) {
1846               k++;
1847               if (k == threshhold) break;
1848           }
1849       }
1850     } else {
1851       for (long i=0; i<lDim; i++) {
1852           if (IsNonEmpty(i) && !GetMatrixObject(i)->IsObjectEmpty()) {
1853               k++;
1854               if (k == threshhold) break;
1855           }
1856       }
1857     }
1858 
1859 
1860 
1861     if (k < threshhold) {
1862         // we indeed are sparse enough
1863         _Matrix sparseMe (hDim,vDim,true,storageType==_NUMERICAL_TYPE);
1864         if (storageType==_NUMERICAL_TYPE) {
1865             for (long i=0; i<lDim; i++) {
1866                 if (theData[i]!=ZEROOBJECT) {
1867                     sparseMe[i]=theData[i];
1868                 }
1869             }
1870         } else if (storageType==0) {
1871             for (long i=0; i<lDim; i++) {
1872                 if ((GetMatrixObject(i)!=ZEROPOINTER)&&(!GetMatrixObject(i)->IsObjectEmpty())) {
1873                     sparseMe.StoreObject(i,GetMatrixObject(i));
1874                 }
1875                 GetMatrixObject(i)->AddAReference();
1876             }
1877         }
1878 
1879         Clear();
1880         DuplicateMatrix (this, &sparseMe);
1881         return true;
1882     }
1883     return false;
1884 }
1885 
1886 //_____________________________________________________________________________________________
AmISparseFast(_Matrix & whereTo)1887 bool    _Matrix::AmISparseFast (_Matrix& whereTo) {
1888     if (theIndex) {
1889         return true;    // duh!
1890     }
1891 
1892     long k = 0L,
1893          threshold = lDim*_Matrix::switchThreshold/100;
1894 
1895     //speculatively allocate memory to store non-zero indices
1896 
1897     long * non_zero_index = (long*)alloca (threshold*sizeof(long));
1898 
1899 #if defined _SLKP_USE_AVX_INTRINSICS
1900     __m256d zeros = _mm256_setzero_pd();
1901     long lDimMOD4 = lDim >> 2 << 2;
1902     for (long i = 0; i < lDimMOD4; i+=4) {
1903          int res  = _mm256_movemask_pd(_mm256_cmp_pd (_mm256_loadu_pd (theData+i), zeros, _CMP_NEQ_OQ));
1904          if (res) { // something is different
1905             if (res & 1) { non_zero_index[k++] = i; if (k == threshold) break; };
1906             if (res & 2) { non_zero_index[k++] = i+1; if (k == threshold) break; };
1907             if (res & 4) { non_zero_index[k++] = i+2; if (k == threshold) break; };
1908             if (res & 8) { non_zero_index[k++] = i+3; if (k == threshold) break; };
1909         }
1910     }
1911 
1912     if (k < threshold)
1913         for (long i = lDimMOD4; i < lDim; i++) {
1914             if (theData[i] != 0.0) {
1915                 non_zero_index[k++] = i;
1916                 if (k == threshold) {
1917                     return false;
1918                 }
1919             }
1920         }
1921 #elif _SLKP_USE_ARM_NEON
1922     float64x2_t zeros = vdupq_n_f64 (0.);
1923     long lDimMOD2 = lDim >> 1 << 1;
1924     for (long i = 0; i < lDimMOD2; i+=2) {
1925 
1926          uint64x2_t res  = vceqq_f64  (vld1q_f64 (theData+i), zeros);
1927          if (vaddvq_u64 (res)) { // something is different
1928             if (vgetq_lane_u64 (res,0)) { non_zero_index[k++] = i; if (k == threshold) break; };
1929             if (vgetq_lane_u64 (res,1)) { non_zero_index[k++] = i+1; if (k == threshold) break; };
1930         }
1931     }
1932 
1933     if (k < threshold)
1934         for (long i = lDimMOD2; i < lDim; i++) {
1935             if (theData[i] != 0.0) {
1936                 non_zero_index[k++] = i;
1937                 if (k == threshold) {
1938                     return false;
1939                 }
1940             }
1941         }
1942 #else
1943     for (long i = 0; i < lDim; i++) {
1944         if (theData[i] != 0.0) {
1945             non_zero_index[k++] = i;
1946             if (k == threshold) {
1947                 return false;
1948             }
1949         }
1950     }
1951 #endif
1952 
1953 
1954 
1955     if (k < threshold) {
1956         // we indeed are sparse enough
1957 
1958         bool canReuse = whereTo.lDim > k;
1959 
1960         if (k == 0L) { // empty matrix
1961             //printf ("\nZERO SIZE\n");
1962             whereTo.lDim = 1L;
1963         } else {
1964             whereTo.lDim = k;
1965         }
1966 
1967         if (whereTo.theIndex) {
1968             if (canReuse) {
1969                 whereTo.theIndex               = (long*)MemReallocate (whereTo.theIndex , whereTo.lDim*sizeof(long));
1970             } else {
1971                 free (whereTo.theIndex);
1972                 whereTo.theIndex               = (long*)MatrixMemAllocate (whereTo.lDim*sizeof(long));
1973             }
1974         } else {
1975             whereTo.theIndex               = (long*)MatrixMemAllocate (whereTo.lDim*sizeof(long));
1976         }
1977 
1978         hyFloat * _hprestrict_          newData  =  canReuse ? whereTo.theData : (hyFloat*)MatrixMemAllocate (whereTo.lDim*sizeof(hyFloat));
1979 
1980         if (whereTo.compressedIndex) {
1981             MatrixMemFree(whereTo.compressedIndex);
1982         }
1983         whereTo.compressedIndex = (long*) MatrixMemAllocate((whereTo.lDim + hDim) * sizeof (long));
1984 
1985         long                    currentRow = 0L;
1986         long   * __restrict     wci = (long*)whereTo.compressedIndex;
1987 
1988         for (long i=0L; i<k; i++) {
1989             //printf ("%ld %ld", i, theIndex[i]);
1990             long entryIndex = non_zero_index[i];
1991             whereTo.theIndex[i] = entryIndex;
1992 
1993             long indexRow = entryIndex / vDim,
1994                  indexColumn = entryIndex - indexRow * vDim;
1995 
1996             wci[i + hDim] = indexColumn;
1997             if (indexRow > currentRow) {
1998                 for (long l = currentRow; l < indexRow; l++) {
1999                     wci[l] = i;
2000                 }
2001                 currentRow = indexRow;
2002             }
2003             newData[i]  = theData [entryIndex];
2004         }
2005 
2006         for (long l = currentRow; l < hDim; l++)
2007             whereTo.compressedIndex[l] = whereTo.lDim;
2008 
2009         if (canReuse) {
2010             whereTo.theData = (hyFloat*) MemReallocate(newData, whereTo.lDim*sizeof(hyFloat));
2011         } else {
2012             free     (whereTo.theData);
2013             whereTo.theData = newData;
2014         }
2015 
2016         return true;
2017     }
2018 
2019     return false;
2020 }
2021 
2022 //_____________________________________________________________________________________________
2023 
IsValidTransitionMatrix() const2024 bool    _Matrix::IsValidTransitionMatrix() const {
2025     if (is_square() && is_numeric()) {
2026         long d = GetHDim();
2027         hyFloat * sums = (hyFloat*)alloca (sizeof (hyFloat)*d);
2028         long idx = 0L;
2029         const hyFloat tolerance = kMachineEpsilon * 10.;
2030         for (long r = 0L; r < d; r++) {
2031             sums [r] = 0.;
2032             for (long c = 0L; c < d; c++, idx++) {
2033                 hyFloat term = theData[idx];
2034                 if (term < 0.0 || term > 1.0) {
2035                     if (CheckEqual(0.0, term, tolerance)) {
2036                         theData[idx] = 0.;
2037                         continue;
2038                     }
2039                     if (CheckEqual(1.0, term, tolerance)) {
2040                         theData[idx] = 1.;
2041                         sums[r] += 1.;
2042                         continue;
2043                     }
2044                     char buffer [255];
2045                     snprintf (buffer, 255, "FAILED IsValidTransitionMatrix at (%ld, %ld) = %20.15g\n", r, c, term);
2046                     ReportWarning(buffer);
2047                     return false;
2048                 }
2049                 sums[r] += term;
2050             }
2051         }
2052         for (long r = 0L; r < d; r++) {
2053             if (!CheckEqual(1.0, sums[r])) {
2054                 char buffer [255];
2055                 snprintf (buffer, 255, "FAILED ROW SUM at (%ld) = %20.15g\n", r, sums[r]);
2056                 ReportWarning(buffer);
2057                 return false;
2058             }
2059         }
2060         return true;
2061     }
2062     return false;
2063 }
2064 
2065 
2066 //_____________________________________________________________________________________________
2067 
IsReversible(_Matrix * freqs)2068 bool    _Matrix::IsReversible(_Matrix* freqs) {
2069 
2070     try {
2071 
2072         if (!is_square()) {
2073             throw _String ("Not a square matrix in _Matrix::IsReversible");
2074         }
2075 
2076         if (freqs && freqs->GetHDim () * freqs->GetVDim () != GetHDim()) {
2077             throw _String ("Incompatible frequency and rate matrix dimensions in _Matrix::IsReversible");
2078         }
2079 
2080         if (!is_numeric() && !is_expression_based()) {
2081             throw _String ("Unsupported rate matrix type in _Matrix::IsReversible");
2082         }
2083 
2084         if (freqs && !freqs->is_numeric() && !freqs->is_expression_based()) {
2085             throw _String ("Unsupported frequency matrix type in _Matrix::IsReversible");
2086         }
2087 
2088 
2089         bool   needAnalytics = is_expression_based() || (freqs && freqs->is_expression_based());
2090         if (needAnalytics) {
2091             if (freqs) {
2092                 for (long r = 0; r < hDim; r++)
2093                     for (long c = r+1; c < hDim; c++) {
2094                         bool compResult = true;
2095                         if (is_expression_based()) {
2096                             _Formula* rc = GetFormula(r,c),
2097                                       * cr = GetFormula(c,r);
2098 
2099                             if (rc && cr) {
2100                                 _Polynomial *rcp = (_Polynomial *)rc->ConstructPolynomial(),
2101                                              *crp = (_Polynomial *)cr->ConstructPolynomial();
2102 
2103                                 if (rcp && crp) {
2104                                     HBLObjectRef     tr = nil,
2105                                                      tc = nil;
2106 
2107                                     if (freqs->is_expression_based()) {
2108                                         if (freqs->GetFormula(r,0)) {
2109                                             tr = freqs->GetFormula(r,0)->ConstructPolynomial();
2110                                             if (tr) {
2111                                                 tr->AddAReference();
2112                                             } else {
2113                                                 throw _String ("Could not convert matrix cell (") & r & ',' & c & ") to a polynomial";
2114                                             }
2115                                         }
2116                                         if (freqs->GetFormula(c,0)) {
2117                                             tc = freqs->GetFormula(c,0)->ConstructPolynomial();
2118                                             if (tc) {
2119                                                 tc->AddAReference();
2120                                             } else {
2121                                                 DeleteObject (tr);
2122                                                 throw _String ("Could not convert frequency cell (") & c & ") to a polynomial";
2123                                              }
2124                                         }
2125                                     } else {
2126                                         tr = new _Constant ((*freqs)[r]);
2127                                         tc = new _Constant ((*freqs)[c]);
2128                                     }
2129                                     if (tr && tc) {
2130                                         _Polynomial        * rcpF = (_Polynomial*)rcp->Mult(tr, nil),
2131                                                            * crpF = (_Polynomial*)crp->Mult(tc, nil);
2132 
2133                                         compResult         = rcpF->Equal(crpF);
2134                                         DeleteObject (rcpF);
2135                                         DeleteObject (crpF);
2136 
2137                                     } else {
2138                                         compResult = !(tr||tc);
2139                                     }
2140 
2141                                     DeleteObject (tr);
2142                                     DeleteObject (tc);
2143 
2144 
2145                                 } else {
2146                                     throw _String ("Could not convert a matrix cell at (") & r & ',' & c & ") to a polynomial: " & _StringBuffer ((_StringBuffer*)rc->toStr(kFormulaStringConversionNormal));
2147                                 }
2148                              } else {
2149                                 compResult = !(rc || cr);
2150                             }
2151                             if (!compResult) {
2152                                  throw _String ("Unequal cells at (") & r & ',' & c & ")";
2153                             }
2154                         }
2155 
2156                     }
2157             } else {
2158                 for (long r = 0; r < hDim; r++)
2159                     for (long c = r+1; c < hDim; c++) {
2160                         bool compResult = true;
2161                         _Formula* rc = GetFormula(r,c),
2162                                   * cr = GetFormula(c,r);
2163 
2164                         if (rc && cr) {
2165                             _Polynomial *rcp = (_Polynomial *)rc->ConstructPolynomial(),
2166                                          *crp = (_Polynomial *)cr->ConstructPolynomial();
2167 
2168                             if (rcp && crp) {
2169                                 compResult = rcp->Equal(crp);
2170                             } else {
2171                                 compResult = rc->EqualFormula(cr);
2172                             }
2173                         } else {
2174                             compResult = !(rc || cr);
2175                         }
2176 
2177                         if (!compResult) {
2178                             return false;
2179                         }
2180                     }
2181             }
2182             return true;
2183         } else {
2184             if (freqs) {
2185                 for (long r = 0; r < hDim; r++)
2186                     for (long c = r+1; c < hDim; c++)
2187                         if (! CheckEqual ((*this)(r,c)*(*freqs)[r], (*this)(c,r)*(*freqs)[c])) {
2188                             return false;
2189                         }
2190             } else {
2191                 for (long r = 0; r < hDim; r++)
2192                     for (long c = r+1; c < hDim; c++)
2193                         if (! CheckEqual ((*this)(r,c), (*this)(c,r))) {
2194                             return false;
2195                         }
2196             }
2197             return true;
2198         }
2199     } catch (const _String& reason) {
2200         ReportWarning (_String ("Reversibility checks failed: ") & reason);
2201         return false;
2202     }
2203     return false;
2204 }
2205 
2206 //_____________________________________________________________________________________________
2207 
CheckIfSparseEnough(bool force,bool copy)2208 bool    _Matrix::CheckIfSparseEnough(bool force, bool copy) {
2209 
2210 // check if matrix is sparse enough to justify compressed storage
2211 
2212     if (theIndex && (force || lDim>hDim*vDim*::_Matrix::switchThreshold/100)) {
2213         // switch to normal matrix storage - more than half elements are non-zero
2214         // -= allocationBlock;
2215 
2216         long square_dimension = vDim*hDim;
2217 
2218         if (!is_numeric()) {
2219             // pointers
2220             hyPointer* tempData = (hyPointer*) MemAllocate (square_dimension*sizeof(hyPointer));
2221             InitializeArray(tempData, square_dimension, (hyPointer)nil);
2222 
2223             if (copy) {
2224                 for (unsigned long i = 0UL; i<lDim; i++) {
2225                     if (IsNonEmpty(i)) {
2226                         tempData[theIndex[i]]=((hyPointer*)theData)[i];
2227                     }
2228                 }
2229             }
2230             MatrixMemFree( theData);
2231             theData = (hyFloat*)tempData;
2232        } else {
2233             //objects
2234             hyFloat* tempData = (hyFloat*) MemAllocate (square_dimension*sizeof(hyFloat), true, 64);
2235            // InitializeArray(tempData, square_dimension, 0.0);
2236 
2237             if (copy) {
2238                 for (unsigned long i = 0UL; i<lDim; i++) {
2239                     long k = theIndex[i];
2240                     if (k >= 0) {
2241                         tempData [k] = ((hyFloat*)theData) [i];
2242                     }
2243                 }
2244             }
2245             MatrixMemFree( theData);
2246             theData = (hyFloat*)tempData;
2247 
2248         }
2249         lDim = square_dimension;
2250         MatrixMemFree( theIndex);
2251         theIndex = nil;
2252         return true;
2253     }
2254     return false;
2255 }
2256 
2257 //_____________________________________________________________________________________________
IncreaseStorage(void)2258 bool    _Matrix::IncreaseStorage    (void) {
2259     if (compressedIndex) {
2260         HandleApplicationError("Internal error. Called _Matrix::IncreaseStorage on compressed index matrix");
2261         return false;
2262     }
2263     lDim += allocationBlock;
2264     theIndex = (long*)MemReallocate(theIndex,  lDim*sizeof(long));
2265     for (long i = lDim-allocationBlock; i < lDim; i++) {
2266         theIndex [i] = -1;
2267     }
2268 
2269     if (!is_numeric()) {
2270         // pointers or formulas
2271         theData = (hyFloat*) MemReallocate(theData, lDim*sizeof(void*));
2272         for (long i = lDim-allocationBlock; i < lDim; i++) {
2273             ((_Formula**)theData) [i] = ZEROPOINTER;
2274         }
2275 
2276     } else {
2277         //objects
2278         theData = (hyFloat*) MemReallocate(theData, lDim*sizeof(hyFloat));
2279         for (long i = lDim-allocationBlock; i < lDim; i++) {
2280             theData [i] = ZEROOBJECT;
2281         }
2282     }
2283     return TRUE;
2284 
2285 }
2286 
2287 
2288 //_____________________________________________________________________________________________
2289 
Convert2Formulas(void)2290 void    _Matrix::Convert2Formulas (void)
2291 {
2292     if (is_numeric()) {
2293         storageType = _FORMULA_TYPE;
2294         _Formula** tempData = (_Formula**)MatrixMemAllocate (sizeof(void*)*lDim);
2295         if (is_dense()) {
2296             for (long i = 0; i<lDim; i++) {
2297                 tempData[i] = new _Formula (new _Constant (((hyFloat*)theData)[i]));
2298             }
2299         } else
2300             for (long i = 0; i<lDim; i++) {
2301                 if (IsNonEmpty(i)) {
2302                     //_Constant c (((hyFloat*)theData)[i]);
2303                     //_Formula f((_PMathObj)c.makeDynamic());
2304                     //tempData[i] = (_Formula*)f.makeDynamic();
2305                     tempData[i] = new _Formula (new _Constant (((hyFloat*)theData)[i]));
2306                 } else {
2307                     tempData[i]=nil;
2308                 }
2309             }
2310 
2311         MatrixMemFree (theData);
2312         theData = (hyFloat*)tempData;
2313     }
2314 }
2315 
2316 
2317 
2318 
2319 //_____________________________________________________________________________________________
2320 
ScanForVariables(_AVLList & theReceptacle,bool inclG,_AVLListX * tagger,long weights) const2321 void    _Matrix:: ScanForVariables(_AVLList& theReceptacle, bool inclG, _AVLListX* tagger, long weights) const {
2322     ScanForVariables2 (theReceptacle, inclG, -1, true, tagger, weights);
2323 }
2324 //_____________________________________________________________________________________________
2325 
ScanForVariables2(_AVLList & theReceptacle,bool inclG,long modelID,bool inclCat,_AVLListX * tagger,long weights) const2326 void    _Matrix:: ScanForVariables2(_AVLList& theReceptacle, bool inclG, long modelID, bool inclCat, _AVLListX* tagger, long weights) const {
2327     if (is_expression_based()) { // a formula based matrix, there is stuff to do
2328         if (modelID >= 0) {
2329             _AssociativeList*      definedCache = nil;
2330             _Variable*             cachedDeps = FetchVar(LocateVarByName (CACHE_FORMULA_DEPENDANCY));
2331 
2332             if (cachedDeps && cachedDeps->ObjectClass () == ASSOCIATIVE_LIST)
2333                 // 20100316 SLKP: I am pretty sure this is broken...
2334             {
2335                 definedCache = (_AssociativeList*)cachedDeps->GetValue();
2336                 _String     matrixKey (modelID);
2337                 _Matrix*    cachedValues = (_Matrix*)definedCache->GetByKey (matrixKey,MATRIX);
2338 
2339                 if (cachedValues == nil) {
2340                     _Formula ** theFormulas = (_Formula**)theData;
2341 
2342                     _SimpleList sl1,
2343                                 sl2;
2344                     _AVLList    a1 (&sl1),
2345                                 a2 (&sl2);
2346 
2347                     if (theIndex) {
2348                         for (long i = 0; i<lDim; i++)
2349                             if (IsNonEmpty(i)) {
2350                                 theFormulas[i]->ScanFForVariables(a1,false);
2351                                 theFormulas[i]->ScanFForVariables(a2,true);
2352                             }
2353                     } else
2354                         for (long i = 0; i<lDim; i++)
2355                             if (theFormulas[i]!=(_Formula*)ZEROPOINTER) {
2356                                 theFormulas[i]->ScanFForVariables(a1,false);
2357                                 theFormulas[i]->ScanFForVariables(a2,true);
2358                             }
2359 
2360                     a1.ReorderList();
2361                     a2.ReorderList();
2362 
2363                     cachedValues = new _Matrix (2,sl2.lLength,false,true);
2364 
2365                     for (unsigned long k=0; k<sl1.lLength; k++) {
2366                         cachedValues->theData[k] = sl1.list_data[k];
2367                     }
2368                     {
2369                         for (unsigned long k=sl1.lLength; k<sl2.lLength; k++) {
2370                             cachedValues->theData[k] = -1.;
2371                         }
2372                     }
2373                     {
2374                         for (unsigned long k=0; k<sl2.lLength; k++) {
2375                             cachedValues->theData[k+sl2.lLength] = sl2.list_data[k];
2376                         }
2377                     }
2378 
2379                     _FString aKey (matrixKey,false);
2380 
2381                     definedCache->MStore (&aKey, cachedValues, false);
2382 
2383                 }
2384 
2385                 long colCount = cachedValues->GetVDim(),
2386                      rowIndex = inclG?colCount:0;
2387 
2388                 for (long k=0; k<colCount; k++,rowIndex++) {
2389                     long vI = cachedValues->theData[rowIndex];
2390                     if (vI >= 0) {
2391                         theReceptacle.Insert ((BaseRef)vI);
2392                         if (tagger) {
2393                             tagger->UpdateValue((BaseRef)vI, weights, 0);
2394                         }
2395                     } else {
2396                         break;
2397                     }
2398                 }
2399 
2400                 return;
2401             }
2402 
2403         }
2404 
2405         _Formula ** theFormulas = (_Formula**)theData;
2406 
2407         if (theIndex) {
2408             for (long i = 0; i<lDim; i++)
2409                 if (IsNonEmpty(i)) {
2410                     theFormulas[i]->ScanFForVariables(theReceptacle,inclG,false,inclCat, false, tagger, weights);
2411                 }
2412         } else
2413             for (long i = 0; i<lDim; i++) {
2414                 if (theFormulas[i]!=(_Formula*)ZEROPOINTER) {
2415                     theFormulas[i]->ScanFForVariables (theReceptacle,inclG,false,inclCat, false, tagger, weights);
2416                 }
2417             }
2418     } else if (storageType == 0) { // a polynomial based matrix, there is stuff to do
2419         _MathObject ** thePoly = (_MathObject**)theData;
2420         if (theIndex)
2421             for (long i = 0; i<lDim; i++) {
2422                 if (IsNonEmpty(i)) {
2423                     thePoly[i]->ScanForVariables(theReceptacle,inclG,tagger, weights);
2424                 }
2425             }
2426         else
2427             for (long i = 0; i<lDim; i++) {
2428                 if (thePoly[i]!=ZEROPOINTER) {
2429                     thePoly[i]->ScanForVariables (theReceptacle,inclG,tagger, weights);
2430                 }
2431             }
2432     }
2433 
2434 }
2435 
2436 //_____________________________________________________________________________________________
2437 
IsConstant(void)2438 bool    _Matrix::IsConstant(void)
2439 {
2440     if (storageType == 1) {
2441         return true;
2442     }
2443 
2444     if (storageType == 2) { // a formula based matrix, there is stuff to do
2445         _Formula ** theFormulas = (_Formula**)theData;
2446         if (theIndex) {
2447             for (long i = 0; i<lDim; i++)
2448                 if (IsNonEmpty(i) && !theFormulas[i]->IsConstant()) {
2449                     return false;
2450                 }
2451         } else
2452             for (long i = 0; i<lDim; i++)
2453                 if (theFormulas[i]!=(_Formula*)ZEROPOINTER && !theFormulas[i]->IsConstant()) {
2454                     return false;
2455                 }
2456 
2457         return true;
2458 
2459     }
2460     return false;
2461 }
2462 
2463 //_____________________________________________________________________________________________
2464 
ProcessFormulas(long & stackLength,_AVLList & varList,_SimpleList & newFormulas,_SimpleList & references,_AVLListX & flaStrings,bool runAll,_Matrix * stencil)2465 bool        _Matrix::ProcessFormulas (long& stackLength, _AVLList& varList,   _SimpleList& newFormulas,
2466                                       _SimpleList& references, _AVLListX& flaStrings,
2467                                       bool runAll, _Matrix * stencil) {
2468     _Formula *      thisFormula = nil;
2469     _Formula **     theFormulas = (_Formula**)theData;
2470 
2471     bool isGood = true;
2472 
2473     if (theIndex) {
2474         for (long i = 0L; i<lDim; i++) {
2475             long cellIndex = theIndex [i];
2476             if (cellIndex>-1) {
2477                 if (stencil && CheckEqual(stencil->theData[cellIndex],0.0)) {
2478                     references << -1;
2479                     continue;
2480                 }
2481                 thisFormula = theFormulas[i];
2482 
2483                 if (runAll || thisFormula->AmISimple(stackLength,varList)) {
2484                     _String * flaString = (_String*)thisFormula->toStr(kFormulaStringConversionNormal, nil,true);
2485                     long      fref = flaStrings.Insert(flaString,newFormulas.lLength);
2486                     if (fref < 0) {
2487                         references << flaStrings.GetXtra (-fref-1);
2488                         DeleteObject (flaString);
2489                     } else {
2490                         newFormulas << (long)thisFormula;
2491                         references << fref;
2492                     }
2493 
2494                 } else {
2495                     isGood = false;
2496                     break;
2497                 }
2498             } else {
2499                 references << -1;
2500             }
2501         }
2502     } else {
2503         for (long i = 0L; i<lDim; i++) {
2504             if ((theFormulas[i]!=(_Formula*)ZEROPOINTER)&&(!theFormulas[i]->IsEmpty())) {
2505                 thisFormula = theFormulas[i];
2506 
2507                 if (stencil && CheckEqual(stencil->theData[i],0.0)) {
2508                     references << -1;
2509                     continue;
2510                 }
2511 
2512                 if (runAll || thisFormula->AmISimple(stackLength,varList)) {
2513                     _String * flaString = (_String*)thisFormula->toStr(kFormulaStringConversionNormal, nil,true);
2514                     long      fref = flaStrings.Insert(flaString,newFormulas.lLength);
2515                     if (fref < 0) {
2516                         references << flaStrings.GetXtra (-fref-1);
2517                         DeleteObject (flaString);
2518                     } else {
2519                         newFormulas << (long)thisFormula;
2520                         references << fref;
2521                     }
2522                 } else {
2523                     isGood = false;
2524                     break;
2525                 }
2526             } else {
2527                 references << -1;
2528             }
2529         }
2530     }
2531     return isGood;
2532 }
2533 
2534 //_____________________________________________________________________________________________
BranchLengthStencil(void) const2535 _Matrix*        _Matrix::BranchLengthStencil (void) const {
2536 
2537     _Matrix * stencil = (_Matrix*)hy_env::EnvVariableGet(hy_env::branch_length_stencil, MATRIX);
2538     if (stencil) {
2539         if (stencil->storageType == _NUMERICAL_TYPE && stencil->hDim==stencil->vDim && stencil->hDim == hDim) {
2540             stencil->CheckIfSparseEnough (true);
2541         } else {
2542             stencil = nil;
2543         }
2544     }
2545 
2546     return stencil;
2547 }
2548 
2549 //_____________________________________________________________________________________________
BranchLengthExpression(_Matrix * baseFreqs,bool mbf)2550 _String*        _Matrix::BranchLengthExpression (_Matrix* baseFreqs, bool mbf) {
2551     if (storageType == _FORMULA_TYPE) {
2552 
2553         long            stack_length = 0L;
2554 
2555         _SimpleList     new_formulas,
2556                         references;
2557 
2558         _List           converted_expressions;
2559         _AVLListX       converted_expressions_avl(&converted_expressions);
2560         _Matrix*        stencil = BranchLengthStencil();
2561 
2562         print_digit_specification = hy_env::EnvVariableGetDefaultNumber(hy_env::print_float_digits);
2563 
2564        _SimpleList varList = PopulateAndSort([&] (_AVLList & list) -> void {
2565            ProcessFormulas (stack_length,list,new_formulas,references,converted_expressions_avl,true,stencil);
2566        });
2567 
2568         _StringBuffer * sendMeBack = new _StringBuffer(256L);
2569 
2570         if (baseFreqs->is_numeric()) {
2571             // numerical base frequencies
2572             _Matrix   multipliersByRate (new_formulas.countitems(),1,false,true);
2573 
2574             ForEach([this, &multipliersByRate, &references, baseFreqs, mbf] (BaseRef object, unsigned long direct_index, unsigned long index) -> void {
2575                 long this_ref = references.get (index);
2576                 if (this_ref >= 0) {
2577                     multipliersByRate.set(0,this_ref) += (*baseFreqs)(direct_index/vDim,0) *
2578                                                          (mbf?(*baseFreqs)(direct_index%vDim,0):1.0);
2579                 }
2580 
2581             }, [] (unsigned long) -> BaseRef {return nil;});
2582 
2583 
2584             for (unsigned long k=0UL; k<new_formulas.countitems(); k++) {
2585                 hyFloat this_multiplier = multipliersByRate (k, 0);
2586 
2587                 if (!CheckEqual(this_multiplier,0.0)) {
2588                     if (sendMeBack->nonempty()) {
2589                         (*sendMeBack) << '+';
2590                     }
2591 
2592                     (*sendMeBack) << '('
2593                                   << (_String*)converted_expressions.GetItem(k)
2594                                   << ")*"
2595                                   << _String(this_multiplier, print_digit_specification);
2596 
2597                 }
2598             }
2599         } else if (baseFreqs->is_expression_based()) {
2600             // formula-based equilibrium frequencies
2601 
2602             _List   freqFla,
2603                     multipliersByRate;
2604 
2605             for (long k=0L; k<new_formulas.countitems(); k++) {
2606                 multipliersByRate.AppendNewInstance(new _StringBuffer (128L));
2607             }
2608 
2609             for (long k=0L; k<hDim; k++) {
2610                 freqFla.AppendNewInstance ((_String*)baseFreqs->GetFormula(k,0)->toStr(kFormulaStringConversionNormal, nil,true));
2611             }
2612 
2613             ForEach([this, &multipliersByRate, &references, baseFreqs, mbf, &freqFla] (BaseRef object, unsigned long direct_index, unsigned long index) -> void {
2614                 long this_ref = references.get (index);
2615                 if (this_ref >= 0L) {
2616                     _StringBuffer * thisAdder = (_StringBuffer*)multipliersByRate(this_ref);
2617                     if (thisAdder->nonempty()) {
2618                         (*thisAdder) << '+';
2619                     }
2620                     (*thisAdder) << '(';
2621                     if (mbf) {
2622                         (*thisAdder) << (_String*)freqFla(direct_index%vDim)
2623                         << ")*(";
2624                     }
2625                     (*thisAdder) << (_String*)freqFla(direct_index/vDim) << ')';
2626                 }
2627             }, [] (unsigned long) -> BaseRef {return nil;});
2628 
2629             for (long k=0L; k<new_formulas.countitems(); k++) {
2630                 ((_StringBuffer*)multipliersByRate(k))->TrimSpace();
2631                 if (k) {
2632                     (*sendMeBack) << '+';
2633                 }
2634 
2635                 (*sendMeBack) << '('
2636                     << (_String*)converted_expressions.GetItem(k)
2637                     << ")*("
2638                     << (_String*)multipliersByRate(k)
2639                     << ')';
2640             }
2641         }
2642         sendMeBack->TrimSpace();
2643         if (sendMeBack->nonempty()) {
2644             _Formula        blF (*sendMeBack);
2645             _Polynomial*    isPoly = (_Polynomial*)blF.ConstructPolynomial();
2646             if (isPoly) {
2647                 DeleteObject (sendMeBack);
2648                 sendMeBack = (_StringBuffer*)isPoly->toStr();
2649             }
2650         }
2651         return sendMeBack;
2652     }
2653     return new _String;
2654 }
2655 
2656 //_____________________________________________________________________________________________
MakeMeSimple(void)2657 void        _Matrix::MakeMeSimple (void) {
2658     if (is_expression_based()) {
2659         long            stackLength = 0L;
2660 
2661         _SimpleList     newFormulas,
2662                         references;
2663 
2664         _List           flaStringsL;
2665         _AVLListX       flaStrings(&flaStringsL);
2666 
2667 
2668         _SimpleList varListAux;
2669         _AVLList    varList (&varListAux);
2670 
2671 
2672         if (!is_dense()) {
2673             CompressSparseMatrix(false, (hyFloat*)alloca (sizeof (hyFloat) * lDim));
2674         }
2675         if (ProcessFormulas (stackLength,varList,newFormulas,references,flaStrings)) {
2676             storageType = _SIMPLE_FORMULA_TYPE;
2677 
2678             cmd                         = new _CompiledMatrixData;
2679             cmd->has_volatile_entries   = false;
2680 
2681             for (unsigned long k = 0; k < newFormulas.lLength; k++) {
2682                 cmd->has_volatile_entries = ((_Formula*)newFormulas.get(k))->ConvertToSimple(varList) || cmd->has_volatile_entries;
2683             }
2684 
2685             cmd->varIndex.Duplicate     (&varListAux);
2686             cmd->theStack               = (_SimpleFormulaDatum*)MatrixMemAllocate (stackLength*sizeof(_SimpleFormulaDatum));
2687             cmd->varValues              = (_SimpleFormulaDatum*)MatrixMemAllocate ((cmd->varIndex.countitems()>0?varList.countitems():1)*sizeof(_SimpleFormulaDatum));
2688             long allocation_size = MAX (references.lLength, 1) * sizeof (long);
2689             cmd->formulaRefs            = (long*)MemAllocate (allocation_size);
2690             memcpy (cmd->formulaRefs, references.list_data, allocation_size);
2691             cmd->formulaValues          = new hyFloat [newFormulas.lLength];
2692             cmd->formulasToEval.Duplicate (&newFormulas);
2693         }
2694 
2695     }
2696 }
2697 //_____________________________________________________________________________________________
MakeMeGeneral(void)2698 void        _Matrix::MakeMeGeneral (void) {
2699     if (storageType == _SIMPLE_FORMULA_TYPE) {
2700         for (long k = 0L; k < cmd->formulasToEval.lLength; k++) {
2701             ((_Formula*)cmd->formulasToEval.list_data[k])->ConvertFromSimpleList(cmd->varIndex);
2702         }
2703 
2704         delete [] cmd->formulaValues;
2705         free   (cmd->formulaRefs);
2706 
2707         MatrixMemFree   (cmd->theStack);
2708         MatrixMemFree   (cmd->varValues);
2709         delete          (cmd);
2710         cmd             = nil;
2711         storageType     = _FORMULA_TYPE;
2712     }
2713 }
2714 //_____________________________________________________________________________________________
Evaluate(bool replace)2715 HBLObjectRef   _Matrix::Evaluate (bool replace)
2716 // evaluate the matrix  overwriting (or not) the old one
2717 {
2718     _Matrix result (hDim, vDim, bool (theIndex), true);
2719 
2720     if (storageType == 2) {
2721         HBLObjectRef formValue = nil;
2722         _Formula ** theFormulas = (_Formula**)theData;
2723         if (theIndex) {
2724             for (long i = 0; i<lDim; i++) {
2725                 //long k =
2726                 if (theIndex[i]!=-1) {
2727                     formValue = theFormulas[i]->Compute();
2728                     if (formValue) {
2729                         result[HashBack(i)] = formValue->Value();
2730                         //DeleteObject (formValue);
2731                     } else {
2732                         result[HashBack(i)] = 0;
2733                     }
2734                 }
2735             }
2736             // check for probablilty matrices * fillers
2737             if ((hDim==vDim)&&(!replace))
2738                 for (long i = 0; i<hDim; i++) {
2739                     long k = Hash(i,i);
2740                     if ((k>=0)&&theFormulas[k]->IsEmpty()) {
2741                         hyFloat *st = &result[k];
2742                         *st=0;
2743                         for (long j = 0; j<vDim; j++) {
2744                             if (j==i) {
2745                                 continue;
2746                             }
2747                             *st-=result(i,j);
2748                         }
2749                     } else if (k<0) {
2750                         hyFloat *st = &result[i*vDim+i];
2751                         *st=0;
2752                         for (long j = 0; j<vDim; j++) {
2753                             if (j==i) {
2754                                 continue;
2755                             }
2756                             *st-=result(i,j);
2757                         }
2758                     }
2759                 }
2760         } else {
2761             for (long i = 0; i<lDim; i++) {
2762                 if (theFormulas[i]!=(_Formula*)ZEROPOINTER) {
2763                     formValue = theFormulas[i]->Compute();
2764                     if (formValue && formValue->ObjectClass() == NUMBER) {
2765                         result.theData[i] = formValue->Value();
2766                         //DeleteObject (formValue);
2767                     } else {
2768                         result.theData[i] = 0;
2769                     }
2770                 }
2771             }
2772             // check for probablilty matrices * fillers
2773 
2774             if ((hDim==vDim)&&(!replace))
2775                 for (long i = 0; i<lDim; i+=vDim+1) {
2776                     if (theFormulas[i]!=(_Formula*)ZEROPOINTER) {
2777                         if (theFormulas[i]->IsEmpty()) {
2778                             hyFloat st = 0;
2779                             long k = i/vDim,j;
2780                             for (j = k*vDim; j<k*vDim+k; j++) {
2781                                 st-=result.theData[j];
2782                             }
2783                             for (j = k*vDim+k+1; j<(k+1)*vDim; j++) {
2784                                 st-=result.theData[j];
2785                             }
2786                             result.theData[i] = st;
2787                         }
2788                     }
2789                 }
2790         }
2791     }
2792     if (storageType == 0) {
2793         HBLObjectRef polValue = nil;
2794         _MathObject ** thePoly = (_MathObject**)theData;
2795         if (theIndex) {
2796             for (long i = 0; i<lDim; i++) {
2797                 if (IsNonEmpty(i)) {
2798                     polValue = thePoly[i]->Compute();
2799                     if (polValue) {
2800                         result[HashBack(i)] = polValue->Value();
2801                         DeleteObject (polValue);
2802                     } else {
2803                         result[i] = 0;
2804                     }
2805                 }
2806             }
2807 
2808         } else {
2809             for (long i = 0; i<lDim; i++) {
2810                 if (thePoly[i]!=(_MathObject*)ZEROPOINTER) {
2811                     polValue = thePoly[i]->Compute();
2812                     if (polValue) {
2813                         result[i] = polValue->Value();
2814                         DeleteObject (polValue);
2815                     } else {
2816                         result[i] = 0;
2817                     }
2818                 }
2819             }
2820         }
2821     }
2822     if (replace) {
2823         *this = result;
2824     } else {
2825         return (HBLObjectRef)result.makeDynamic();
2826     }
2827     return nil;
2828 }
2829 
2830 //_____________________________________________________________________________________________
ConvertToSimpleList(_SimpleList & sl)2831 void        _Matrix::ConvertToSimpleList (_SimpleList & sl)
2832 {
2833     sl.Clear();
2834     if (storageType == _NUMERICAL_TYPE) {
2835         sl.RequestSpace (hDim*vDim+1);
2836 
2837         for (long i=0; i<hDim; i++)
2838             for (long j=0; j<vDim; j++) {
2839                 sl << (*this)(i,j);
2840             }
2841     } else {
2842       if (storageType == _FORMULA_TYPE) {
2843         _Matrix * c = ((_Matrix*)Compute());
2844         if (c->storageType == _NUMERICAL_TYPE) {
2845           c -> ConvertToSimpleList (sl);
2846         }
2847       }
2848     }
2849 }
2850 
2851 //_____________________________________________________________________________________________
IsAStringMatrix(void) const2852 bool        _Matrix::IsAStringMatrix (void) const
2853 // check if a formula matrix contains strings
2854 {
2855     if (is_expression_based()) {
2856         try {
2857             return Any ([&] (_Formula * f, unsigned long) -> bool {
2858                             if (f) {
2859                                 if (f->ObjectClass() == STRING)
2860                                     return true;
2861                                 throw (0);
2862                             }
2863                             return false;
2864                         },
2865                         [&] (unsigned long i) -> _Formula * {return ((_Formula**)theData)[i];});
2866 
2867         } catch (int ) {
2868             return false;
2869         }
2870 
2871     }
2872     return false;
2873 }
2874 
2875 //_____________________________________________________________________________________________
FillInList(_List & fillMe,bool convert_numbers) const2876 void        _Matrix::FillInList (_List& fillMe, bool convert_numbers) const {
2877 // check if a formula matrix contains strings
2878     if (is_expression_based()) {
2879           for (unsigned long r=0UL; r<hDim; r++)
2880               for (unsigned long c=0UL; c<vDim; c++) {
2881                   _Formula * entryFla = GetFormula(r,c);
2882                   if (entryFla) {
2883                       HBLObjectRef computedValue = FetchObjectFromFormulaByType (*entryFla, STRING);
2884                       if (computedValue) {
2885                           fillMe < new _StringBuffer (((_FString*)computedValue)->get_str());
2886                       } else {
2887                         fillMe.Clear();
2888                         return;
2889                       }
2890                   }
2891               }
2892     } else {
2893         if (convert_numbers && is_numeric()) {
2894             for (unsigned long r=0UL; r<hDim; r++) {
2895                 for (unsigned long c=0UL; c<vDim; c++) {
2896                     fillMe.AppendNewInstance (new _String ((*this)(r,c)));
2897                 }
2898             }
2899         }
2900     }
2901 }
2902 
2903 //_____________________________________________________________________________________________
_validateCompressedStorage(void) const2904 bool   _Matrix::_validateCompressedStorage (void) const {
2905     if (theIndex && compressedIndex) {
2906         long from = 0L;
2907         long last_index = -1L;
2908         for (long r = 0; r < hDim; r++) {
2909             if (compressedIndex[r] < from || compressedIndex[r] > lDim ) {
2910                 HandleApplicationError(_String ("Inconsistent compressedIndex row element count at " ) & r & " : " & from & " vs " & compressedIndex[r]);
2911                 return false;
2912             }
2913             for (long c = from; c < compressedIndex[r]; c++) {
2914                 long myIndex = theIndex[c];
2915                 if (myIndex <= last_index) {
2916                     HandleApplicationError(_String ("Lack of sortedness in theIndex at " ) & c & " : " & myIndex & " vs " & last_index);
2917                     return false;
2918                 }
2919                 if (myIndex < 0 || myIndex >= hDim * vDim) {
2920                     HandleApplicationError(_String ("Out of bounds in theIndex at " ) & c & " : " & myIndex & ", lDim = " & lDim);
2921                     return false;
2922                 }
2923                 last_index = myIndex;
2924                 if (c > from) {
2925                     if (compressedIndex[c+hDim] <= compressedIndex [c-1+hDim]) {
2926                         HandleApplicationError(_String ("Lack of sortedness in columns at " ) & c & " : " & compressedIndex[c+hDim] & " vs " & compressedIndex[c+hDim-1]);
2927                         return false;
2928                     }
2929                 }
2930                 if (myIndex / vDim != r || myIndex % hDim != compressedIndex[c+vDim]) {
2931                     HandleApplicationError(_String ("Stored index does match row/column " ) & myIndex & "(" & c & ") : " & r & " , " & compressedIndex[c+hDim]);
2932                     return false;
2933                 }
2934             }
2935             from = compressedIndex[r];
2936         }
2937 
2938         if (compressedIndex[hDim-1] != lDim) {
2939             HandleApplicationError(_String ("Incompatible compressedIndex[hDim-1] and lDim: " ) & compressedIndex[hDim-1] & " : " & lDim);
2940             return false;
2941         }
2942 
2943         return true;
2944     }
2945     return false;
2946 }
2947 
2948 //_____________________________________________________________________________________________
EvaluateSimple(_Matrix * existing_storage)2949 HBLObjectRef   _Matrix::EvaluateSimple (_Matrix* existing_storage) {
2950 // evaluate the matrix  overwriting the old one
2951     _Matrix * result;
2952 
2953     if (existing_storage && existing_storage->hDim == hDim && existing_storage->vDim == vDim && existing_storage->is_numeric() && ((bool)existing_storage->theIndex == (bool)theIndex)) {
2954         existing_storage->ZeroNumericMatrix();
2955         result = existing_storage;
2956     } else {
2957         if (existing_storage) {
2958             DeleteObject (existing_storage);
2959         }
2960         result = new _Matrix (hDim, vDim, bool (theIndex), true);
2961     }
2962 
2963 
2964     if (cmd->varIndex.lLength) {
2965         for (long i=0; i<cmd->varIndex.lLength; i++) {
2966             _Variable* curVar = LocateVar(cmd->varIndex.list_data[i]);
2967             if (curVar->ObjectClass () != MATRIX) {
2968                 if (curVar->IsIndependent()) {
2969                     cmd->varValues[i].value = LocateVar (cmd->varIndex.list_data[i])->Value();
2970                 } else {
2971                     cmd->varValues[i].value = LocateVar (cmd->varIndex.list_data[i])->Compute()->Value();
2972                 }
2973             } else {
2974                 cmd->varValues[i].reference = (hyPointer)((_Matrix*)LocateVar (cmd->varIndex.list_data[i])->Compute())->theData;
2975             }
2976         }
2977     }
2978 
2979 
2980     for (long f = 0L; f < cmd->formulasToEval.lLength; f++) {
2981         cmd->formulaValues [f] = ((_Formula*)cmd->formulasToEval.list_data[f])->ComputeSimple(cmd->theStack, cmd->varValues);
2982     }
2983 
2984     long * fidx = cmd->formulaRefs;
2985 
2986     if (theIndex) {
2987 
2988         result->bufferPerRow = bufferPerRow;
2989         result->overflowBuffer = overflowBuffer;
2990         result->allocationBlock = allocationBlock;
2991 
2992         long* diagIndices = nil;
2993 
2994         if (compressedIndex) {
2995             if (result->lDim < lDim + hDim) {
2996                 result->theIndex = (long*)MemReallocate((hyPointer)result->theIndex,sizeof(long)*(lDim + hDim));
2997                 result->theData = (hyFloat*)MemReallocate ((hyPointer)result->theData,sizeof(hyFloat)*(lDim+hDim));
2998 
2999             }
3000 
3001             result->lDim = lDim;
3002 
3003             if (result->compressedIndex) {
3004                 result->compressedIndex = (long*)MemReallocate ((hyPointer)result->compressedIndex,sizeof(long)*(lDim+hDim+hDim));
3005             } else {
3006                 result->compressedIndex = (long*)MemAllocate (sizeof(long)*(lDim+hDim+hDim));
3007             }
3008 
3009             if (hDim == vDim) {
3010 
3011                 long elements_added                   = 0L;
3012                 long current_element_index_old_matrix = 0L;
3013                 long current_element_index_new_matrix = 0L;
3014                 long from = 0L;
3015                 auto copy_indices = [&] () -> void {
3016                     result->theIndex[current_element_index_new_matrix] = theIndex[current_element_index_old_matrix];
3017                     result->compressedIndex[current_element_index_new_matrix+hDim] = compressedIndex[hDim+current_element_index_old_matrix];
3018                 };
3019                 diagIndices = (long*)alloca (sizeof (long) * hDim);
3020                 auto inject_diagonal = [&] (long r) -> void {
3021                     elements_added++;
3022                     result->lDim ++;
3023                     diagIndices [r] = current_element_index_new_matrix;
3024                     result->theIndex[current_element_index_new_matrix] = r*vDim + r;
3025                     result->theData[current_element_index_new_matrix] = 0.;
3026                     result->compressedIndex[current_element_index_new_matrix+hDim] = r;
3027                     current_element_index_new_matrix++;
3028                 };
3029                 /*if (!_validateCompressedStorage()) {
3030                     HandleApplicationError("Error in compressed storage [before]");
3031                 }*/
3032                 for (long r = 0; r < hDim; r++) {
3033                     diagIndices[r] = -1L;
3034                     for (long c = from; c < compressedIndex[r]; c++, current_element_index_old_matrix++, current_element_index_new_matrix++) {
3035                         if (compressedIndex[c + hDim] < r) { // column before diagonal; copy data
3036                             result->theData[current_element_index_new_matrix] = cmd->formulaValues[fidx[current_element_index_old_matrix]];
3037                             copy_indices();
3038                         } else if (compressedIndex[c + hDim] > r) {
3039                             if (diagIndices[r] == -1) { // no diagonal entry
3040                                 inject_diagonal(r);
3041                             }
3042                             result->theData[current_element_index_new_matrix] = cmd->formulaValues[fidx[current_element_index_old_matrix]];
3043                             copy_indices();
3044                         } else { // diagnoal entry
3045                            copy_indices();
3046                            diagIndices[r] = current_element_index_new_matrix;
3047                         }
3048                     }
3049                     if (diagIndices[r] == -1) { // no diagonal entry
3050                         inject_diagonal(r);
3051                     }
3052                     from = compressedIndex[r];
3053                     result->compressedIndex[r] = from+elements_added;
3054 
3055                 }
3056                 /*if (!result->_validateCompressedStorage()) {
3057                     HandleApplicationError("Error in compressed storage");
3058                 }*/
3059             } else {
3060                 for (long i = 0; i<lDim; i++) {
3061                     long idx = theIndex[i];
3062                     result->theData[i] = cmd->formulaValues[fidx[i]];
3063                     result->theIndex[i] = idx;
3064                     result->compressedIndex[i] = compressedIndex[i];
3065                 }
3066                 for (long i = lDim; i<lDim+hDim; i++) {
3067                     result->compressedIndex[i] = compressedIndex[i];
3068                 }
3069             }
3070 
3071         } else {
3072 
3073             if (result->lDim != lDim) {
3074                 result->lDim = lDim;
3075                 result->theIndex = (long*)MemReallocate((hyPointer)result->theIndex,sizeof(long)*lDim);
3076                 result->theData = (hyFloat*)MemReallocate ((hyPointer)result->theData,sizeof(hyFloat)*lDim);
3077             }
3078 
3079             for (long i = 0; i<lDim; i++) {
3080                 long idx = theIndex[i];
3081 
3082                 if (idx != -1) {
3083                     result->theData[i] = cmd->formulaValues[fidx[i]];
3084                 }
3085 
3086                 result->theIndex[i] = idx;
3087             }
3088         }
3089 
3090 
3091         if (hDim==vDim) {
3092 
3093             if (result->compressedIndex) {
3094                 long from = 0L;
3095                 for (long r = 0; r < hDim; r++) {
3096                     //printf ("%ld\n", diagIndices[r]);
3097                     long di = diagIndices[r];
3098                     for (long c = from; c < result->compressedIndex[r]; c++) {
3099                         //printf ("%ld %g\n", c, result->theData[c]);
3100                         if (c != di) {
3101                             result->theData[di] -= result->theData[c];
3102                         }
3103                     }
3104                     from = result->compressedIndex[r];
3105                 }
3106                 /*for (long r = 0; r < hDim; r++) {
3107                     printf ("%ld %g\n", diagIndices[r], result->theData[diagIndices[r]]);
3108                 }
3109                 exit (0);*/
3110             }
3111             else {
3112                 hyFloat* diagStorage = (hyFloat*)alloca (sizeof(hyFloat) * hDim);
3113                 memset (diagStorage, 0, sizeof(hyFloat) * hDim);
3114                 for (long i = 0; i<lDim; i++) {
3115                     long k = result->theIndex[i];
3116                     if (k!=-1) {
3117                         diagStorage[k/hDim] += result->theData[i];
3118                     }
3119                 }
3120                 for (long i = 0; i<hDim; i++) {
3121                      (*result)[i*hDim+i] = -diagStorage[i];
3122                 }
3123            }
3124 
3125 
3126         }
3127     } else {
3128 
3129         for (long i = 0; i<lDim; i++) {
3130             if (fidx[i]>= 0) {
3131                 result->theData[i] = cmd->formulaValues[fidx[i]];
3132             }
3133         }
3134 
3135         if (hDim==vDim)
3136             for (long i = 0L, r = 0L; i<lDim; i+=vDim+1L, r++) {
3137                 if (fidx[i] < 0) { // mod Aug 2 2005
3138                     //if (theFormulas[i]->IsEmpty())
3139                     //{
3140 
3141                     hyFloat st = 0.;
3142                     long j;
3143 
3144                     for (j = r*vDim; j<r*vDim+r; j++) {
3145                         st += result->theData[j];
3146                     }
3147 
3148                     for (j = r*vDim+r+1; j<(r+1)*vDim; j++) {
3149                         st += result->theData[j];
3150                     }
3151 
3152                     result->theData[i] = -st;
3153                     //}
3154                 }
3155             }
3156     }
3157     //return (_PMathObj)result.makeDynamic();
3158     return result;
3159 }
3160 //_____________________________________________________________________________________________
ClearFormulae(void)3161 void    _Matrix::ClearFormulae (void)
3162 {
3163     _Formula ** theFormulas = (_Formula**)theData;
3164     if (theIndex) {
3165         for (long i = 0; i<lDim; i++) {
3166             if (IsNonEmpty(i)) {
3167                 delete (theFormulas[i]);
3168             }
3169         }
3170     } else
3171         for (long i = 0; i<lDim; i++) {
3172             if (theFormulas[i]!=(_Formula*)ZEROPOINTER) {
3173                 delete (theFormulas[i]);
3174             }
3175         }
3176 }
3177 
3178 //_____________________________________________________________________________________________
ClearObjects(void)3179 void    _Matrix::ClearObjects (void)
3180 {
3181     _MathObject ** thePolys = (_MathObject**)theData;
3182     if (theIndex) {
3183         for (long i = 0; i<lDim; i++) {
3184             if (IsNonEmpty(i)) {
3185                 DeleteObject (thePolys[i]);
3186             }
3187         }
3188     } else
3189         for (long i = 0; i<lDim; i++) {
3190             if (thePolys[i]!=(_MathObject*)ZEROPOINTER) {
3191                 DeleteObject (thePolys[i]);
3192             }
3193         }
3194 }
3195 
3196 //_____________________________________________________________________________________________
3197 
Clear(bool complete)3198 void    _Matrix::Clear (bool complete) {
3199     DeleteObject (theValue);
3200     if (is_expression_based()) { // has formulas in it - must delete
3201         ClearFormulae();
3202     }
3203     if (is_polynomial()) { // has objects in it - must delete
3204         ClearObjects();
3205     }
3206 
3207     if (theIndex) {
3208         if (complete) {
3209             MatrixMemFree (theIndex);
3210             theIndex = nil;
3211         } else {
3212             InitializeArray(theIndex, lDim, -1L);
3213         }
3214     }
3215     if (theData) {
3216         if (complete) {
3217             MatrixMemFree (theData);
3218             hDim = vDim = 0;
3219             theData = nil;
3220         } else {
3221             memset (theData, 0, lDim * (is_numeric() ? sizeof (hyFloat): sizeof (void*)));
3222         }
3223     }
3224     if (compressedIndex) {
3225         MatrixMemFree (compressedIndex);
3226         compressedIndex = nil;
3227     }
3228 
3229 }
3230 
3231 //_____________________________________________________________________________________________
3232 
ZeroNumericMatrix(void)3233 void    _Matrix::ZeroNumericMatrix (void) {
3234     if (is_numeric()) {
3235         memset (theData, 0, sizeof (hyFloat) * lDim);
3236         if (!is_dense()) {
3237             InitializeArray (theIndex, lDim, -1L);
3238         }
3239     }
3240 }
3241 
3242 //_____________________________________________________________________________________________
3243 
Resize(long newH)3244 void    _Matrix::Resize (long newH) {
3245     if (newH >= 0 && newH != hDim && is_numeric() && is_dense()) {
3246         hDim = newH;
3247         lDim = newH*vDim;
3248 
3249         if (theData) {
3250             theData = (hyFloat*) MemReallocate ((hyPointer)theData,sizeof (hyFloat)*lDim);
3251         } else {
3252             theData = (hyFloat*) MemAllocate (sizeof (hyFloat)*lDim);
3253         }
3254     }
3255 }
3256 
3257 //_____________________________________________________________________________________________
3258 
~_Matrix(void)3259 _Matrix::~_Matrix (void) {
3260     _Matrix::Clear();
3261 }
3262 
3263 //_____________________________________________________________________________________________
3264 
operator =(_Matrix const & m)3265 _Matrix const&    _Matrix::operator = (_Matrix const& m) {
3266     // SLKP 20180917 : reuse memory if copying dense numeric matrices of the same dimension
3267     if (m.is_numeric() && is_numeric() && CanFreeMe() && m.theIndex == nil && theIndex == nil && m.GetHDim () == GetHDim () && GetVDim () == m.GetVDim()) {
3268       unsigned long i = 0UL;
3269       for (unsigned long r = 0UL; r < hDim; r++) {
3270         for (unsigned long c = 0UL; c < vDim; c++, i++) {
3271           theData[i] = m.theData[i];
3272         }
3273       }
3274     } else {
3275       Clear();
3276       DuplicateMatrix (this, &m);
3277     }
3278     return *this;
3279 }
3280 
3281 //_____________________________________________________________________________________________
3282 
operator =(_Matrix const * m)3283 _Matrix const&    _Matrix::operator = (_Matrix const* m) {
3284     //Clear();
3285     //DuplicateMatrix (this, m);
3286     *this = *m;
3287     return *this;
3288 }
3289 
3290 
3291 //_____________________________________________________________________________________________
AbsValue(void) const3292 hyFloat _Matrix::AbsValue (void) const{
3293     if (is_numeric() && (is_row() || is_column())) {
3294         hyFloat norm = 0.;
3295 
3296         this->ForEach ([&] (hyFloat&& value, unsigned long, long) -> void {norm += value * value;},
3297                  [&] (unsigned long index) -> hyFloat {return theData[index];});
3298 
3299         return sqrt(norm);
3300     }
3301 
3302     return 0.;
3303 }
3304 
3305 //_____________________________________________________________________________________________
Abs(HBLObjectRef cache)3306 HBLObjectRef _Matrix::Abs (HBLObjectRef cache)
3307 {
3308     if (storageType == 1 && (hDim==1 || vDim == 1)) {
3309         return _returnConstantOrUseCache(AbsValue(), cache);
3310     }
3311     return _returnConstantOrUseCache(MaxElement(), cache);
3312 
3313 }
3314 
3315 //_____________________________________________________________________________________________
3316 
AddMatrix(_Matrix & storage,_Matrix & secondArg,bool subtract)3317 void    _Matrix::AddMatrix  (_Matrix& storage, _Matrix& secondArg, bool subtract)
3318 // addition operation on matrices
3319 // internal function
3320 
3321 {
3322 
3323     // check matrix dimensions to ensure that they are addable
3324     if (!((hDim==secondArg.hDim)&&(storage.hDim==secondArg.hDim)&&(vDim==secondArg.vDim)&&(storage.vDim==secondArg.vDim))) {
3325         HandleApplicationError  (_String ("Incompatible dimensions when trying to add or subtract matrices: first argument was a ") & _String (hDim) & 'x'
3326                           & _String (vDim) & " matrix and the second was a "& _String (secondArg.hDim) & 'x'  & _String (secondArg.vDim) & " matrix.");
3327         return;
3328     }
3329 
3330     if (is_numeric()) {
3331         if (&storage != this) { // not an add&store operation
3332             // copy *this to storage
3333             if (theIndex) { //sparse matrix
3334                 for (long i = 0; i<lDim; i++) {
3335                     long k = theIndex[i];
3336                     if (k!=-1) {
3337                         storage[k] = theData[i];
3338                     }
3339                 }
3340             } else { // dense matrix
3341                 memcpy (storage.theData, theData, sizeof (hyFloat)*lDim);
3342             }
3343         }
3344 
3345         if (secondArg.theIndex) { //sparse matrix
3346             if (storage.theIndex) {
3347                 if (subtract) {
3348                     for (long i = 0; i<secondArg.lDim; i++) {
3349                         long k = secondArg.theIndex[i];
3350                         if (k!=-1) {
3351                             storage[k]-=secondArg.theData[i];
3352                         }
3353                     }
3354                 } else {
3355                     for (long i = 0; i<secondArg.lDim; i++) {
3356                         long k = secondArg.theIndex[i];
3357                         if (k!=-1) {
3358                             storage[k]+=secondArg.theData[i];
3359                         }
3360                     }
3361                 }
3362             } else {
3363                 if (subtract) {
3364                     for (long i = 0; i<secondArg.lDim; i++) {
3365                         long k = secondArg.theIndex[i];
3366                         if (k!=-1) {
3367                             storage.theData[k]-=secondArg.theData[i];
3368                         }
3369                     }
3370                 } else {
3371                     for (long i = 0; i<secondArg.lDim; i++) {
3372                         long k = secondArg.theIndex[i];
3373                         if (k!=-1) {
3374                             storage.theData[k]+=secondArg.theData[i];
3375                         }
3376                     }
3377                 }
3378             }
3379 
3380         } else {
3381             hyFloat * _hprestrict_ argData = secondArg.theData;
3382             hyFloat * _hprestrict_ stData  = storage.theData;
3383 
3384             long    upto = secondArg.lDim >> 4 << 4;
3385 
3386             if (subtract) {
3387 #ifdef  _SLKP_USE_AVX_INTRINSICS
3388         #define     CELL_OP1(x,y) __m256d y = _mm256_sub_pd (_mm256_loadu_pd (stData+x), _mm256_loadu_pd (argData+x))
3389         #define CELL_OP2(x,y) _mm256_storeu_pd (stData+x,y)
3390 
3391         #pragma GCC unroll 4
3392         #pragma clang loop vectorize(enable)
3393         #pragma clang loop interleave(enable)
3394         #pragma clang loop unroll(enable)
3395         #pragma GCC ivdep
3396         #pragma ivdep
3397                for (long idx = 0; idx < upto; idx+=16) {
3398                     CELL_OP1 (idx,r1);
3399                     CELL_OP1 (idx+4,r2);
3400                     CELL_OP1 (idx+8,r3);
3401                     CELL_OP1 (idx+12,r4);
3402                     CELL_OP2 (idx,r1);
3403                     CELL_OP2 (idx+4,r2);
3404                     CELL_OP2 (idx+8,r3);
3405                     CELL_OP2 (idx+12,r4);
3406                 }
3407 #elif defined  _SLKP_USE_ARM_NEON
3408         #define     CELL_OP1(x,y) float64x2_t y = vsubq_f64 (vld1q_f64 (stData+x), vld1q_f64 (argData+x))
3409         #define CELL_OP2(x,y) vst1q_f64 (stData+x,y)
3410         #pragma GCC unroll 4
3411         #pragma clang loop vectorize(enable)
3412         #pragma clang loop interleave(enable)
3413         #pragma clang loop unroll(enable)
3414         #pragma GCC ivdep
3415         #pragma ivdep
3416                for (long idx = 0; idx < upto; idx+=8) {
3417                     CELL_OP1 (idx,r1);
3418                     CELL_OP1 (idx+2,r2);
3419                     CELL_OP1 (idx+4,r3);
3420                     CELL_OP1 (idx+6,r4);
3421                     CELL_OP2 (idx,r1);
3422                     CELL_OP2 (idx+2,r2);
3423                     CELL_OP2 (idx+4,r3);
3424                     CELL_OP2 (idx+6,r4);
3425                 }
3426 
3427 #else
3428                 for (long idx = 0; idx < upto; idx+=4) {
3429                     stData[idx]-=argData[idx];
3430                     stData[idx+1]-=argData[idx+1];
3431                     stData[idx+2]-=argData[idx+2];
3432                     stData[idx+3]-=argData[idx+3];
3433                 }
3434 #endif
3435             } else {
3436 #ifdef  _SLKP_USE_AVX_INTRINSICS
3437             #define     CELL_OP(x) _mm256_storeu_pd (stData+x, _mm256_add_pd (_mm256_loadu_pd (stData+x), _mm256_loadu_pd (argData+x)))
3438 
3439 
3440             #pragma GCC unroll 4
3441             #pragma clang loop vectorize(enable)
3442             #pragma clang loop interleave(enable)
3443             #pragma clang loop unroll(enable)
3444                  for (long idx = 0; idx < upto; idx+=16) {
3445                      CELL_OP (idx);
3446                      CELL_OP (idx+4);
3447                      CELL_OP (idx+8);
3448                      CELL_OP (idx+12);
3449                  }
3450 
3451 #elif defined  _SLKP_USE_ARM_NEON
3452         #define     CELL_OP1(x,y) float64x2_t y = vaddq_f64 (vld1q_f64 (stData+x), vld1q_f64 (argData+x))
3453         #define CELL_OP2(x,y) vst1q_f64 (stData+x,y)
3454         #pragma GCC unroll 4
3455         #pragma clang loop vectorize(enable)
3456         #pragma clang loop interleave(enable)
3457         #pragma clang loop unroll(enable)
3458         #pragma GCC ivdep
3459         #pragma ivdep
3460                for (long idx = 0; idx < upto; idx+=8) {
3461                     CELL_OP1 (idx,r1);
3462                     CELL_OP1 (idx+2,r2);
3463                     CELL_OP1 (idx+4,r3);
3464                     CELL_OP1 (idx+6,r4);
3465                     CELL_OP2 (idx,r1);
3466                     CELL_OP2 (idx+2,r2);
3467                     CELL_OP2 (idx+4,r3);
3468                     CELL_OP2 (idx+6,r4);
3469                 }
3470 
3471 #else
3472                 for (long idx = 0; idx < upto; idx+=4) {
3473                     stData[idx]+=argData[idx];
3474                     stData[idx+1]+=argData[idx+1];
3475                     stData[idx+2]+=argData[idx+2];
3476                     stData[idx+3]+=argData[idx+3];
3477                 }
3478 #endif
3479             }
3480             if (subtract)
3481                 for (long idx = upto; idx < secondArg.lDim; idx++) {
3482                     stData[idx]-=argData[idx];
3483                  }
3484             else
3485                 for (long idx = upto; idx < secondArg.lDim; idx++) {
3486                     stData[idx]+=argData[idx];
3487                  }
3488 
3489         }
3490     } else
3491 
3492         if (storageType == 0) {
3493             long i;
3494             if (&storage != this) { // not an add&store operation
3495                 /*              if (theIndex) //sparse matrix
3496                                 {
3497                                     for (i = 0; i<lDim; i++)
3498                                         if (IsNonEmpty(i))
3499                                             storage.StoreObject(theIndex[i],GetMatrixObject(i),true);
3500                                 }
3501                                 else // normal matrix
3502                                 {
3503                                     for (i = 0; i<lDim; i++)
3504                                         storage.StoreObject(i,GetMatrixObject(i),true);
3505                                 }*/
3506             }
3507 
3508             if (secondArg.theIndex) { //sparse matrix
3509                 if (theIndex) { // both matrices are sparse
3510                     if (subtract) {
3511                         for (i = 0; i<secondArg.lDim; i++)
3512                             if (secondArg.IsNonEmpty(i)) {
3513                                 long hb =secondArg.HashBack (i), h = Hash (hb/vDim, hb%vDim);
3514                                 if (h<0) { // kEmptyString slot in matrix 1
3515                                     storage.StoreObject (hb,secondArg.GetMatrixObject(i)->Minus());
3516                                 } else {
3517                                     storage.StoreObject (hb, GetMatrixObject(h)->Sub (secondArg.GetMatrixObject(i)));
3518                                 }
3519                             }
3520                     } else {
3521                         for (i = 0; i<secondArg.lDim; i++)
3522                             if (secondArg.IsNonEmpty(i)) {
3523                                 long hb =secondArg.HashBack (i), h = Hash (hb/vDim, hb%vDim);
3524                                 if (h<0) { // kEmptyString slot in matrix 1
3525                                     storage.StoreObject (hb,secondArg.GetMatrixObject(i),true);
3526                                 } else {
3527                                     storage.StoreObject (hb,GetMatrixObject(h)->Add (secondArg.GetMatrixObject(i)));
3528                                 }
3529                             }
3530                     }
3531                 } else { // *this is not sparse
3532                     DuplicateMatrix(&storage,this);
3533                     if (subtract) {
3534                         for (i = 0; i<secondArg.lDim; i++)
3535                             if (secondArg.IsNonEmpty(i)) {
3536                                 long p = secondArg.HashBack (i);
3537                                 if (CheckObject(p)) {
3538                                     storage.StoreObject (p,GetMatrixObject(p)->Sub(secondArg.GetMatrixObject(i)));
3539                                 } else {
3540                                     storage.StoreObject (p,secondArg.GetMatrixObject(i)->Minus());
3541                                 }
3542                             }
3543                     } else {
3544                         for (i = 0; i<secondArg.lDim; i++)
3545                             if (secondArg.IsNonEmpty(i)) {
3546                                 long p = secondArg.HashBack (i);
3547                                 if (CheckObject(p)) {
3548                                     storage.StoreObject (p,GetMatrixObject(p)->Add(secondArg.GetMatrixObject(i)));
3549                                 } else {
3550                                     storage.StoreObject (p,secondArg.GetMatrixObject(i),true);
3551                                 }
3552                             }
3553                     }
3554                 }
3555             } else { // secondarg isn't sparse - storage must also be non-sparse
3556                 if (storage.theIndex) { // storage is sparse - oops
3557                     storage.CheckIfSparseEnough(true);    // force to non-sparse storage
3558                 }
3559                 if (!theIndex) { // * this is not sparse
3560                     HBLObjectRef tempP;
3561                     DuplicateMatrix(&storage,this);
3562                     if (subtract) {
3563                         for (i = 0; i<secondArg.lDim; i++) {
3564                             tempP = secondArg.GetMatrixObject(i);
3565                             if (tempP) {
3566                                 if (CheckObject(i)) {
3567                                     storage.StoreObject(i,GetMatrixObject(i)->Sub(tempP));
3568                                 } else {
3569                                     storage.StoreObject(i,tempP->Minus());
3570                                 }
3571                             }
3572                         }
3573                     } else {
3574                         for (i = 0; i<secondArg.lDim; i++) {
3575                             tempP = secondArg.GetMatrixObject(i);
3576                             if (tempP) {
3577                                 if (CheckObject(i)) {
3578                                     storage.StoreObject(i,GetMatrixObject(i)->Add(tempP));
3579                                 } else {
3580                                     storage.StoreObject(i,tempP,true);
3581                                 }
3582                             }
3583                         }
3584                     }
3585                 } else { // *this is sparse
3586                     HBLObjectRef tempP;
3587                     long h;
3588                     if (subtract) {
3589                         for (i = 0; i<secondArg.lDim; i++) {
3590                             tempP = secondArg.GetMatrixObject(i);
3591                             if (tempP) {
3592                                 h = Hash (i/hDim,i%hDim);
3593                                 if (h>=0) {
3594                                     storage.StoreObject(i,GetMatrixObject(h)->Sub(tempP));
3595                                 } else {
3596                                     storage.StoreObject(i,tempP->Minus());
3597                                 }
3598                             }
3599                         }
3600                     } else {
3601                         for (i = 0; i<secondArg.lDim; i++) {
3602                             tempP = secondArg.GetMatrixObject(i);
3603                             if (tempP) {
3604                                 h = Hash (i/hDim,i%hDim);
3605                                 if (h>=0) {
3606                                     storage.StoreObject(i,GetMatrixObject(h)->Add(tempP));
3607                                 } else {
3608                                     storage.StoreObject(i,tempP,true);
3609                                 }
3610                             }
3611                         }
3612                     }
3613                 }
3614             }
3615         }
3616     if (storage.theIndex) {
3617         storage.CheckIfSparseEnough();
3618     }
3619 }
3620 
3621 //_____________________________________________________________________________________________
3622 
AddWithThreshold(_Matrix & secondArg,hyFloat prec)3623 bool    _Matrix::AddWithThreshold  (_Matrix& secondArg, hyFloat prec)
3624 {
3625     bool res = true;
3626     if (secondArg.theIndex) { //sparse matrix
3627         long i,k;
3628         for (i = 0; res&&(i<secondArg.lDim); i++) {
3629             k = secondArg.theIndex[i];
3630             if (k!=-1) {
3631                 if (secondArg.theData[i]/theData[k] > prec) {
3632                     res = false;
3633                 }
3634                 theData[k]+=secondArg.theData[i];
3635             }
3636         }
3637         for (; i<secondArg.lDim; i++) {
3638             k = secondArg.theIndex[i];
3639             if (k!=-1) {
3640                 theData[k]+=secondArg.theData[i];
3641             }
3642         }
3643     } else {
3644         hyFloat* argData = secondArg.theData, *stData = theData,
3645                     *bound = theData+lDim;
3646         for (; res&&(stData!=bound); argData++, stData++) {
3647             if (*argData/ *stData> prec) {
3648                 res = false;
3649             }
3650             *stData+=*argData;
3651         }
3652         for (; stData!=bound; argData++, stData++) {
3653             *stData+=*argData;
3654         }
3655     }
3656     return !res;
3657 }
3658 
3659 //_____________________________________________________________________________________________
3660 
Subtract(_Matrix & storage,_Matrix & secondArg)3661 void    _Matrix::Subtract  (_Matrix& storage, _Matrix& secondArg)
3662 // subtraction operation on matrices
3663 // internal function
3664 
3665 {
3666     AddMatrix (storage,secondArg,true);
3667 }
3668 
3669 //_____________________________________________________________________________________________
3670 
Multiply(_Matrix & storage,hyFloat c)3671 void    _Matrix::Multiply  (_Matrix& storage, hyFloat c)
3672 // multiply a matrix by a scalar
3673 // internal function
3674 
3675 {
3676     if (is_numeric()) { // numbers
3677         hyFloat * _hprestrict_  destination = storage.theData;
3678         hyFloat const *  source      = theData;
3679 
3680         if (theIndex) {
3681             for (long k = 0L; k < lDim; k++)
3682                 if (storage.theIndex[k] != -1) {
3683                     destination[k] = source[k]*c;
3684                 }
3685         } else {
3686   #ifdef  _SLKP_USE_AVX_INTRINSICS
3687       #define                 CELL_OP(k) _mm256_storeu_pd (destination + k, _mm256_mul_pd(value_op, _mm256_loadu_pd (source+k)))
3688             long lDimM4 = lDim >> 4 << 4,
3689                  k = 0;
3690 
3691             __m256d  value_op = _mm256_set1_pd (c);
3692              for (k = 0L; k < lDimM4; k+=16) {
3693                  CELL_OP (k);
3694                  CELL_OP (k+4);
3695                  CELL_OP (k+8);
3696                  CELL_OP (k+12);
3697             }
3698             for (; k < lDim; k++) {
3699                 destination[k] = source[k]*c;
3700             }
3701   #elif defined _SLKP_USE_ARM_NEON
3702             #define                 CELL_OP(k) vst1q_f64 (destination + k, vmulq_f64(value_op, vld1q_f64 (source+k)))
3703             long lDimM8 = lDim >> 3 << 3,
3704                  k = 0;
3705 
3706             float64x2_t  value_op = vdupq_n_f64 (c);
3707              for (k = 0L; k < lDimM8; k+=8) {
3708                  CELL_OP (k);
3709                  CELL_OP (k+2);
3710                  CELL_OP (k+4);
3711                  CELL_OP (k+6);
3712             }
3713             for (; k < lDim; k++) {
3714                 destination[k] = source[k]*c;
3715             }
3716   #else
3717             for (long k = 0L; k < lDim; k++) {
3718                 destination[k] = source[k]*c;
3719             }
3720   #endif
3721         }
3722 
3723     } else {
3724         _Constant * cc = new _Constant (c);
3725 
3726         if (storageType == 2) {
3727             _String const    star ('*');
3728 
3729             for (long i=0; i<lDim; i++)
3730                 if (IsNonEmpty (i)) {
3731                     long h       = HashBack (i);
3732                     _Formula * f = GetFormula (h/vDim,h%vDim);
3733                     f->GetList().AppendNewInstance (new _Operation (cc));
3734                     f->GetList().AppendNewInstance (new _Operation (star,2));
3735                 }
3736         } else {
3737             if (storageType != 3) {
3738                 if (theIndex)
3739                     //sparse matrix
3740                 {
3741                     for (long i=0; i<lDim; i++)
3742                         if (IsNonEmpty (i)) {
3743                             storage.StoreObject (HashBack(i),GetMatrixObject(i)->Mult (cc));
3744                         }
3745                 } else {
3746                     for (long i=0; i<lDim; i++)
3747                         if (IsNonEmpty (i)) {
3748                             storage.StoreObject (i,GetMatrixObject(i)->Mult (cc));
3749                         }
3750                 }
3751             }
3752             DeleteObject (cc);
3753         }
3754 
3755     }
3756 }
3757 
3758 
3759 //_____________________________________________________________________________________________
3760 
Multiply(_Matrix & storage,_Matrix const & secondArg) const3761 void    _Matrix::Multiply  (_Matrix& storage, _Matrix const& secondArg) const
3762 // multiplication operation on matrices
3763 // internal function
3764 // storage is assumed to NOT be *this
3765 
3766 {
3767     HBLObjectRef tempP, tempP2;
3768 
3769     if ( !theIndex && !secondArg.theIndex)
3770         // simplest case of two non-sparse matrices - multiply in a straightforward way
3771     {
3772         if ( storageType == 0 && secondArg.storageType ==0) { // both matrices are polynomial in nature
3773             for (long i=0; i<hDim; i++)
3774                 for (long j=i*secondArg.vDim; j<(i+1)*secondArg.vDim; j++) {
3775                     _MathObject* secTerm = secondArg.GetMatrixObject(j%secondArg.vDim), *firstTerm = GetMatrixObject (i*vDim);
3776                     if (firstTerm&&secTerm) {
3777                         storage.StoreObject (j,firstTerm->Mult (secTerm));
3778                     } else {
3779                         storage.StoreObject (j,new _Polynomial(0.0));
3780                     }
3781                     for (long k=i*vDim+1, l=j%secondArg.vDim+secondArg.vDim; k<(i+1)*vDim; k++, l+=secondArg.vDim) {
3782                         tempP = GetMatrixObject (k), tempP2 = secondArg.GetMatrixObject(l);
3783                         if (tempP&&tempP2) {
3784                             _MathObject* temp = tempP->Mult (tempP2);
3785                             storage.StoreObject (j,temp->Add(storage.GetMatrixObject(j)));
3786                             DeleteObject (temp);
3787                         }
3788                     }
3789                 }
3790         } else {
3791             if ( hDim == vDim && secondArg.hDim == secondArg.vDim)
3792                 /* two square dense matrices */
3793             {
3794                 unsigned long cumulativeIndex = 0UL;
3795                 const unsigned long dimm4 = (vDim >> 2) << 2;
3796 
3797                 //const hyFloat * row = theData;
3798                 hyFloat  * dest = storage.theData;
3799 
3800 #if defined _SLKP_USE_AVX_INTRINSICS
3801                 #define DO_GROUP_OP0(X,Y,k) Y = _mm256_loadu_pd(secondArg.theData + col_offset + k); X = _mm256_mul_pd (A4,Y);
3802                 #ifdef _SLKP_USE_FMA3_INTRINSICS
3803                     #define DO_GROUP_OP(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k); Y = _mm256_loadu_pd(secondArg.theData + col_offset + k); _mm256_storeu_pd (dest + row_offset + k, _mm256_fmadd_pd (A4,Y,X));
3804                     #define DO_GROUP_OP1(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k); Y = _mm256_loadu_pd(secondArg.theData + col_offset + k); X = _mm256_fmadd_pd (A4,Y,X);
3805                     #define DO_GROUP_OP2(X,k) _mm256_storeu_pd (dest + row_offset + k,X);
3806                 #else
3807                     #define DO_GROUP_OP(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k);  Y = _mm256_loadu_pd(secondArg.theData + col_offset + k); _mm256_storeu_pd (dest + row_offset + k, _mm256_add_pd (X, _mm256_mul_pd(A4, Y)));
3808 
3809                     #define DO_GROUP_OP1(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k); Y = _mm256_loadu_pd(secondArg.theData + col_offset + k); X = _mm256_add_pd (X, _mm256_mul_pd(A4, Y));
3810                     #define DO_GROUP_OP2(X,k) _mm256_storeu_pd (dest + row_offset + k,X);
3811                #endif
3812 
3813 #elif _SLKP_USE_SSE_INTRINSICS
3814             #define DO_GROUP_OP1(X,Y,k) X = _mm_loadu_pd(dest + row_offset + k); Y = _mm_loadu_pd(secondArg.theData + col_offset + k); X = _mm_add_pd (X, _mm_mul_pd(A4, Y));
3815             #define DO_GROUP_OP2(X,k) _mm_storeu_pd (dest + row_offset + k,X);
3816 
3817 #elif _SLKP_USE_ARM_NEON
3818             #define DO_GROUP_OP1(X,Y,k) X = vld1q_f64 (dest + row_offset + k); Y = vld1q_f64 (secondArg.theData + col_offset + k); X = vfmaq_f64 (X, A4, Y);
3819             #define DO_GROUP_OP2(X,k) vst1q_f64 (dest + row_offset + k,X);
3820 #endif
3821 
3822 
3823 #ifndef _SLKP_SSE_VECTORIZATION_
3824 
3825               if (dimm4 == vDim) {
3826                   #if defined  _SLKP_USE_AVX_INTRINSICS
3827                       long ti = 0L,
3828                            row_offset = 0L;
3829 
3830                       if (vDim == 20UL) {
3831                           for (long r = 0; r < 20; r++, row_offset += 20) {
3832                               long col_offset = 0L;
3833                               __m256d A4 = _mm256_set1_pd(theData[ti]);
3834                               __m256d D4, B4, D4_1, B4_1, D4_2, B4_2, D4_3, B4_3, D4_4, B4_4;
3835                               DO_GROUP_OP0 (D4, B4, 0);
3836                               DO_GROUP_OP0 (D4_1, B4_1, 4);
3837                               DO_GROUP_OP0 (D4_2, B4_2, 8);
3838                               DO_GROUP_OP0 (D4_3, B4_3, 12);
3839                               DO_GROUP_OP0 (D4_4, B4_4, 16);
3840                               DO_GROUP_OP2 (D4, 0);
3841                               DO_GROUP_OP2 (D4_1, 4);
3842                               DO_GROUP_OP2 (D4_2, 8);
3843                               DO_GROUP_OP2 (D4_3, 12);
3844                               DO_GROUP_OP2 (D4_4, 16);
3845                               ti++;
3846                               col_offset = 20L;
3847                               for (long c = 1; c < 20L; c++, ti++, col_offset += 20L) {
3848                                   A4 = _mm256_set1_pd(theData[ti]);
3849                                   //for (long k = 0; k < 20L; k+=4) {
3850                                       DO_GROUP_OP1 (D4, B4, 0);
3851                                       DO_GROUP_OP1 (D4_1, B4_1, 4);
3852                                       DO_GROUP_OP1 (D4_2, B4_2, 8);
3853                                       DO_GROUP_OP1 (D4_3, B4_3, 12);
3854                                       DO_GROUP_OP1 (D4_4, B4_4, 16);
3855                                       DO_GROUP_OP2 (D4, 0);
3856                                       DO_GROUP_OP2 (D4_1, 4);
3857                                       DO_GROUP_OP2 (D4_2, 8);
3858                                       DO_GROUP_OP2 (D4_3, 12);
3859                                       DO_GROUP_OP2 (D4_4, 16);
3860                                   //}
3861                             }
3862                           }
3863                       } else if (vDim == 60UL) {
3864                           for (long r = 0; r < 60; r++, row_offset += 60) {
3865                               long col_offset = 0L;
3866                               __m256d A4 = _mm256_set1_pd(theData[ti]);
3867                               for (long k = 0; k < 60L; k+=4) {
3868                                   __m256d D4, B4;
3869                                   DO_GROUP_OP0 (D4, B4, k);
3870                                   DO_GROUP_OP2 (D4, k);
3871                               }
3872                               col_offset = 60L;
3873                               ti++;
3874                               for (long c = 1; c < 60L; c++, ti++, col_offset += 60L) {
3875                                   A4 = _mm256_set1_pd(theData[ti]);
3876                                   for (long k = 0; k < 60L; k+=4) {
3877                                       __m256d D4, B4;
3878                                       DO_GROUP_OP (D4, B4, k);
3879                                   }
3880                             }
3881                           }
3882                       } else
3883                           for (long r = 0; r < vDim; r++, row_offset += vDim) {
3884                               long col_offset = 0L;
3885                               __m256d A4 = _mm256_set1_pd(theData[ti]);
3886                               for (long k = 0; k < vDim; k+=4) {
3887                                   __m256d D4, B4;
3888                                   DO_GROUP_OP0 (D4, B4, k);
3889                                   DO_GROUP_OP2 (D4, k);
3890                               }
3891                               col_offset = vDim;
3892                               ti++;
3893                               for (long c = 1; c < vDim; c++, ti++, col_offset += vDim) {
3894                                  A4 = _mm256_set1_pd(theData[ti]);
3895                                  #pragma GCC unroll 4
3896                                  #pragma clang loop vectorize(enable)
3897                                  #pragma clang loop interleave(enable)
3898                                  #pragma clang loop unroll(enable)
3899                                  for (long k = 0; k < vDim; k+=4) {
3900                                       __m256d D4, B4;
3901                                       DO_GROUP_OP (D4, B4, k);
3902                                   }
3903                             }
3904                           }
3905                       return;
3906                   #elif _SLKP_USE_SSE_INTRINSICS
3907                      memset (dest, 0, lDim * sizeof (hyFloat));
3908                      long ti = 0L,
3909                            row_offset = 0L;
3910 
3911                       if (vDim == 20UL) {
3912                           for (long r = 0; r < 20; r++, row_offset += 20) {
3913                               long col_offset = 0L;
3914                               for (long c = 0; c < 20L; c++, ti++, col_offset += 20L) {
3915                                   __m128d A4 = _mm_set1_pd(theData[ti]);
3916                                   //for (long k = 0; k < 20L; k+=4) {
3917                                       __m128d D4, B4, D4_1, B4_1, D4_2, B4_2, D4_3, B4_3, D4_4, B4_4;
3918                                       DO_GROUP_OP1 (D4, B4, 0);
3919                                       DO_GROUP_OP1 (D4_1, B4_1, 2);
3920                                       DO_GROUP_OP1 (D4_2, B4_2, 4);
3921                                       DO_GROUP_OP1 (D4_3, B4_3, 6);
3922                                       DO_GROUP_OP1 (D4_4, B4_4, 8);
3923                                       DO_GROUP_OP2 (D4, 0);
3924                                       DO_GROUP_OP2 (D4_1, 2);
3925                                       DO_GROUP_OP2 (D4_2, 4);
3926                                       DO_GROUP_OP2 (D4_3, 6);
3927                                       DO_GROUP_OP2 (D4_4, 8);
3928                                       DO_GROUP_OP1 (D4, B4, 10);
3929                                       DO_GROUP_OP1 (D4_1, B4_1, 12);
3930                                       DO_GROUP_OP1 (D4_2, B4_2, 14);
3931                                       DO_GROUP_OP1 (D4_3, B4_3, 16);
3932                                       DO_GROUP_OP1 (D4_4, B4_4, 18);
3933                                       DO_GROUP_OP2 (D4, 10);
3934                                       DO_GROUP_OP2 (D4_1, 12);
3935                                       DO_GROUP_OP2 (D4_2, 14);
3936                                       DO_GROUP_OP2 (D4_3, 16);
3937                                       DO_GROUP_OP2 (D4_4, 18);
3938                                   //}
3939                             }
3940                           }
3941                       } else
3942                           for (long r = 0; r < vDim; r++, row_offset += vDim) {
3943                               long col_offset = 0L;
3944                               for (long c = 0; c < vDim; c++, ti++, col_offset += vDim) {
3945                                   __m128d A4 = _mm_set1_pd(theData[ti]);
3946                                   #pragma GCC unroll 4
3947                                   #pragma clang loop vectorize(enable)
3948                                   #pragma clang loop interleave(enable)
3949                                   #pragma clang loop unroll(enable)
3950                                   for (long k = 0; k < vDim; k+=2) {
3951                                       __m128d D4, B4;
3952                                       DO_GROUP_OP1 (D4, B4, k);
3953                                       DO_GROUP_OP2 (D4, k);
3954                                   }
3955                             }
3956                           }
3957                       return;
3958         #elif _SLKP_USE_ARM_NEON
3959                memset (dest, 0, lDim * sizeof (hyFloat));
3960                long ti = 0L,
3961                      row_offset = 0L;
3962 
3963                 if (vDim == 20UL) {
3964                     for (long r = 0; r < 20; r++, row_offset += 20) {
3965                         long col_offset = 0L;
3966                         for (long c = 0; c < 20L; c++, ti++, col_offset += 20L) {
3967                             float64x2_t A4 = vdupq_n_f64(theData[ti]);
3968                             //for (long k = 0; k < 20L; k+=4) {
3969                                 float64x2_t D4, B4, D4_1, B4_1, D4_2, B4_2, D4_3, B4_3, D4_4, B4_4;
3970                                 DO_GROUP_OP1 (D4, B4, 0);
3971                                 DO_GROUP_OP1 (D4_1, B4_1, 2);
3972                                 DO_GROUP_OP1 (D4_2, B4_2, 4);
3973                                 DO_GROUP_OP1 (D4_3, B4_3, 6);
3974                                 DO_GROUP_OP1 (D4_4, B4_4, 8);
3975                                 DO_GROUP_OP2 (D4, 0);
3976                                 DO_GROUP_OP2 (D4_1, 2);
3977                                 DO_GROUP_OP2 (D4_2, 4);
3978                                 DO_GROUP_OP2 (D4_3, 6);
3979                                 DO_GROUP_OP2 (D4_4, 8);
3980                                 DO_GROUP_OP1 (D4, B4, 10);
3981                                 DO_GROUP_OP1 (D4_1, B4_1, 12);
3982                                 DO_GROUP_OP1 (D4_2, B4_2, 14);
3983                                 DO_GROUP_OP1 (D4_3, B4_3, 16);
3984                                 DO_GROUP_OP1 (D4_4, B4_4, 18);
3985                                 DO_GROUP_OP2 (D4, 10);
3986                                 DO_GROUP_OP2 (D4_1, 12);
3987                                 DO_GROUP_OP2 (D4_2, 14);
3988                                 DO_GROUP_OP2 (D4_3, 16);
3989                                 DO_GROUP_OP2 (D4_4, 18);
3990                             //}
3991                       }
3992                     }
3993                 } else
3994                     for (long r = 0; r < vDim; r++, row_offset += vDim) {
3995                         long col_offset = 0L;
3996                         for (long c = 0; c < vDim; c++, ti++, col_offset += vDim) {
3997                             float64x2_t A4 = vdupq_n_f64(theData[ti]);
3998                             #pragma GCC unroll 4
3999                             #pragma clang loop vectorize(enable)
4000                             #pragma clang loop interleave(enable)
4001                             #pragma clang loop unroll(enable)
4002                             for (long k = 0; k < vDim; k+=2) {
4003                                 float64x2_t D4, B4;
4004                                 DO_GROUP_OP1 (D4, B4, k);
4005                                 DO_GROUP_OP2 (D4, k);
4006                             }
4007                       }
4008                     }
4009                 return;
4010                 #endif
4011                 memset (dest, 0, lDim * sizeof (hyFloat));
4012 
4013                 for (unsigned long c = 0UL; c < secondArg.vDim; c ++) {
4014 
4015                   /*
4016                    load a series of 4 consecutive elements from a column in the second matrix,
4017                    say c [] = [i,i+1,i+2,i+3: c]
4018 
4019                    next, iterate over all rows in the first matrix, looking for matched consecutive
4020                    elements, e.g.
4021 
4022                    r [] = [r: i,i+1,i+2,i+3]
4023 
4024                    compute sum_{t=0..3} c[t] * r[t]
4025 
4026                    add to the element (r,c) in the destination matrix
4027 
4028                    */
4029 
4030                     const unsigned long
4031                                             column_shift2 = secondArg.vDim << 1,
4032                                             column_shift3 = (secondArg.vDim << 1) + secondArg.vDim,
4033                                             column_shift4 = secondArg.vDim << 2;
4034 
4035                     for (unsigned long i = 0UL, vector_index = c; i < secondArg.hDim; i += 4UL, vector_index += column_shift4) {
4036                       hyFloat c0 = secondArg.theData[vector_index],
4037                                  c1 = secondArg.theData[vector_index+secondArg.vDim],
4038                                  c2 = secondArg.theData[vector_index+column_shift2],
4039                                  c3 = secondArg.theData[vector_index+column_shift3];
4040 
4041                       for (unsigned long r = 0UL; r < hDim; r ++) {
4042 
4043                         unsigned long element = r*vDim + i;
4044 
4045                         hyFloat r0 = theData[element]   * c0,
4046                                    r1 = theData[element+1] * c1,
4047                                    r2 = theData[element+2] * c2,
4048                                    r3 = theData[element+3] * c3;
4049 
4050                         r0 += r1;
4051                         r2 += r3;
4052                         dest[r*vDim + c] += r0 + r2;
4053 
4054                       }
4055                    }
4056                 }
4057               } else {
4058                   #if defined  _SLKP_USE_AVX_INTRINSICS
4059                     /*
4060 
4061                         dest [i,j] = sum_k A[i,k] * B[k,j]
4062 
4063                         # data from B[k,j], B[k,j+1], B[k,j+2], B[k,j+3] goes into dest[i,j] ... dest [i,j+3]
4064                         # dest [i,x] will take data from dest A[i,x]
4065 
4066                         9x9 matrix
4067 
4068                         dest [0,0] = A[0,0] * B[0,0] + A[0,1] * B[1,0] + A[0,2] * B[2,0] + A[0,3] * B[3,0] + ...;
4069                         dest [0,1] = A[0,0] * B[0,1] + A[0,1] * B[1,1] + A[0,2] * B[2,1] + A[0,3] * B[3,1] + ...;
4070                         dest [0,2] = A[0,0] * B[0,2] + A[0,1] * B[1,2] + A[0,2] * B[2,2] + A[0,3] * B[3,2] + ...;
4071                         dest [0,3] = A[0,0] * B[0,3] + A[0,1] * B[1,3] + A[0,2] * B[2,3] + A[0,3] * B[3,3] + ...;
4072                         dest [0,4] = A[0,0] * B[0,4] + A[0,1] * B[1,4] + A[0,2] * B[2,4] + A[0,3] * B[3,4] + ...;
4073                         dest [0,5] = A[0,0] * B[0,5] + A[0,1] * B[1,5] + A[0,2] * B[2,5] + A[0,3] * B[3,5] + ...;
4074                         dest [0,6] = A[0,0] * B[0,6] + A[0,1] * B[1,6] + A[0,2] * B[2,6] + A[0,3] * B[3,6] + ...;
4075                         dest [0,7] = A[0,0] * B[0,7] + A[0,1] * B[1,7] + A[0,2] * B[2,7] + A[0,3] * B[3,7] + ...;
4076                         dest [0,8] = A[0,0] * B[0,8] + A[0,1] * B[1,8] + A[0,2] * B[2,8] + A[0,3] * B[3,8] + ...;
4077 
4078                         ...
4079 
4080                         dest [3,0] = A[3,0] * B[0,0] + A[3,1] * B[1,0] + A[3,2] * B[2,0] + A[3,3] * B[3,0] + ...;
4081                         dest [3,1] = A[3,0] * B[0,1] + A[3,1] * B[1,1] + A[3,2] * B[2,1] + A[3,3] * B[3,1] + ...;
4082                         dest [3,2] = A[3,0] * B[0,2] + A[3,1] * B[1,2] + A[3,2] * B[2,2] + A[3,3] * B[3,2] + ...;
4083                         dest [3,3] = A[3,0] * B[0,3] + A[3,1] * B[1,3] + A[3,2] * B[2,3] + A[3,3] * B[3,3] + ...;
4084                         ....
4085                         dest [3,8] = A[3,0] * B[0,8] + A[3,1] * B[1,8] + A[3,2] * B[2,8] + A[3,3] * B[3,8] + ...;
4086 
4087                         ...
4088 
4089                         iterate by row  (R)
4090                             iterate by column (C)
4091                                 A4 = load element A[R,C] into a 4 element vector
4092 
4093                                 iterate by summing index (K), stride 4
4094                                     D4 = destination [R:K, R:K+4] // linear access
4095                                     B4 = B [C:K, C:K+4] // linear access
4096                                     D4 += A4*B4
4097                                     write D4 back to memory
4098 
4099 
4100                                 if something is left over
4101 
4102                                 iterate by summing index (K)
4103                                     desination [R,K] += A[R,C] * B[C,K],
4104 
4105                      */
4106                     long ti = 0L,
4107                          row_offset = 0L;
4108 
4109                   if (dimm4 == 60UL) { // codons
4110 
4111                       for (long r = 0; r < hDim; r++, row_offset += vDim) {
4112                           long col_offset = 0L;
4113                           for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4114                               __m256d A4 = _mm256_set1_pd(theData[ti]);
4115 
4116                               __m256d D4_1, D4_2, D4_3, D4_4;
4117                               __m256d B4_1, B4_2, B4_3, B4_4;
4118 
4119                               DO_GROUP_OP1 (D4_1,B4_1,0);
4120                               DO_GROUP_OP1 (D4_2,B4_2,4);
4121                               DO_GROUP_OP1 (D4_3,B4_3,8);
4122                               DO_GROUP_OP1 (D4_4,B4_4,12);
4123                               DO_GROUP_OP2 (D4_1,0);
4124                               DO_GROUP_OP2 (D4_2,4);
4125                               DO_GROUP_OP2 (D4_3,8);
4126                               DO_GROUP_OP2 (D4_4,12);
4127 
4128                               DO_GROUP_OP1 (D4_1,B4_1,16);
4129                               DO_GROUP_OP1 (D4_2,B4_2,20);
4130                               DO_GROUP_OP1 (D4_3,B4_3,24);
4131                               DO_GROUP_OP1 (D4_4,B4_4,28);
4132                               DO_GROUP_OP2 (D4_1,16);
4133                               DO_GROUP_OP2 (D4_2,20);
4134                               DO_GROUP_OP2 (D4_3,24);
4135                               DO_GROUP_OP2 (D4_4,28);
4136 
4137                               DO_GROUP_OP1 (D4_1,B4_1,32);
4138                               DO_GROUP_OP1 (D4_2,B4_2,36);
4139                               DO_GROUP_OP1 (D4_3,B4_3,40);
4140                               DO_GROUP_OP1 (D4_4,B4_4,44);
4141                               DO_GROUP_OP2 (D4_1,32);
4142                               DO_GROUP_OP2 (D4_2,36);
4143                               DO_GROUP_OP2 (D4_3,40);
4144                               DO_GROUP_OP2 (D4_4,44);
4145 
4146                               DO_GROUP_OP1 (D4_1,B4_1,48);
4147                               DO_GROUP_OP1 (D4_2,B4_2,52);
4148                               DO_GROUP_OP1 (D4_3,B4_3,56);
4149                               DO_GROUP_OP2 (D4_1,48);
4150                               DO_GROUP_OP2 (D4_2,52);
4151                               DO_GROUP_OP2 (D4_3,56);
4152 
4153                               for (long k = dimm4; k < vDim; k++) {
4154                                   dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4155                               }
4156                           }
4157                       }
4158                   } else { // something else
4159                       for (long r = 0; r < hDim; r++, row_offset += vDim) {
4160                           long col_offset = 0L;
4161                           for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4162                               __m256d A4 = _mm256_set1_pd(theData[ti]);
4163                             #pragma GCC unroll 4
4164                             #pragma clang loop vectorize(enable)
4165                             #pragma clang loop interleave(enable)
4166                             #pragma clang loop unroll(enable)
4167                               for (long k = 0; k < dimm4; k+=4) {
4168                                   __m256d D4, B4;
4169                                   DO_GROUP_OP (D4, B4, k);
4170                               }
4171 
4172                               for (long k = dimm4; k < vDim; k++) {
4173                                   dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4174                               }
4175                           }
4176                       }
4177                   }
4178                   #elif _SLKP_USE_SSE_INTRINSICS
4179                               long ti = 0L,
4180                                    row_offset = 0L;
4181 
4182                               if (dimm4 == 60UL) { // codons
4183 
4184 
4185                                 for (long r = 0; r < hDim; r++, row_offset += vDim) {
4186                                     long col_offset = 0L;
4187                                     for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4188                                         __m128d A4 = _mm_set1_pd(theData[ti]);
4189 
4190                                         __m128d D4_1, D4_2, D4_3, D4_4;
4191                                         __m128d B4_1, B4_2, B4_3, B4_4;
4192 
4193                                         DO_GROUP_OP1 (D4_1,B4_1,0);
4194                                         DO_GROUP_OP1 (D4_2,B4_2,2);
4195                                         DO_GROUP_OP1 (D4_3,B4_3,4);
4196                                         DO_GROUP_OP1 (D4_4,B4_4,6);
4197                                         DO_GROUP_OP2 (D4_1,0);
4198                                         DO_GROUP_OP2 (D4_2,2);
4199                                         DO_GROUP_OP2 (D4_3,4);
4200                                         DO_GROUP_OP2 (D4_4,6);
4201 
4202                                         DO_GROUP_OP1 (D4_1,B4_1,8);
4203                                         DO_GROUP_OP1 (D4_2,B4_2,10);
4204                                         DO_GROUP_OP1 (D4_3,B4_3,12);
4205                                         DO_GROUP_OP1 (D4_4,B4_4,14);
4206                                         DO_GROUP_OP2 (D4_1,8);
4207                                         DO_GROUP_OP2 (D4_2,10);
4208                                         DO_GROUP_OP2 (D4_3,12);
4209                                         DO_GROUP_OP2 (D4_4,14);
4210 
4211                                         DO_GROUP_OP1 (D4_1,B4_1,16);
4212                                         DO_GROUP_OP1 (D4_2,B4_2,18);
4213                                         DO_GROUP_OP1 (D4_3,B4_3,20);
4214                                         DO_GROUP_OP1 (D4_4,B4_4,22);
4215                                         DO_GROUP_OP2 (D4_1,16);
4216                                         DO_GROUP_OP2 (D4_2,18);
4217                                         DO_GROUP_OP2 (D4_3,20);
4218                                         DO_GROUP_OP2 (D4_4,22);
4219 
4220                                         DO_GROUP_OP1 (D4_1,B4_1,24);
4221                                         DO_GROUP_OP1 (D4_2,B4_2,26);
4222                                         DO_GROUP_OP1 (D4_3,B4_3,28);
4223                                         DO_GROUP_OP1 (D4_4,B4_4,30);
4224                                         DO_GROUP_OP2 (D4_1,24);
4225                                         DO_GROUP_OP2 (D4_2,26);
4226                                         DO_GROUP_OP2 (D4_3,28);
4227                                         DO_GROUP_OP2 (D4_4,30);
4228 
4229                                         DO_GROUP_OP1 (D4_1,B4_1,32);
4230                                         DO_GROUP_OP1 (D4_2,B4_2,34);
4231                                         DO_GROUP_OP1 (D4_3,B4_3,36);
4232                                         DO_GROUP_OP1 (D4_4,B4_4,38);
4233                                         DO_GROUP_OP2 (D4_1,32);
4234                                         DO_GROUP_OP2 (D4_2,34);
4235                                         DO_GROUP_OP2 (D4_3,36);
4236                                         DO_GROUP_OP2 (D4_4,38);
4237 
4238                                         DO_GROUP_OP1 (D4_1,B4_1,40);
4239                                         DO_GROUP_OP1 (D4_2,B4_2,42);
4240                                         DO_GROUP_OP1 (D4_3,B4_3,44);
4241                                         DO_GROUP_OP1 (D4_4,B4_4,46);
4242                                         DO_GROUP_OP2 (D4_1,40);
4243                                         DO_GROUP_OP2 (D4_2,42);
4244                                         DO_GROUP_OP2 (D4_3,44);
4245                                         DO_GROUP_OP2 (D4_4,46);
4246 
4247                                         DO_GROUP_OP1 (D4_1,B4_1,48);
4248                                         DO_GROUP_OP1 (D4_2,B4_2,50);
4249                                         DO_GROUP_OP1 (D4_3,B4_3,52);
4250                                         DO_GROUP_OP1 (D4_4,B4_4,54);
4251                                         DO_GROUP_OP2 (D4_1,48);
4252                                         DO_GROUP_OP2 (D4_2,50);
4253                                         DO_GROUP_OP2 (D4_3,52);
4254                                         DO_GROUP_OP2 (D4_4,54);
4255 
4256                                         DO_GROUP_OP1 (D4_1,B4_1,56);
4257                                         DO_GROUP_OP1 (D4_2,B4_2,58);
4258                                         DO_GROUP_OP2 (D4_1,56);
4259                                         DO_GROUP_OP2 (D4_2,58);
4260 
4261                                         for (long k = dimm4; k < vDim; k++) {
4262                                             dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4263                                         }
4264                                     }
4265                                 }
4266                               } else { // something else
4267                                 for (long r = 0; r < hDim; r++, row_offset += vDim) {
4268                                     long col_offset = 0L;
4269                                     for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4270                                         __m128d A4 = _mm_set1_pd(theData[ti]);
4271                                         #pragma GCC unroll 4
4272                                         #pragma clang loop vectorize(enable)
4273                                         #pragma clang loop interleave(enable)
4274                                         #pragma clang loop unroll(enable)
4275                                         for (long k = 0; k < dimm4; k+=2) {
4276                                             __m128d D4, B4;
4277                                             DO_GROUP_OP1 (D4, B4, k);
4278                                             DO_GROUP_OP2 (D4, k);
4279                                         }
4280 
4281                                         for (long k = dimm4; k < vDim; k++) {
4282                                             dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4283                                         }
4284                                     }
4285                                 }
4286                             }
4287             #elif _SLKP_USE_ARM_NEON
4288                         long ti = 0L,
4289                              row_offset = 0L;
4290 
4291                         if (dimm4 == 60UL) { // codons
4292 
4293 
4294                           for (long r = 0; r < hDim; r++, row_offset += vDim) {
4295                               long col_offset = 0L;
4296                               for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4297                                   float64x2_t A4 = vdupq_n_f64(theData[ti]);
4298 
4299                                   float64x2_t D4_1, D4_2, D4_3, D4_4;
4300                                   float64x2_t B4_1, B4_2, B4_3, B4_4;
4301 
4302                                   DO_GROUP_OP1 (D4_1,B4_1,0);
4303                                   DO_GROUP_OP1 (D4_2,B4_2,2);
4304                                   DO_GROUP_OP1 (D4_3,B4_3,4);
4305                                   DO_GROUP_OP1 (D4_4,B4_4,6);
4306                                   DO_GROUP_OP2 (D4_1,0);
4307                                   DO_GROUP_OP2 (D4_2,2);
4308                                   DO_GROUP_OP2 (D4_3,4);
4309                                   DO_GROUP_OP2 (D4_4,6);
4310 
4311                                   DO_GROUP_OP1 (D4_1,B4_1,8);
4312                                   DO_GROUP_OP1 (D4_2,B4_2,10);
4313                                   DO_GROUP_OP1 (D4_3,B4_3,12);
4314                                   DO_GROUP_OP1 (D4_4,B4_4,14);
4315                                   DO_GROUP_OP2 (D4_1,8);
4316                                   DO_GROUP_OP2 (D4_2,10);
4317                                   DO_GROUP_OP2 (D4_3,12);
4318                                   DO_GROUP_OP2 (D4_4,14);
4319 
4320                                   DO_GROUP_OP1 (D4_1,B4_1,16);
4321                                   DO_GROUP_OP1 (D4_2,B4_2,18);
4322                                   DO_GROUP_OP1 (D4_3,B4_3,20);
4323                                   DO_GROUP_OP1 (D4_4,B4_4,22);
4324                                   DO_GROUP_OP2 (D4_1,16);
4325                                   DO_GROUP_OP2 (D4_2,18);
4326                                   DO_GROUP_OP2 (D4_3,20);
4327                                   DO_GROUP_OP2 (D4_4,22);
4328 
4329                                   DO_GROUP_OP1 (D4_1,B4_1,24);
4330                                   DO_GROUP_OP1 (D4_2,B4_2,26);
4331                                   DO_GROUP_OP1 (D4_3,B4_3,28);
4332                                   DO_GROUP_OP1 (D4_4,B4_4,30);
4333                                   DO_GROUP_OP2 (D4_1,24);
4334                                   DO_GROUP_OP2 (D4_2,26);
4335                                   DO_GROUP_OP2 (D4_3,28);
4336                                   DO_GROUP_OP2 (D4_4,30);
4337 
4338                                   DO_GROUP_OP1 (D4_1,B4_1,32);
4339                                   DO_GROUP_OP1 (D4_2,B4_2,34);
4340                                   DO_GROUP_OP1 (D4_3,B4_3,36);
4341                                   DO_GROUP_OP1 (D4_4,B4_4,38);
4342                                   DO_GROUP_OP2 (D4_1,32);
4343                                   DO_GROUP_OP2 (D4_2,34);
4344                                   DO_GROUP_OP2 (D4_3,36);
4345                                   DO_GROUP_OP2 (D4_4,38);
4346 
4347                                   DO_GROUP_OP1 (D4_1,B4_1,40);
4348                                   DO_GROUP_OP1 (D4_2,B4_2,42);
4349                                   DO_GROUP_OP1 (D4_3,B4_3,44);
4350                                   DO_GROUP_OP1 (D4_4,B4_4,46);
4351                                   DO_GROUP_OP2 (D4_1,40);
4352                                   DO_GROUP_OP2 (D4_2,42);
4353                                   DO_GROUP_OP2 (D4_3,44);
4354                                   DO_GROUP_OP2 (D4_4,46);
4355 
4356                                   DO_GROUP_OP1 (D4_1,B4_1,48);
4357                                   DO_GROUP_OP1 (D4_2,B4_2,50);
4358                                   DO_GROUP_OP1 (D4_3,B4_3,52);
4359                                   DO_GROUP_OP1 (D4_4,B4_4,54);
4360                                   DO_GROUP_OP2 (D4_1,48);
4361                                   DO_GROUP_OP2 (D4_2,50);
4362                                   DO_GROUP_OP2 (D4_3,52);
4363                                   DO_GROUP_OP2 (D4_4,54);
4364 
4365                                   DO_GROUP_OP1 (D4_1,B4_1,56);
4366                                   DO_GROUP_OP1 (D4_2,B4_2,58);
4367                                   DO_GROUP_OP2 (D4_1,56);
4368                                   DO_GROUP_OP2 (D4_2,58);
4369 
4370                                   for (long k = dimm4; k < vDim; k++) {
4371                                       dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4372                                   }
4373                               }
4374                           }
4375                         } else { // something else
4376                           for (long r = 0; r < hDim; r++, row_offset += vDim) {
4377                               long col_offset = 0L;
4378                               for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4379                                   float64x2_t A4 = vdupq_n_f64(theData[ti]);
4380                                   #pragma GCC unroll 4
4381                                   #pragma clang loop vectorize(enable)
4382                                   #pragma clang loop interleave(enable)
4383                                   #pragma clang loop unroll(enable)
4384                                   for (long k = 0; k < dimm4; k+=2) {
4385                                       float64x2_t D4, B4;
4386                                       DO_GROUP_OP1 (D4, B4, k);
4387                                       DO_GROUP_OP2 (D4, k);
4388                                   }
4389 
4390                                   for (long k = dimm4; k < vDim; k++) {
4391                                       dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4392                                   }
4393                               }
4394                           }
4395                       }
4396                 #else
4397                       const unsigned long
4398                               column_shift2 = secondArg.vDim << 1,
4399                               column_shift3 = (secondArg.vDim << 1) + secondArg.vDim,
4400                               column_shift4 = secondArg.vDim << 2;
4401 
4402                       hyFloat * row = theData;
4403 
4404                       for (unsigned long i=0UL; i<hDim; i++, row += vDim) {
4405                           for (unsigned long j=0UL; j<secondArg.vDim; j++) {
4406                               hyFloat resCell  = 0.0;
4407 
4408                               unsigned long k = 0UL,
4409                                            column = j;
4410 
4411 
4412                               for (; k < dimm4; k+=4, column += column_shift4) {
4413                                   hyFloat pr1 = row[k]   * secondArg.theData [column],
4414                                              pr2 = row[k+1] * secondArg.theData [column + secondArg.vDim ],
4415                                              pr3 = row[k+2] * secondArg.theData [column + column_shift2],
4416                                              pr4 = row[k+3] * secondArg.theData [column + column_shift3];
4417 
4418                                   pr1 += pr2;
4419                                   pr3 += pr4;
4420 
4421                                   resCell += pr1 + pr3;
4422                               }
4423 
4424                               for (; k < vDim; k++, column += secondArg.vDim) {
4425                                   resCell += row[k] * secondArg.theData[column];
4426                               }
4427 
4428                               dest[cumulativeIndex++] = resCell;
4429                          }
4430                       }
4431 
4432 
4433                     #endif
4434               }
4435 
4436     #else
4437                 secondArg.Transpose();
4438                 for (long i=0; i<hDim; i++, row += vDim) {
4439                     for (long j=0; j<hDim; j++) {
4440                         hyFloat resCell  = 0.0;
4441                         #pragma GCC unroll 8
4442                         #pragma clang loop vectorize(enable)
4443                         #pragma clang loop interleave(enable)
4444                         #pragma clang loop unroll(enable)
4445                         for (long k = 0, column = j*hDim; k < vDim; k++, column ++) {
4446                             resCell += row[k] * secondArg.theData [column];
4447                         }
4448 
4449                         storage.theData[cumulativeIndex++] = resCell;
4450                     }
4451                 }
4452                 secondArg.Transpose();
4453 #endif
4454             } else
4455                 /* rectangular matrices */
4456             {
4457 #define _HY_MATRIX_CACHE_BLOCK 128
4458                  if (vDim >= 256) {
4459                      long nt = 1;
4460 #ifdef _OPENMP
4461                       #define GCC_VERSION (__GNUC__ * 10000 \
4462                                + __GNUC_MINOR__ * 100 \
4463                                + __GNUC_PATCHLEVEL__)
4464 #ifdef __HYPHYMPI__
4465                      if (hy_mpi_node_rank == 0)
4466 
4467 #endif
4468                      nt           = MIN(omp_get_max_threads(),secondArg.vDim / _HY_MATRIX_CACHE_BLOCK + 1);
4469 #endif
4470                      for (long r = 0; r < hDim; r ++) {
4471 #ifdef _OPENMP
4472   #if _OPENMP>=201511
4473     #pragma omp parallel for default(none) shared(r,secondArg,storage) schedule(monotonic:guided) proc_bind(spread) if (nt>1)  num_threads (nt)
4474   #else
4475     #if _OPENMP>=200803
4476       #pragma omp parallel for default(none) shared(r,secondArg,storage) schedule(guided) proc_bind(spread) if (nt>1)  num_threads (nt)
4477     #endif
4478   #endif
4479 #endif
4480                          for (long c = 0; c < secondArg.vDim; c+= _HY_MATRIX_CACHE_BLOCK) {
4481                              hyFloat cacheBlockInMatrix2 [_HY_MATRIX_CACHE_BLOCK][_HY_MATRIX_CACHE_BLOCK];
4482                              const long upto_p = (secondArg.vDim-c>=_HY_MATRIX_CACHE_BLOCK)?_HY_MATRIX_CACHE_BLOCK:(secondArg.vDim-c);
4483                              for (long r2 = 0; r2 < secondArg.hDim; r2+= _HY_MATRIX_CACHE_BLOCK) {
4484                                  const long upto_p2 = (secondArg.hDim-r2)>=_HY_MATRIX_CACHE_BLOCK?_HY_MATRIX_CACHE_BLOCK:(secondArg.hDim-r2);
4485                                  for (long p = 0; p < upto_p; p++) {
4486                                      for (long p2 = 0; p2 < upto_p2; p2++) {
4487                                          cacheBlockInMatrix2[p][p2] = secondArg.theData [(r2+p2)*secondArg.vDim+c+p];
4488                                      }
4489                                  }
4490                                  if (upto_p2 % 4 == 0) {
4491                                      for (long p = 0; p < upto_p; p++) {
4492                                          hyFloat updater = 0.;
4493                                          for (long p2 = 0; p2 < upto_p2; p2+=4) {
4494                                              hyFloat pr1 = theData[r*vDim + r2 + p2]*cacheBlockInMatrix2[p][p2],
4495                                                         pr2 = theData[r*vDim + r2 + p2+1]*cacheBlockInMatrix2[p][p2+1],
4496                                                         pr3 = theData[r*vDim + r2 + p2+2]*cacheBlockInMatrix2[p][p2+2],
4497                                                         pr4 = theData[r*vDim + r2 + p2+3]*cacheBlockInMatrix2[p][p2+3];
4498                                              pr1 += pr2;
4499                                              pr3 += pr4;
4500                                              updater += pr1 + pr3;
4501                                          }
4502                                          storage.theData[r*secondArg.vDim + c + p] += updater;
4503                                      }
4504                                  } else
4505                                      for (long p = 0; p < upto_p; p++) {
4506                                          hyFloat updater = 0.;
4507                                          for (long p2 = 0; p2 < upto_p2; p2++) {
4508                                              updater += theData[r*vDim + r2 + p2]*cacheBlockInMatrix2[p][p2];
4509                                          }
4510                                          storage.theData[r*secondArg.vDim + c + p] += updater;
4511                                      }
4512                              }
4513                          }
4514                      }
4515 
4516                  } else {
4517 
4518 
4519                     if (vDim % 4) {
4520                         long mod4 = vDim-vDim%4;
4521                         for (long i=0; i<hDim; i++) {
4522                             for (long j=0; j<secondArg.vDim; j++) {
4523                                 hyFloat resCell = 0.0;
4524                                 long k = 0;
4525                                 for (; k < mod4; k+=4) {
4526                                     resCell += theData[i*vDim + k] * secondArg.theData[k*secondArg.vDim + j] +
4527                                     theData[i*vDim + k + 1] * secondArg.theData[(k+1)*secondArg.vDim + j] +
4528                                     theData[i*vDim + k + 2] * secondArg.theData[(k+2)*secondArg.vDim + j] +
4529                                     theData[i*vDim + k + 3] * secondArg.theData[(k+3)*secondArg.vDim + j];
4530                                 }
4531                                 for (; k < vDim; k++) {
4532                                     resCell += theData[i*vDim + k] * secondArg.theData[k*secondArg.vDim + j];
4533                                 }
4534 
4535                                 storage.theData[i*secondArg.vDim + j] = resCell;
4536                             }
4537                         }
4538                     } else {
4539                         for (long i=0; i<hDim; i++) {
4540                             for (long j=0; j<secondArg.vDim; j++) {
4541                                 hyFloat resCell = 0.0;
4542                                 for (long k = 0; k < vDim; k+=4) {
4543                                     resCell += theData[i*vDim + k] * secondArg.theData[k*secondArg.vDim + j] +
4544                                     theData[i*vDim + k + 1] * secondArg.theData[(k+1)*secondArg.vDim + j] +
4545                                     theData[i*vDim + k + 2] * secondArg.theData[(k+2)*secondArg.vDim + j] +
4546                                     theData[i*vDim + k + 3] * secondArg.theData[(k+3)*secondArg.vDim + j];
4547                                 }
4548 
4549 
4550                                 storage.theData[i*secondArg.vDim + j] = resCell;
4551                             }
4552                         }
4553                     }
4554                 }
4555 
4556             }
4557         }
4558 
4559     } else if (theIndex && !secondArg.theIndex) { // sparse multiplied by non-sparse
4560         if (storageType == 1 && secondArg.storageType ==1) { // both numeric
4561             if ( vDim == hDim && secondArg.vDim==secondArg.hDim) { // both square and same dimension
4562                 /*
4563                   break out a special case for universal code
4564                 */
4565 
4566               if (vDim == 61) {
4567 
4568                 if (compressedIndex) {
4569                     long currentXIndex = 0L;
4570                     hyFloat  * _hprestrict_ res               = storage.theData;
4571 
4572                     for (long i = 0; i < hDim; i++) { // row in source
4573                       while (currentXIndex < compressedIndex[i]) {
4574                             long currentXColumn = compressedIndex[currentXIndex + hDim];
4575                             // go into the second matrix and look up all the non-zero entries in the currentXColumn row
4576 
4577                             hyFloat value = theData[currentXIndex];
4578                             hyFloat  * _hprestrict_ secArg            = secondArg.theData  + currentXColumn*61;
4579                             #ifdef  _SLKP_USE_AVX_INTRINSICS
4580                                 __m256d  value_op = _mm256_set1_pd (value);
4581                                 #ifdef _SLKP_USE_FMA3_INTRINSICS
4582                                     #define                 CELL_OP(x) _mm256_storeu_pd (res+x, _mm256_fmadd_pd (value_op, _mm256_loadu_pd (secArg+x),_mm256_loadu_pd(res+x)))
4583                                 #else
4584                                     #define                 CELL_OP(x) _mm256_storeu_pd (res+x,   _mm256_add_pd (_mm256_loadu_pd(res+x),    _mm256_mul_pd(value_op, _mm256_loadu_pd (secArg+x))))
4585                                 #endif
4586 
4587                                 CELL_OP(0);CELL_OP(4);CELL_OP(8);CELL_OP(12);
4588                                 CELL_OP(16);CELL_OP(20);CELL_OP(24);CELL_OP(28);
4589                                 CELL_OP(32);CELL_OP(36);CELL_OP(40);CELL_OP(44);
4590                                 CELL_OP(48);CELL_OP(52);CELL_OP(56);
4591                             #elif  _SLKP_USE_ARM_NEON
4592                                  float64x2_t  value_op = vdupq_n_f64 (value);
4593                                  #define                 CELL_OP(x) vst1q_f64 (res+x, vfmaq_f64 (vld1q_f64(res+x), value_op, vld1q_f64 (secArg+x)))
4594 
4595 
4596                                   CELL_OP(0);CELL_OP(2);CELL_OP(4);CELL_OP(6);
4597                                   CELL_OP(8);CELL_OP(10);CELL_OP(12);CELL_OP(14);
4598                                   CELL_OP(16);CELL_OP(18);CELL_OP(20);CELL_OP(22);
4599                                   CELL_OP(24);CELL_OP(26);CELL_OP(28);
4600                                   CELL_OP(30);CELL_OP(32);CELL_OP(34);CELL_OP(36);CELL_OP(38);
4601                                   CELL_OP(40);CELL_OP(42);CELL_OP(44);CELL_OP(46);
4602                                   CELL_OP(48);CELL_OP(50);CELL_OP(52);CELL_OP(54);
4603                                   CELL_OP(56);CELL_OP(58);
4604                             #else
4605                                 for (unsigned long i = 0UL; i < 60UL; i+=4UL) {
4606                                     res[i]   += value * secArg[i];
4607                                     res[i+1] += value * secArg[i+1];
4608                                     res[i+2] += value * secArg[i+2];
4609                                     res[i+3] += value * secArg[i+3];
4610                                 }
4611                             #endif
4612                             res[60]   += value * secArg[60];
4613                             currentXIndex ++;
4614                       }
4615                       res += vDim;
4616 
4617                   }
4618 
4619 
4620 
4621                 } else {
4622 
4623                      for (unsigned long k=0UL; k<lDim; k++) { // loop over entries in the sparse matrix
4624                       long m = theIndex[k];
4625                       if (m >= 0L) {
4626                         long i = ((unsigned long)m)%61;
4627 
4628                         hyFloat  value                            = theData[k];
4629                         hyFloat  * _hprestrict_ res               = storage.theData    + (m-i);
4630                         hyFloat  * _hprestrict_ secArg            = secondArg.theData  + i*61;
4631 
4632                         #ifdef  _SLKP_USE_AVX_INTRINSICS
4633                           __m256d  value_op = _mm256_set1_pd (value);
4634 
4635                          #ifdef _SLKP_USE_FMA3_INTRINSICS
4636                             #define                 CELL_OP(x) _mm256_storeu_pd (res+x, _mm256_fmadd_pd (value_op, _mm256_loadu_pd (secArg+x),_mm256_loadu_pd(res+x)))
4637                           #else
4638                             #define                 CELL_OP(x) _mm256_storeu_pd (res+x,   _mm256_add_pd (_mm256_loadu_pd(res+x),    _mm256_mul_pd(value_op, _mm256_loadu_pd (secArg+x))))
4639                           #endif
4640                           CELL_OP(0);CELL_OP(4);CELL_OP(8);CELL_OP(12);
4641                           CELL_OP(16);CELL_OP(20);CELL_OP(24);CELL_OP(28);
4642                           CELL_OP(32);CELL_OP(36);CELL_OP(40);CELL_OP(44);
4643                           CELL_OP(48);CELL_OP(52);CELL_OP(56);
4644                         #elif  _SLKP_USE_ARM_NEON
4645                              float64x2_t  value_op = vdupq_n_f64 (value);
4646                              #define                 CELL_OP(x) vst1q_f64 (res+x, vfmaq_f64 (vld1q_f64(res+x), value_op, vld1q_f64 (secArg+x)))
4647 
4648 
4649                               CELL_OP(0);CELL_OP(2);CELL_OP(4);CELL_OP(6);
4650                               CELL_OP(8);CELL_OP(10);CELL_OP(12);CELL_OP(14);
4651                               CELL_OP(16);CELL_OP(18);CELL_OP(20);CELL_OP(22);
4652                               CELL_OP(24);CELL_OP(26);CELL_OP(28);
4653                               CELL_OP(30);CELL_OP(32);CELL_OP(34);CELL_OP(36);CELL_OP(38);
4654                               CELL_OP(40);CELL_OP(42);CELL_OP(44);CELL_OP(46);
4655                               CELL_OP(48);CELL_OP(50);CELL_OP(52);CELL_OP(54);
4656                               CELL_OP(56);CELL_OP(58);
4657                         #else
4658                             for (unsigned long i = 0UL; i < 60UL; i+=4UL) {
4659                                 res[i]   += value * secArg[i];
4660                                 res[i+1] += value * secArg[i+1];
4661                                 res[i+2] += value * secArg[i+2];
4662                                 res[i+3] += value * secArg[i+3];
4663                                }
4664                         #endif
4665                           res[60]   += value * secArg[60];
4666                       }
4667                      }
4668                 }
4669 
4670               } else {
4671                   long loopBound = (vDim >> 2) << 2;
4672 
4673                   if (compressedIndex) {
4674                     long currentXIndex = 0L;
4675                     hyFloat  * _hprestrict_ res               = storage.theData;
4676 
4677                     for (long i = 0; i < hDim; i++) { // row in source
4678                       while (currentXIndex < compressedIndex[i]) {
4679                             long currentXColumn = compressedIndex[currentXIndex + hDim];
4680                             // go into the second matrix and look up all the non-zero entries in the currentXColumn row
4681 
4682                             hyFloat value = theData[currentXIndex];
4683                             hyFloat  * _hprestrict_ secArg            = secondArg.theData  + currentXColumn*vDim;
4684                             #ifdef  _SLKP_USE_AVX_INTRINSICS
4685                                 __m256d  value_op = _mm256_set1_pd (value);
4686                             #endif
4687                             #ifdef  _SLKP_USE_ARM_NEON
4688                                 float64x2_t  value_op = vdupq_n_f64 (value);
4689                             #endif
4690                             for (unsigned long i = 0UL; i < loopBound; i+=4) {
4691                                   #ifdef  _SLKP_USE_AVX_INTRINSICS
4692                                       #ifdef _SLKP_USE_FMA3_INTRINSICS
4693                                             _mm256_storeu_pd (res+i, _mm256_fmadd_pd (value_op, _mm256_loadu_pd (secArg+i),_mm256_loadu_pd(res+i)));
4694                                       #else
4695                                             _mm256_storeu_pd (res+i, _mm256_add_pd (_mm256_loadu_pd(res+i),  _mm256_mul_pd(value_op, _mm256_loadu_pd (secArg+i))));
4696                                       #endif
4697                                   #elif defined _SLKP_USE_ARM_NEON
4698                                     vst1q_f64 (res+i, vfmaq_f64 (vld1q_f64(res+i), value_op, vld1q_f64 (secArg+i)));
4699                                     vst1q_f64 (res+i+2, vfmaq_f64 (vld1q_f64(res+i+2), value_op, vld1q_f64 (secArg+i+2)));
4700                                   #else
4701                                     res[i]   += value * secArg[i];
4702                                     res[i+1] += value * secArg[i+1];
4703                                     res[i+2] += value * secArg[i+2];
4704                                     res[i+3] += value * secArg[i+3];
4705                                   #endif
4706                             }
4707                             for (unsigned long i = loopBound; i < vDim; i++) {
4708                                 res[i]   += value * secArg[i];
4709                             }
4710                             currentXIndex ++;
4711                       }
4712                       res += vDim;
4713 
4714                  }
4715              } else {
4716                       for (unsigned long k=0UL; k<lDim; k++) { // loop over entries in the sparse matrix
4717                           long m = theIndex[k];
4718                           if  (m != -1L ) { // non-zero
4719                               long i = ((unsigned long)m)%vDim;
4720                               // this element will contribute to (r, c' = [0..vDim-1]) entries in the result matrix
4721                               // in the form of A_rc * B_cc'
4722 
4723                               hyFloat  value                           = theData[k];
4724                               hyFloat  *_hprestrict_ res               = storage.theData    + (m-i);
4725                               hyFloat  *_hprestrict_ secArg            = secondArg.theData  + i*vDim;
4726                               #ifdef  _SLKP_USE_AVX_INTRINSICS
4727                                     __m256d  value_op = _mm256_set1_pd (value);
4728                               #endif
4729                               #ifdef  _SLKP_USE_ARM_NEON
4730                                     float64x2_t  value_op = vdupq_n_f64 (value);
4731                               #endif
4732 
4733                               for (unsigned long i = 0UL; i < loopBound; i+=4) {
4734                               #ifdef  _SLKP_USE_AVX_INTRINSICS
4735                                 #ifdef _SLKP_USE_FMA3_INTRINSICS
4736                                       _mm256_storeu_pd (res+i, _mm256_fmadd_pd (value_op, _mm256_loadu_pd (secArg+i),_mm256_loadu_pd(res+i)));
4737                                 #else
4738                                       _mm256_storeu_pd (res+i, _mm256_add_pd (_mm256_loadu_pd(res+i),  _mm256_mul_pd(value_op, _mm256_loadu_pd (secArg+i))));
4739                                 #endif
4740                                 #elif defined _SLKP_USE_ARM_NEON
4741                                   vst1q_f64 (res+i, vfmaq_f64 (vld1q_f64(res+i), value_op, vld1q_f64 (secArg+i)));
4742                                   vst1q_f64 (res+i+2, vfmaq_f64 (vld1q_f64(res+i+2), value_op, vld1q_f64 (secArg+i+2)));
4743                                 #else
4744                                   res[i]   += value * secArg[i];
4745                                   res[i+1] += value * secArg[i+1];
4746                                   res[i+2] += value * secArg[i+2];
4747                                   res[i+3] += value * secArg[i+3];
4748                                 #endif
4749 
4750                               }
4751                                for (unsigned long i = loopBound; i < vDim; i++) {
4752                                   res[i]   += value * secArg[i];
4753                               }
4754 
4755                           }
4756                       }
4757                  }
4758               } // special codon case
4759             } else {
4760                 for (long k=0; k<lDim; k++) {
4761                     long m = theIndex[k];
4762                     if (m!=-1) {
4763                         long i = ((unsigned long)m)/vDim;
4764                         long j = m - i*vDim;
4765                         hyFloat c = theData[k];
4766                         hyFloat* stData = storage.theData+i*secondArg.vDim,
4767                                     * secArgData=secondArg.theData+j*secondArg.vDim,
4768                                       * stopper = secArgData+secondArg.vDim;
4769                         for (; secArgData!=stopper; stData++,secArgData++) {
4770                             *stData+=c**secArgData;
4771                         }
4772                     }
4773                 }
4774             }
4775         } else { // polynomial entries
4776             for (long k=0; k<lDim; k++) {
4777                 if (IsNonEmpty(k)) {
4778                     long i = theIndex[k]/vDim;
4779                     long j = theIndex[k]%vDim;
4780                     _MathObject* p = GetMatrixObject(k);
4781                     for (long l=j*secondArg.vDim, m=i*secondArg.vDim; l<(j+1)*secondArg.vDim; l++,m++) {
4782                         tempP = secondArg.GetMatrixObject (l);
4783                         if (!tempP) {
4784                             continue;
4785                         }
4786                         _MathObject* temp = p->Mult (secondArg.GetMatrixObject (l));
4787                         tempP = storage.GetMatrixObject(m);
4788                         if (tempP) {
4789                             storage.StoreObject (m, tempP->Add (temp));
4790                         } else {
4791                             storage.StoreObject (m, temp, true);
4792                         }
4793                         DeleteObject (temp);
4794                     }
4795                 }
4796             }
4797         }
4798 
4799     } else if ( !theIndex && secondArg.theIndex)
4800         // non-sparse multiplied by sparse
4801     {
4802         if ( storageType == 1 && secondArg.storageType ==1) {
4803             if (vDim == hDim && secondArg.vDim==secondArg.hDim)
4804                 // both are square matrices
4805             {
4806                 for (long k=0; k<secondArg.lDim; k++) {
4807                     long m = secondArg.theIndex[k];
4808                     if (m!=-1) { // a non-zero value
4809                         // because r_ij = sum_k a_ik * b_kj
4810                         // a non-zero b_kj will contribute a_ik * b_kj to the a_ij cell of the result
4811                         // loop over i...
4812 
4813                         hyFloat c = secondArg.theData[k];
4814 
4815                         for (long cell = m%secondArg.vDim, secondCell = m/secondArg.vDim; cell < lDim; cell += vDim, secondCell += vDim) {
4816                             storage.theData[cell] += c * theData[secondCell];
4817                         }
4818 
4819                     }
4820                 }
4821             } else {
4822                 for (long k=0; k<secondArg.lDim; k++) {
4823                     long m = secondArg.theIndex[k];
4824                     if (m!=-1) {
4825                         long i = m/secondArg.vDim;
4826                         long j = m%secondArg.vDim;
4827                         hyFloat c = secondArg.theData[k];
4828                         hyFloat *stData = storage.theData+j,
4829                                     *secData = theData+i,
4830                                      *stopper = theData+lDim;
4831                         for (; secData<stopper; secData+=vDim, stData+=secondArg.vDim) {
4832                             *stData += c**secData;
4833                         }
4834                     }
4835                 }
4836             }
4837         } else { // polynomial entries
4838             for (long k=0; k<secondArg.lDim; k++) {
4839 
4840                 if (secondArg.IsNonEmpty(k)) {
4841                     long i = secondArg.theIndex[k]/secondArg.vDim;
4842                     long j = secondArg.theIndex[k]%secondArg.vDim;
4843                     _MathObject* p = secondArg.GetMatrixObject(k);
4844                     for (long l=i, m=j; l<lDim; l+=vDim,m+=secondArg.vDim) {
4845                         tempP = GetMatrixObject (l);
4846                         if (!tempP) {
4847                             continue;
4848                         }
4849                         _MathObject* temp = p->Mult (tempP);
4850                         tempP = storage.GetMatrixObject(m);
4851                         if (tempP) {
4852                             storage.StoreObject (m, tempP->Add (temp));
4853                         } else {
4854                             storage.StoreObject (m, temp, true);
4855                         }
4856                         DeleteObject (temp);
4857                     }
4858                 }
4859             }
4860 
4861         }
4862     } else {
4863         //sparse by sparse
4864 
4865         /**
4866              X Y where both X and Y are sparse can be multipled more efficiently than O (N^3)
4867              if cell (i,j) is non-zero in X, it will contribute to cells (i,k) in the result matrix,
4868              it will contribute to the cells of the result matrix (i,k) where k is such that there are non-zero entries in thh k-th row of matrix Y
4869         */
4870 
4871         if (compressedIndex && secondArg.compressedIndex) {
4872             long currentXIndex = 0,
4873                  storageIndex  = 0;
4874 
4875             for (long i = 0; i < hDim; i++) { // row in source
4876 
4877                 while (currentXIndex < compressedIndex[i]) {
4878                     long currentXColumn = compressedIndex[currentXIndex + hDim];
4879                     // go into the second matrix and look up all the non-zero entries in the currentXColumn row
4880                     hyFloat c = theData[currentXIndex];
4881                     long from = currentXColumn ? secondArg.compressedIndex[currentXColumn-1] : 0,
4882                           to   = secondArg.compressedIndex[currentXColumn];
4883                     for (long secondIndex = from; secondIndex < to; secondIndex ++) {
4884                         storage.theData[storageIndex + secondArg.compressedIndex[secondIndex + secondArg.hDim]] += c*secondArg.theData[secondIndex];
4885                     }
4886                     currentXIndex ++;
4887                 }
4888 
4889                 storageIndex += secondArg.vDim;
4890 
4891             }
4892         } else {
4893 
4894             long * indexVector = (long*)alloca ( sizeof(long)*secondArg.hDim);
4895                 // how many non-zero elements are there in the i-th row of matrix Y
4896             memset (indexVector,0,secondArg.hDim*sizeof(long));
4897 
4898             long *indexTable,
4899                  *indexTable2,
4900                  indexTableColumnWidth = secondArg.vDim,
4901                  indexTableDim = secondArg.hDim*indexTableColumnWidth;
4902 
4903             indexTable  = (long*)MatrixMemAllocate( sizeof(long)*indexTableDim);
4904               // element (i,j) of this matrix is the COLUMN index in which the j-th non-zero entry appears in row i of matrix Y
4905             indexTable2 = (long*)MatrixMemAllocate( sizeof(long)*indexTableDim);
4906               // element (i,j) of this matrix is the index (in theData) for the j-th non-zero entry appears in row i of matrix Y
4907 
4908             memset (indexTable,0,indexTableDim*sizeof(long));
4909             memset (indexTable2,0,indexTableDim*sizeof(long));
4910 
4911             if (is_numeric()) {
4912                 // numeric
4913 
4914                 for (long i=0; i<secondArg.lDim; i++) {
4915                     long elementIndex = secondArg.theIndex[i];
4916                     if (__builtin_expect (elementIndex >= 0L,1)) {
4917                         long k = elementIndex/secondArg.vDim;
4918                         long j = k*secondArg.vDim+(indexVector[k]++);
4919                         indexTable [j] = elementIndex % secondArg.vDim;
4920                         indexTable2[j] = i;
4921                     }
4922                 }
4923                 for (long k=0; k<lDim; k++) {
4924                     long elementIndex = theIndex[k];
4925                     if (__builtin_expect (elementIndex >= 0L,1)) {
4926                         hyFloat c = theData[k];
4927                         long i = elementIndex / vDim,
4928                              j = elementIndex - i*vDim,
4929                              n = j*indexTableColumnWidth,
4930                              m = i*secondArg.vDim,
4931                              nonZeroCount = indexVector[j];
4932 
4933                         for (long l=n; l<n+nonZeroCount; l++) {
4934                             storage.theData[m+indexTable[l]] += c*secondArg.theData[indexTable2[l]];
4935                         }
4936                     }
4937                 }
4938             } else { // polynomial entries
4939 
4940                 for (long i=0; i<secondArg.lDim; i++) {
4941                     long elementIndex  = secondArg.theIndex[i];
4942                     if (IsNonEmpty(i)) {
4943                         long k = elementIndex/secondArg.vDim;
4944                         long j = k*secondArg.vDim+(indexVector[k]++);
4945                         indexTable [j] = elementIndex%secondArg.vDim;
4946                         indexTable2[j] = i;
4947                     }
4948                 }
4949                 for (long k=0; k<lDim; k++) {
4950                     if (IsNonEmpty(k)) {
4951                         long i = theIndex[k]/vDim;
4952                         long j = theIndex[k]%vDim;
4953                         _MathObject* p = GetMatrixObject(k);
4954                         long n = j*secondArg.vDim;
4955                         long m = i*secondArg.vDim;
4956                         for (long l=n; l<n+indexVector[j]; l++) {
4957                             _MathObject* temp = p->Mult (secondArg.GetMatrixObject (indexTable2[l]));
4958                             tempP = storage.GetMatrixObject(m+indexTable[l]%secondArg.vDim);
4959                             if (tempP) {
4960                                 storage.StoreObject (m+indexTable[l]%secondArg.vDim, tempP->Add (temp));
4961                             } else {
4962                                 storage.StoreObject (m+indexTable[l]%secondArg.vDim, temp, true);
4963                             }
4964 
4965                             DeleteObject (temp);
4966                         }
4967                     }
4968                 }
4969             }
4970 
4971             MatrixMemFree( indexTable);
4972             MatrixMemFree( indexTable2);
4973         }
4974     }
4975 
4976 
4977 }
4978 
4979 //_____________________________________________________________________________________________
4980 
HashBack(long logicalIndex) const4981 long    _Matrix::HashBack  (long logicalIndex) const {
4982 // returns element's matrix index in the form vDim*(i-1)+j, where i, j are the matrix coordinates
4983 // given a buffer index
4984     return theIndex?theIndex [logicalIndex]:logicalIndex;
4985 }
4986 
4987 //_____________________________________________________________________________________________
4988 
MaxElement(char runMode,long * indexStore) const4989 hyFloat  _Matrix::MaxElement  (char runMode, long* indexStore) const {
4990 // returns matrix's largest abs value element
4991     if (storageType == _NUMERICAL_TYPE) {
4992         hyFloat max  = 0.0,
4993                    temp;
4994 
4995         bool doAbsValue = runMode != 1 && runMode != 3,
4996              doMaxElement = runMode == 0 || runMode == 3;
4997 
4998         if (doMaxElement) {
4999             max = -INFINITY;
5000         }
5001 
5002         if (theIndex) {
5003             for (long i = 0; i<lDim; i++) {
5004                 long k = theIndex[i];
5005                 if  (k != -1) {
5006                     temp = theData[i];
5007                     if (doAbsValue && temp<0.0) {
5008                         temp = -temp;
5009                     }
5010 
5011                     if (doMaxElement) {
5012                         if (temp>max) {
5013                             max = temp;
5014                             if (indexStore) {
5015                                 *indexStore = k;
5016                             }
5017                         }
5018                     } else {
5019                         max += temp;
5020                     }
5021                 }
5022             }
5023             return max;
5024         } else {
5025 
5026             if (doAbsValue) {
5027                 if (doMaxElement) {
5028                     for (long i = 0; i<lDim; i++) {
5029                         hyFloat t = fabs(theData[i]);
5030                         if (t > max) {
5031                             max = t;
5032                             if (indexStore) {
5033                                 *indexStore = i;
5034                             }
5035                         }
5036                     }
5037                 } else {
5038                     for (long i = 0; i<lDim; i++) {
5039                         max += fabs (theData[i]);
5040                     }
5041                 }
5042             } else {
5043                 if (doMaxElement) {
5044                     for (long i = 0; i<lDim; i++) {
5045                         hyFloat t = theData[i];
5046                         if (t > max) {
5047                             max = t;
5048                             if (indexStore) {
5049                                 *indexStore = i;
5050                             }
5051                         }
5052                     }
5053                 } else {
5054                     for (long i = 0; i<lDim; i++) {
5055                         max += theData[i];
5056                     }
5057                 }
5058             }
5059 
5060             /*for (long i = 0; i<lDim; i++) {
5061                 temp = theData[i];
5062                 if (doAbsValue && temp<0.0) {
5063                     temp = -temp;
5064                 }
5065 
5066                 if (doMaxElement) {
5067                     if (temp>max) {
5068                         max = temp;
5069                         if (indexStore) {
5070                             *indexStore = i;
5071                         }
5072                     }
5073                 } else {
5074                     max += temp;
5075                 }
5076             }*/
5077             return max;
5078         }
5079     }
5080     if (runMode) {
5081         return 0;
5082     }
5083 
5084     return 10.0;
5085 }
5086 
5087 //_____________________________________________________________________________________________
5088 
RowAndColumnMax(hyFloat & r,hyFloat & c,hyFloat * cache)5089 void    _Matrix::RowAndColumnMax  (hyFloat& r, hyFloat &c, hyFloat * cache) {
5090 
5091 // returns the maximum row sum / column sum
5092 // the cache must be big enough to hold hDim + vDim
5093 // leave as nil to allocate cache run time
5094     r = c = 10.;
5095 
5096     if (is_numeric()) { // numeric matrix
5097         hyFloat  *maxScratch = cache;
5098         r = c = 0.;
5099 
5100         if (maxScratch == nil) {
5101             maxScratch = (hyFloat*)MemAllocate ((hDim+vDim)*sizeof(hyFloat), true);
5102         } else
5103             InitializeArray(maxScratch, hDim + vDim, 0.0);
5104 
5105         hyFloat * rowMax = maxScratch,
5106                      * colMax = maxScratch + hDim;
5107 
5108         if (theIndex) {
5109             if (compressedIndex) {
5110                 long from = 0L;
5111                 for (long row = 0; row < hDim; row++) {
5112                     for (long col = from; col < compressedIndex[row]; col++) {
5113                         hyFloat temp = theData[col];
5114                         if (temp > 0.0) {
5115                             rowMax[row] += temp;
5116                             colMax[compressedIndex[col+hDim]] += temp;
5117                         } else {
5118                             rowMax[row] -= temp;
5119                             colMax[compressedIndex[col+hDim]] -= temp;
5120                         }
5121                     }
5122                     from = compressedIndex[row];
5123                 }
5124             } else {
5125                 for (long i = 0; i<lDim; i++) {
5126                     long k = theIndex[i];
5127                     if  (k!=-1) {
5128                         hyFloat temp = theData[i];
5129 
5130                         if (temp>0.0) {
5131                             rowMax[k/vDim] += temp;
5132                             colMax[k%vDim] += temp;
5133                         } else {
5134                             rowMax[k/vDim] -= temp;
5135                             colMax[k%vDim] -= temp;
5136                         }
5137                     }
5138                 }
5139             }
5140         } else
5141             // dense matrix
5142             for (long i = 0, k=0; i<hDim; i++) {
5143                 for (long j=0; j<vDim; j++, k++) {
5144                     hyFloat temp = theData[k];
5145                     if (temp<0.0) {
5146                         rowMax[i] -= temp;
5147                         colMax[j] -= temp;
5148                     } else {
5149                         rowMax[i] += temp;
5150                         colMax[j] += temp;
5151                     }
5152                 }
5153             }
5154 
5155         for (long i=0; i<hDim; i++) if (rowMax[i]>r)    {
5156                 r = rowMax [i];
5157             }
5158         for (long j=0; j<vDim; j++) if (colMax[j]>c)    {
5159                 c = colMax [j];
5160             }
5161 
5162         if (!cache) {
5163             free(maxScratch);
5164         }
5165     }
5166 }
5167 
5168 //_____________________________________________________________________________________________
5169 
IsMaxElement(hyFloat bench)5170 bool    _Matrix::IsMaxElement  (hyFloat bench) {
5171 // returns matrix's largest abs value element
5172     if (is_numeric()) {
5173 
5174         hyFloat mBench = -bench;
5175         if (!theIndex || compressedIndex) {
5176             for (long i = 0; i<lDim; i++) {
5177                 hyFloat t = theData[i];
5178                 if ( t>bench || t<mBench ) {
5179                     return true;
5180                 }
5181             }
5182         } else {
5183             for (long i = 0; i<lDim; i++) {
5184                 if (theIndex [i] >= 0) {
5185                     hyFloat t = theData[i];
5186                     if ( t>bench || t<mBench ) {
5187                         return true;
5188                     }
5189                 }
5190             }
5191 
5192         }
5193         return false;
5194     } else if (storageType == 0) {
5195         _Polynomial ** pData = (_Polynomial **)theData;
5196         for (long i = 0; i<lDim; i++, pData++) {
5197             if ((*pData)->IsMaxElement(bench)) {
5198                 return true;
5199             }
5200         }
5201         return false;
5202     }
5203     return true;
5204 }
5205 
5206 //_____________________________________________________________________________________________
5207 
MaxRelError(_Matrix & compMx)5208 hyFloat  _Matrix::MaxRelError  (_Matrix& compMx)
5209 // returns matrix's largest abs value element
5210 {
5211     if (storageType == 1) {
5212         hyFloat max = 0, temp;
5213         for (long i = 0; i<lDim; i++) {
5214             temp = theData[i]/compMx.theData[i];
5215             if (temp<0.0) {
5216                 temp*=-1.0;
5217             }
5218             if (temp>max) {
5219                 max = temp;
5220             }
5221         }
5222         return max;
5223     }
5224     return 10.0;
5225 }
5226 
5227 
5228 //_____________________________________________________________________________________________
5229 
MinElement(char doAbsValue,long * storeIndex)5230 hyFloat  _Matrix::MinElement  (char doAbsValue, long* storeIndex)
5231 // returns matrix's smalles non-zero abs value element
5232 {
5233     if (storageType == 1) {
5234         hyFloat min = DBL_MAX;
5235 
5236         if (theIndex)
5237             for (long i = 0; i<lDim; i++) {
5238                 if (theIndex[i] < 0) {
5239                     continue;
5240                 }
5241 
5242                 hyFloat temp = theData[i];
5243 
5244                 if (temp < 0.0 && doAbsValue) {
5245                     temp = -temp;
5246                 }
5247 
5248                 if (temp<min) {
5249                     if (storeIndex) {
5250                         *storeIndex = theIndex[i];
5251                     }
5252                     min = temp;
5253                 }
5254 
5255             }
5256         else
5257             for (long i = 0; i<lDim; i++) {
5258                 hyFloat temp = theData[i];
5259 
5260                 if (temp < 0.0 && doAbsValue) {
5261                     temp = -temp;
5262                 }
5263 
5264                 if (temp<min) {
5265                     if (storeIndex) {
5266                         *storeIndex = i;
5267                     }
5268                     min = temp;
5269                 }
5270             }
5271 
5272         return min;
5273     } else {
5274         return 1.0;
5275     }
5276 }
5277 //_____________________________________________________________________________________________
Transpose(void)5278 void    _Matrix::Transpose (void)
5279 // transpose a matrix
5280 {
5281     if (storageType == 1) {
5282         if (hDim == vDim) { // do an in place swap
5283             if (!theIndex) { // non-sparse
5284                 for (long i = 0; i<hDim; i++)
5285                     for (long j = i+1; j<vDim; j++) {
5286                         hyFloat z      = theData[i*vDim+j];
5287                         theData[i*vDim+j] = theData[j*vDim+i];
5288                         theData[j*vDim+i] = z;
5289                     }
5290             } else { // sparse
5291                 for (long i = 0; i<lDim; i++) {
5292                     long p = theIndex[i];
5293                     if (p!=-1) {
5294                         long k      = p/vDim;
5295                         long l      = p - k*vDim;
5296 
5297                         if (l!=k) { // off - diag
5298                             p            = Hash (l,k);
5299                             hyFloat z = theData[i];
5300                             if (p>=0) {
5301                                 theData[i] = theData[p];
5302                                 theData[p] = z;
5303                             } else {
5304                                 theIndex[i]=-1;
5305                                 (*this)[l*vDim+k]=z;
5306                             }
5307                         }
5308                     }
5309                 }
5310             }
5311         } else {
5312             _Matrix result (vDim, hDim, bool(theIndex), true);
5313             if (!theIndex) { // dense
5314                 for (long i = 0; i<hDim; i++)
5315                     for (long j = 0; j<vDim; j++) {
5316                         result.theData[j*hDim+i]=theData[i*vDim+j];
5317                     }
5318             } else {
5319                 for (long i = 0; i<lDim; i++)
5320                     if (IsNonEmpty(i)) {
5321                         long r = theIndex[i]/vDim,
5322                              c = theIndex[i]%vDim;
5323 
5324                         result[c*hDim+r]=theData[i];
5325                     }
5326             }
5327             *this = result;
5328         }
5329     } else { // polynomial entries
5330         hyPointer z;
5331         if (hDim == vDim) {
5332             if (!theIndex) { // non-sparse
5333                 for (long i = 0; i<hDim; i++)
5334                     for (long j = i+1; j<vDim; j++) {
5335                         if (storageType==2) {
5336                             z = (hyPointer)GetFormula(i,j);
5337                         } else {
5338                             z = (hyPointer)GetMatrixObject(i*vDim+j);
5339                         }
5340 
5341                         if (storageType==2) {
5342                             ((_Formula**)theData)[i*vDim+j] = GetFormula(j,i);
5343                         } else {
5344                             ((HBLObjectRef*)theData)[i*vDim+j] = GetMatrixObject(j*vDim+i);
5345                         }
5346 
5347                         ((hyPointer*)theData)[j*vDim+i] = z;
5348                     }
5349             } else { // sparse
5350                 long i,k,l,p;
5351                 for (i = 0; i<lDim; i++) {
5352                     if (IsNonEmpty(i)) {
5353                         k = theIndex[i]/vDim;
5354                         l = theIndex[i]%vDim;
5355                         if (l!=k) {
5356                             p = Hash (l,k);
5357 
5358                             if (storageType==2) {
5359                                 z = (hyPointer)GetFormula(k,l);
5360                             } else {
5361                                 z = (hyPointer)GetMatrixObject(i);
5362                             }
5363 
5364                             if (p>=0) {
5365                                 if (storageType==2) {
5366                                     ((_Formula**)theData)[i]    = GetFormula(l,k);
5367                                 } else {
5368                                     ((_MathObject**)theData)[i] = GetMatrixObject(p);
5369                                 }
5370 
5371                                 ((hyPointer*)theData)[p] = z;
5372                             } else {
5373                                 theIndex[i]=-1;
5374                                 if (storageType==2) {
5375                                     StoreFormula(l,k,*(_Formula*)z,false,false);
5376                                 } else {
5377                                     StoreObject(l*vDim+k,(HBLObjectRef)z);
5378                                 }
5379                             }
5380                         }
5381                     }
5382                 }
5383             }
5384         } else {
5385 
5386             _Matrix result;
5387             CreateMatrix (&result,vDim, hDim, bool(theIndex),false,storageType==2);
5388             result.storageType = storageType;
5389             if (!theIndex) {
5390                 for (long i = 0; i<hDim; i++)
5391                     for (long j = 0; j<vDim; j++) {
5392                         if (storageType == 2) {
5393                             result.StoreFormula(j,i,*GetFormula(i,j),true,false);
5394                         } else {
5395                             z =   (hyPointer)GetMatrixObject (i*vDim+j);
5396                             result.StoreObject(j*hDim+i,(HBLObjectRef)z);
5397                             ((HBLObjectRef)z)->AddAReference();
5398                         }
5399                     }
5400             } else {
5401                 long r,c;
5402                 for (long i = 0; i<lDim; i++)
5403                     if (IsNonEmpty(i)) {
5404                         r = theIndex[i]/vDim;
5405                         c = theIndex[i]%vDim;
5406                         if (storageType == 2) {
5407                             result.StoreFormula(c,r,*GetFormula(r,c),true,false);
5408                         } else {
5409                             z =   (hyPointer)GetMatrixObject (i);
5410                             result.StoreObject(c*hDim+r,(HBLObjectRef)z);
5411                             ((HBLObjectRef)z)->AddAReference();
5412                         }
5413                     }
5414             }
5415             Swap (result);
5416             //*this = result;
5417         }
5418     }
5419 }
5420 
5421 //_____________________________________________________________________________________________
5422 
CompressSparseMatrix(bool transpose,hyFloat * stash)5423 void    _Matrix::CompressSparseMatrix (bool transpose, hyFloat * stash) {
5424     /**
5425         The purpose of this function is to "pack" the elements of a sparse matrix into an index array (theIndex) so that
5426                     (1) there are no "unused" elements in the middle
5427                     (2) the elements in theIndex are in the same order as they are in the matrix (row by row)
5428 
5429         Optonally, the packe matrix can be transposed.
5430     */
5431 
5432 
5433     if (theIndex) {
5434 
5435         if (compressedIndex && transpose) {
5436             long ai = 0;
5437             long from = 0L;
5438             memcpy (stash, theData, sizeof (hyFloat)*lDim);
5439             long * by_column_counts = (long*)alloca ((hDim + lDim) * sizeof (long));
5440 
5441             memset (by_column_counts, 0, hDim * sizeof (long));
5442 
5443             // pass 1 : count records by column
5444             long * stored_by_col = (long*)alloca (hDim * sizeof (long));
5445             memset (stored_by_col, 0, hDim * sizeof (long));
5446 
5447             for (long r = 0L; r < hDim; r++) {
5448                 for (long c = from; c < compressedIndex[r]; c++) {
5449                     by_column_counts[compressedIndex[hDim+c]] ++;
5450                 }
5451                 from = compressedIndex[r];
5452             }
5453 
5454 
5455             for (long r = 1L; r < hDim; r++) {
5456                 by_column_counts[r] += by_column_counts[r-1];
5457                 //printf ("%ld %ld (%ld)\n", r, by_column_counts[r], compressedIndex[r]);
5458             }
5459 
5460             // pass 2 : invert indices
5461             from = 0L;
5462 
5463 
5464             for (long r = 0L; r < hDim; r++) {
5465                 for (long c = from; c < compressedIndex[r]; c++) {
5466                     long my_col = compressedIndex[c+hDim];
5467                     // (i,j) => (j,i)
5468                     long new_index = my_col * hDim + r;
5469                     long col_offset = my_col ? by_column_counts[my_col-1] : 0;
5470                     long new_compressed_index = stored_by_col[my_col] + col_offset;
5471                     //printf ("%ld => %ld (%ld, %ld)\n", c, new_compressed_index,r*hDim + my_col, new_index);
5472                     theIndex[new_compressed_index] = new_index;
5473                     theData[new_compressed_index] = stash[c];
5474                     by_column_counts[hDim + new_compressed_index] = r;
5475                     stored_by_col[my_col]++;
5476                 }
5477                 from = compressedIndex[r];
5478             }
5479 
5480 
5481 
5482             memcpy (compressedIndex, by_column_counts, sizeof (long)*(lDim+hDim));
5483             //_validateCompressedStorage();
5484 
5485             /*for (long r = 0; r < hDim; r++) {
5486                 long trow = r;
5487                 for (long c = from; c < compressedIndex[r]; c++) {
5488                     long tcol = compressedIndex[hDim+c];
5489                     long transposed_index = tcol * vDim + trow;
5490                     stash[ai] = theData[c];
5491                     //sortedIndex.list_data[ai] = transposed_index;
5492                     //sortedIndex3.list_data[ai] = transposed_index;
5493                     ai++;
5494                     //if (max < transposed_index) max = transposed_index;
5495                 }
5496                 from = compressedIndex[r];
5497             }*/
5498             return;
5499         }
5500 
5501         _SimpleList sortedIndex  ((unsigned long)lDim, (long*)alloca (lDim * sizeof (long))),
5502                     sortedIndex3 ((unsigned long)lDim, (long*)alloca (lDim * sizeof (long))),
5503                     sortedIndex2;
5504 
5505 
5506         /*const long blockChunk = 32,
5507                    blockShift = hDim / blockChunk + 1*/
5508 
5509         long  max        = 0,
5510               secondDim = transpose ? hDim : vDim,
5511               firstDim  = transpose ? vDim : hDim;
5512 
5513 
5514         if (transpose) {
5515             long ai = 0L;
5516 
5517             for (long i2=0; i2<lDim; i2++) {
5518                 long k = theIndex[i2];
5519                 if  (__builtin_expect (k!=-1,1)) {
5520                     long trow = k/vDim, tcol = k - trow*vDim;
5521                     long transposed_index = tcol * vDim + trow;
5522                     stash[ai] = theData[i2];
5523                     sortedIndex.list_data[ai] = transposed_index;
5524                     sortedIndex3.list_data[ai] = transposed_index;
5525                     ai++;
5526                     if (max < transposed_index) max = transposed_index;
5527                 }
5528             }
5529 
5530             sortedIndex3.lLength = ai;
5531             sortedIndex.lLength = ai;
5532         } else {
5533             for (long i2=0; i2<lDim; i2++) {
5534                 long k = theIndex[i2];
5535                 if  (__builtin_expect (k!=-1,1)) {
5536                     stash[sortedIndex.lLength] = theData[i2];
5537                     sortedIndex  << k;
5538                     sortedIndex3 << k;
5539                     if (max < k) max = k;
5540                 }
5541             }
5542         }
5543 
5544 
5545         //printf ("_Matrix::CompressSparseMatrix %d\n%s\n%ld\n", transpose, _String ((_String*)sortedIndex3.toStr()).get_str(),max);
5546 
5547         /*if (max > (lDim<<4)) {
5548             sortedIndex2. Populate(sortedIndex.lLength,0,1);
5549             SortLists(&sortedIndex3,&sortedIndex2);
5550         } else {*/
5551             sortedIndex3.CountingSort(max+1, &sortedIndex2,false);
5552         //}
5553 
5554        lDim = sortedIndex.lLength;
5555 
5556        /*for (long i=0; i<sortedIndex.lLength; i++) {
5557            //printf ("%ld %ld", i, theIndex[i]);
5558            theIndex[i] = sortedIndex.list_data[sortedIndex2.list_data[i]];
5559            theData[i]  = stash[sortedIndex2.list_data[i]];
5560        }*/
5561 
5562         if (compressedIndex) {
5563             MatrixMemFree(compressedIndex);
5564         }
5565 
5566         compressedIndex = (long*) MatrixMemAllocate((lDim + firstDim) * sizeof (long));
5567 
5568         long              currentRow = 0;
5569         lDim = sortedIndex.lLength;
5570 
5571         for (long i=0; i<sortedIndex.lLength; i++) {
5572             //printf ("%ld %ld", i, theIndex[i]);
5573             long entryIndex = sortedIndex.list_data[sortedIndex2.list_data[i]];
5574             theIndex[i] = entryIndex;
5575 
5576             long indexRow = entryIndex / secondDim,
5577                  indexColumn = entryIndex - indexRow * secondDim;
5578 
5579             //printf ("[%ld] %ld %ld %ld\n", i, indexRow, indexColumn, currentRow);
5580 
5581             compressedIndex[i + firstDim] = indexColumn;
5582             if (indexRow > currentRow) {
5583                 for (long l = currentRow; l < indexRow; l++) {
5584                     compressedIndex[l] = i;
5585                 }
5586                 //printf (">[%ld] %ld\n", currentRow, compressedIndex[currentRow]);
5587                 currentRow = indexRow;
5588             } /*else {
5589                 if (currentRow > indexRow) {
5590                     printf ("\n\n\nBARF\n\n\n");
5591                 }
5592             }*/
5593 
5594             //printf (" %ld\n", theIndex[i]);
5595             theData[i]  = stash[sortedIndex2.list_data[i]];
5596         }
5597 
5598         for (long l = currentRow; l < firstDim; l++)
5599             compressedIndex[l] = lDim;
5600     }
5601 }
5602 
5603 
5604 //_____________________________________________________________________________________________
5605 
Exponentiate(hyFloat scale_to,bool check_transition,_Matrix * existing_storage)5606 _Matrix*    _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Matrix * existing_storage) {
5607     // find the maximal elements of the matrix
5608 
5609 
5610     try {
5611         if (!is_square()) {
5612             throw _String ("Exponentiate is not defined for non-square matrices");
5613         }
5614 
5615         long i,
5616              power2 = 0L;
5617 
5618 #ifndef _OPENMP
5619         matrix_exp_count++;
5620 #endif
5621 
5622         hyFloat max     = 1.0,
5623                 *stash,
5624                 *stash2 = 0;
5625         //  = new hyFloat[hDim*(1+vDim)];
5626 
5627         if (!is_polynomial()) {
5628             stash = (hyFloat*)alloca(sizeof (hyFloat) * hDim*(1+vDim));
5629             if (theIndex) {
5630                 // transpose sparse matrix
5631                 CompressSparseMatrix (true,stash);
5632             }
5633 
5634             hyFloat t;
5635             //bool    censor = false;
5636             RowAndColumnMax (max, t, stash);
5637             /*if (t > 10000. || max > 10000.) {
5638                 censor = true;
5639                 t   = MIN (t,10000.);
5640                 max = MIN (max, 10000.);
5641             }*/
5642             max *= t;
5643             //max = MaxElement();
5644             //max = max * max;
5645             if (max > .1) {
5646                 max             = scale_to*sqrt (10.*max);
5647                 power2          = (long)((log (max)/_log2))+1L;
5648                 max             = exp (power2 * _log2);
5649                 stash2 = (hyFloat*)alloca(sizeof (hyFloat) * lDim);
5650                 memcpy   (stash2, theData, sizeof (hyFloat) * lDim);
5651                 /*if (censor) {
5652                     for (long i = 0; i < lDim; i++) {
5653                         if (theData[i] < -10000.) {
5654                             theData[i] = -10000.;
5655                         } else {
5656                             if (theData[i] > 10000.) {
5657                                 theData[i] = 10000;
5658                             }
5659                         }
5660                     }
5661                 }*/
5662                 (*this)         *= 1.0/max;
5663             } else {
5664                 power2 = 0;
5665             }
5666 
5667 
5668         } else {
5669             max = 1.;
5670         }
5671 
5672         _Matrix *result;
5673 
5674         if (!is_polynomial() && existing_storage && existing_storage->hDim == hDim && existing_storage->vDim == vDim && existing_storage->is_numeric() && existing_storage->is_dense()) {
5675             result = existing_storage;
5676             InitializeArray(result->theData, result->lDim, 0.0);
5677         } else {
5678             result = new _Matrix(hDim, vDim , is_polynomial(), !is_polynomial());
5679         }
5680 
5681 
5682         // put ones on the diagonal
5683 
5684         if (!is_polynomial()) {
5685             long step = vDim + 1;
5686             for (long diag = 0; diag < result->lDim; diag += step) {
5687                 result->theData[diag] = 1.;
5688             }
5689         } else {
5690             for (i=0; i<(*result).hDim*(*result).vDim; i+=vDim+1) {
5691                 (*result).StoreObject(i,new _Polynomial (1.),false);
5692             }
5693         }
5694 
5695         if (max == 0.0) {
5696             return result;
5697         }
5698 
5699         (*result) += (*this);
5700 
5701         i = 2;
5702 
5703         if (precisionArg || is_polynomial()) {
5704             _Matrix temp    (*this);
5705 
5706             if (!is_polynomial()) {
5707                 for (; i<=precisionArg; i++) {
5708                     temp      *= (*this);
5709                     temp      *= 1.0/i;
5710                     (*result) += temp;
5711                 }
5712             }
5713             else {
5714                 while (temp.IsMaxElement (polynomialExpPrecision)) {
5715                     if (i>maxPolynomialExpIterates) {
5716                         break;
5717                     }
5718                     temp        *= (*this);
5719                     temp        *= 1.0/i;
5720                     (*result)   += temp;
5721                     i++;
5722                 }
5723                 if (i>maxPolynomialExpIterates) {
5724                     _String   wM ("Polynomial Matrix Exponential Failed to achieve accuracy POLYNOMIAL_EXP_PRECISION in under MAX_POLYNOMIAL_EXP_ITERATES. Either decrease the precision, or increase the maximum number of iterates.");
5725                     ReportWarning (wM);
5726                 }
5727             }
5728         } else {
5729             hyFloat tMax = MAX(MinElement()*sqrt ((hyFloat)hDim),truncPrecision);
5730 
5731             i=2;
5732 
5733 
5734             if (is_dense()) { // avoid matrix allocation
5735                 _Matrix temp ;
5736                 temp.hDim = hDim;
5737                 temp.vDim = vDim;
5738                 temp.lDim = lDim;
5739                 temp.theData = (hyFloat*)alloca(sizeof (hyFloat) * hDim*vDim);
5740                 memcpy (temp.theData, theData, sizeof (hyFloat) * hDim*vDim);
5741 
5742                 _Matrix tempS;
5743                 tempS.hDim = hDim;
5744                 tempS.vDim = vDim;
5745                 tempS.lDim = lDim;
5746                 tempS.theData = stash;
5747                 // zero out the stash TODO: 20200929 : is this necessary?
5748                 memset (stash, 0, sizeof (hyFloat)*lDim);
5749                 do {
5750                     temp.MultbyS        (*this,false, &tempS, nil);
5751                     // after this call, temp and tempS are gonna swap pointers to theData
5752                     temp      *= 1.0/i;
5753                     (*result) += temp;
5754                     i         ++;
5755 /*#ifndef _OPENMP
5756                     taylor_terms_count++;
5757 #else
5758             #pragma omp atomic
5759                 taylor_terms_count++;
5760 #endif*/
5761                 } while (temp.IsMaxElement(tMax*truncPrecision*i));
5762 
5763 
5764                 if (tempS.theData != stash) { // need to copy data to tempS
5765                     Exchange (tempS.theData, temp.theData);
5766                     memcpy (temp.theData, tempS.theData, lDim * sizeof (hyFloat));
5767                 }
5768                 tempS.theData = nil;
5769                 temp.theData = nil;
5770 
5771             } else  {
5772                 _Matrix temp    (*this);
5773                 _Matrix tempS (hDim, vDim, false, temp.storageType);
5774                 do {
5775                     temp.MultbyS        (*this,theIndex!=nil, &tempS, stash);
5776                     temp      *= 1.0/i;
5777                     (*result) += temp;
5778                     i         ++;
5779     /*#ifndef _OPENMP
5780                         taylor_terms_count++;
5781     #else
5782                 #pragma omp atomic
5783                     taylor_terms_count++;
5784     #endif*/
5785                 } while (temp.IsMaxElement(tMax*truncPrecision*i));
5786             }
5787 
5788             // use Pade (4,4) here
5789 
5790             /*_Matrix temp (*this), top (result) , bottom (result);
5791              temp *= .5;
5792              top+=temp;
5793              bottom-=temp;
5794              temp *= *this;
5795              temp *= 3.0/14.0;
5796              top+=temp;
5797              bottom+=temp;
5798              temp *= *this;
5799              temp *= 1.0/9.0;
5800              top+=temp;
5801              bottom-=temp;
5802              temp *= 1.0/20.0;
5803              top+=temp;
5804              bottom+=temp;
5805              _Matrix* inv = (_Matrix*)bottom.Inverse();
5806              top *= *inv;
5807              DeleteObject (inv);
5808              result = top;*/
5809         }
5810 
5811         if (power2) {
5812             //(*this)*=max;
5813             memcpy(theData, stash2, sizeof (hyFloat) * lDim);
5814 
5815         }
5816 
5817         if (theIndex) {
5818             // transpose back
5819             if (compressedIndex) {
5820                 long from = 0L, i = 0;
5821                 for (long r = 0; r < hDim; r++) {
5822                     #pragma GCC unroll 4
5823                     #pragma clang loop vectorize(enable)
5824                     #pragma clang loop interleave(enable)
5825                     #pragma clang loop unroll(enable)
5826                     for (long c = from; c < compressedIndex[r]; c++, i++) {
5827                         theIndex[i] = compressedIndex[c+hDim] * vDim + r;
5828                     }
5829                     from = compressedIndex[r];
5830                 }
5831                 MatrixMemFree(compressedIndex);
5832                 compressedIndex = nil;
5833             } else {
5834                 for (i=0; i<lDim; i++) {
5835                     long k = theIndex[i];
5836                     if  (k!=-1) {
5837                         long div = k / vDim;
5838                         theIndex[i] = (k - div * vDim)*vDim + div;
5839                     }
5840                 }
5841             }
5842             result->Transpose();
5843         }
5844 
5845 
5846         //_Matrix stash_mx (*result);
5847 
5848         for (long s = 0; s<power2; s++) {
5849 /*#ifndef _OPENMP
5850                     squarings_count++;
5851 #else
5852             #pragma omp atomic
5853                 squarings_count++;
5854 #endif*/
5855         /* for (i = 0; i < hDim; i++) {
5856                 if ((*result)(i,i) > 1.) {
5857                     printf ("\n%ld\n", s);
5858                     ObjectToConsole(result);
5859                     NLToConsole();
5860                     ObjectToConsole(&stash_mx);
5861                     NLToConsole();
5862                     _Matrix cp = *this;
5863                     cp.CheckIfSparseEnough(true);
5864                     ObjectToConsole(&cp);
5865                     abort();
5866                 }
5867             }*/
5868 
5869             /*if (squarings_count == 3606) {
5870                 for (long i = 0, c= 0; i < hDim; i++) {
5871                     for (long j = 0; j < vDim; j++,c++) {
5872                         fprintf (stderr, "%ld\t%ld\t%16.12g\n", i,j, result->theData[c]);
5873                     }
5874                 }
5875             }*/
5876             hyFloat maxDiff = result->Sqr(stash);
5877             /*if (squarings_count == 3606) {
5878                 fprintf (stderr, "\n\n%16.12g\n\n", maxDiff);
5879             }*/
5880             if (maxDiff < DBL_EPSILON * 1.e3) {
5881                 break;
5882             }
5883         }
5884 
5885 
5886         if (check_transition) {
5887             bool pass = true;
5888             if (result->is_dense()) {
5889                 for (unsigned long r = 0L; r < result->lDim; r += result->vDim) {
5890                     if (result->theData[r] > 1.) {
5891                         pass = false;
5892                         break;
5893                     }
5894                 }
5895             } else {
5896                 for (unsigned long r = 0L; r < result->hDim; r ++) {
5897                     if ((*result)(r,r) > 1.) {
5898                         pass = false;
5899                         break;
5900                     }
5901                 }
5902             }
5903             if (!pass) {
5904                 if (scale_to < 1.e100) {
5905                     DeleteObject (result);
5906                     return this->Exponentiate(scale_to * 100, true);
5907                 }
5908 
5909                 /*printf ("SCALE %lg : \n", scale_to);
5910 
5911                 for (unsigned long r = 0L; r < hDim; r ++) {
5912                     hyFloat sum = 0.;
5913                     printf ("%ld %18.16lg %18.16lg\n", r, (*result)(r,r), (*result)(r,r) - 1.);
5914                     for (unsigned long c = 0L; c < vDim; c++) {
5915                         sum += (*this)(r,c);
5916                     }
5917                     printf ("%ld %g\n", r, sum);
5918                 }
5919 
5920 
5921                 ObjectToConsole(this);
5922                 ObjectToConsole(result);*/
5923 
5924                 throw _String ("Failed to compute a valid transition matrix; this is usually caused by ill-conditioned rate matrices (e.g. very large rate values)");
5925             }
5926         }
5927 
5928         return result;
5929     }
5930     catch (const _String& e) {
5931         HandleApplicationError(e);
5932     }
5933 
5934     return new _Matrix;
5935 
5936 }
5937 
5938 //_____________________________________________________________________________________________
5939 
SetupSparseMatrixAllocations(void)5940 void     _Matrix::SetupSparseMatrixAllocations (void) {
5941     overflowBuffer = hDim*storageIncrement/100;
5942     bufferPerRow = MAX (1, (lDim-overflowBuffer)/hDim);
5943     overflowBuffer = lDim-bufferPerRow*hDim;
5944     allocationBlock = hDim*vDim*storageIncrement/100+1;
5945 }
5946 
5947 //_____________________________________________________________________________________________
5948 
Hash(long i,long j) const5949 long    _Matrix::Hash  (long i, long j) const {
5950 // returns element's position in the buffer (-1 if not found)
5951 
5952     if (is_dense()) {
5953         return i*vDim+j;
5954     }
5955     // ordinary matrix
5956 
5957     if (compressedIndex) {
5958         for (long c = i ? compressedIndex[i-1] : 0; c < compressedIndex[i]; c++) {
5959             if (compressedIndex[hDim+c] == j) return c;
5960         }
5961         return -1;
5962     }
5963 
5964     long elementIndex   = i*vDim+j,
5965          m              = i*bufferPerRow;
5966 
5967     for (long allocation_block_index = 0L; allocation_block_index < lDim; allocation_block_index += allocationBlock, m += allocationBlock) {
5968         // look within the row for this allocation block
5969         for (long l = m; l < m + bufferPerRow; l++) {
5970             long try_me = theIndex[l];
5971             if (try_me != elementIndex) {
5972                 if (try_me == -1) return -l - 2;
5973             } else return l;
5974         }
5975         // if not found, look in the overflow are for this block
5976         long upper_bound = MIN (lDim, allocation_block_index + allocationBlock);
5977         for (long n = upper_bound - overflowBuffer; n < upper_bound; n++) {
5978             long try_me = theIndex[n];
5979             if (try_me != elementIndex) {
5980                 if (try_me == -1) return -n - 2;
5981             } else return n;
5982         }
5983     }
5984     return -1;
5985 
5986 
5987 
5988     /*for (long blockIndex = 0; blockIndex<lDim/allocationBlock; blockIndex++,m+=allocationBlock) {
5989         for (long l=m; l<m+bufferPerRow; l++) {
5990             long p = theIndex[l];
5991             if (p!=elementIndex) {
5992                 if (p==-1) {
5993                     return -l-2;
5994                 }
5995             } else {
5996                 return l;
5997             }
5998         }
5999         long n = (blockIndex+1)*allocationBlock-1;
6000         for (long l = n; l>n-overflowBuffer; l--) {
6001             long p = theIndex[l];
6002             if (p!=elementIndex) {
6003                 if (p==-1) {
6004                     return -l-2;
6005                 }
6006             } else {
6007                 return l;
6008             }
6009         }
6010     }*/
6011     //return -1;
6012 }
6013 
6014 //_____________________________________________________________________________________________
operator ()(long i,long j) const6015 hyFloat      _Matrix::operator () (long i, long j) const {
6016     long lIndex = Hash (i,j);
6017     if (lIndex<0) {
6018         return ZEROOBJECT;
6019     } else {
6020         return theData[lIndex];
6021     }
6022 }
6023 
6024 //_____________________________________________________________________________________________
ExtractElementsByEnumeration(_SimpleList * h,_SimpleList * v,bool column)6025 _Matrix*        _Matrix::ExtractElementsByEnumeration (_SimpleList*h, _SimpleList*v, bool column) // extract by row
6026 {
6027     if (storageType && h->lLength == v->lLength && h->lLength > 0) {
6028         _Matrix * result = new _Matrix (column?h->lLength:1,column?1:h->lLength,false,true);
6029 
6030         if (storageType == 2) // formulae
6031             for (long k=0; k<h->lLength; k++) {
6032                 result->StoreFormula(column?k:0,column?0:k,*GetFormula(h->list_data[k],v->list_data[k]));
6033             }
6034         else
6035             for (long k=0; k<h->lLength; k++) {
6036                 result->theData[k] = (*this)(h->list_data[k],v->list_data[k]);
6037             }
6038 
6039         return result;
6040     }
6041     return new _Matrix;
6042 }
6043 
6044 
6045 
6046 //_____________________________________________________________________________________________
MAccess(HBLObjectRef p,HBLObjectRef p2,HBLObjectRef cache)6047 HBLObjectRef _Matrix::MAccess (HBLObjectRef p, HBLObjectRef p2, HBLObjectRef cache) {
6048   if (!p) {
6049     HandleApplicationError ( kErrorStringInvalidMatrixIndex );
6050     return _returnConstantOrUseCache (0., cache);
6051   }
6052 
6053   if (hDim <= 0L || vDim <= 0L) {
6054     return _returnConstantOrUseCache (0., cache);
6055   }
6056 
6057   if (p->ObjectClass() == MATRIX) {
6058     if (p2 == nil) {
6059       _Matrix * nn = (_Matrix*)p;
6060       if (nn->storageType == 1) {
6061         if (nn->hDim == hDim && nn->vDim == vDim) {
6062           _SimpleList hL,
6063           vL;
6064 
6065           for (long r=0; r<hDim; r++)
6066             for (long c=0; c<vDim; c++)
6067               if ((*nn)(r,c) > 0.0) {
6068                 hL << r;
6069                 vL << c;
6070               }
6071 
6072           return ExtractElementsByEnumeration (&hL,&vL);
6073         } else {
6074           if (nn->hDim > 0 && nn->vDim == 1) { // extract by row
6075             _SimpleList hL;
6076 
6077             for (unsigned long r=0UL; r<nn->hDim; r++) {
6078               long v = floor((*nn)(r,0L));
6079               if (v>=0L && v<hDim) {
6080                 hL<<v;
6081               }
6082             }
6083 
6084             if (hL.lLength) {
6085               _Matrix * result = new _Matrix (hL.lLength,vDim,false,true);
6086               unsigned long k = 0UL;
6087               for (unsigned long r=0UL; r<hL.lLength; r++) {
6088                 unsigned long ri = hL.list_data[r];
6089                 for (unsigned long c=0UL; c<vDim; c++,k++) {
6090                   result->theData[k] = (*this)(ri,c);
6091                 }
6092               }
6093               return result;
6094             }
6095 
6096             return new _Matrix;
6097           } else if (nn->vDim > 0 && nn->hDim == 1) { // extract by column
6098             _SimpleList hL;
6099 
6100             for (long r=0; r<nn->vDim; r++) {
6101               long v = (*nn)(0,r);
6102               if (v>=0 && v<vDim) {
6103                 hL<<v;
6104               }
6105             }
6106 
6107             if (hL.lLength) {
6108               _Matrix * result = new _Matrix (hDim,hL.lLength,false,true);
6109               long k = 0;
6110               for (long c=0; c<hDim; c++)
6111                 for (long r=0; r<hL.lLength; r++,k++) {
6112                   result->theData[k] = (*this)(c,hL.list_data[r]);
6113                 }
6114               return result;
6115             }
6116 
6117             return new _Matrix;
6118           }
6119         }
6120       }
6121       ReportWarning ("Incorrect dimensions or matrix type (must be numeric) for an indexing matrix in call to []");
6122     } else {
6123       if (p2->ObjectClass() == MATRIX) {
6124         _Matrix * nn =  (_Matrix*)((_Matrix*)p)->ComputeNumeric();
6125         _Matrix * nn2 = (_Matrix*)((_Matrix*)p2)->ComputeNumeric();
6126 
6127         if (nn->hDim == 1 && nn->vDim == 2 && nn->storageType == 1 && nn2->hDim == 1 && nn2->vDim == 2 && nn2->storageType == 1) {
6128           long left   = (*nn)(0,0),
6129           top    = (*nn)(0,1),
6130           bottom = (*nn2)(0,1),
6131           right  = (*nn2)(0,0);
6132 
6133           if (left >= 0 && left < hDim && right >= 0 && right < hDim && left <=right &&
6134               top >= 0 && top < vDim && bottom >=0 && bottom < vDim && top <= bottom) {
6135             _SimpleList hL,
6136             vL;
6137 
6138             for (long r=left; r<=right; r++)
6139               for (long c=top; c<=bottom; c++) {
6140                 hL << r;
6141                 vL << c;
6142               }
6143 
6144             _Matrix * subM = ExtractElementsByEnumeration (&hL,&vL);
6145             subM->hDim = right-left+1;
6146             subM->vDim = bottom-top+1;
6147 
6148             return subM;
6149           }
6150         }
6151         ReportWarning ("Incorrect dimensions or matrix type (must be numeric 2x1 matrices) for an rectangular extract in call to []");
6152       }
6153 
6154     }
6155     return new _Constant (0.0);
6156   } else {
6157     if (p->ObjectClass() == STRING) {
6158        _String aFormulaString (((_FString*)p)->get_str());
6159       _Formula f (aFormulaString, currentExecutionList ? currentExecutionList->nameSpacePrefix : nil);
6160 
6161       if (!f.IsEmpty()) {
6162         /* check formula validity */
6163 
6164 
6165           _Variable * cv = CheckReceptacle(&hy_env::matrix_element_value, kEmptyString, false),
6166                     * cr = CheckReceptacle(&hy_env::matrix_element_row, kEmptyString, false),
6167                     * cc = CheckReceptacle(&hy_env::matrix_element_column, kEmptyString, false);
6168 
6169         cv->CheckAndSet (0.0, false, NULL);
6170         cr->CheckAndSet (0.0, false, NULL);
6171         cc->CheckAndSet (0.0, false, NULL);
6172 
6173         f.Compute();
6174         if (terminate_execution) {
6175           return new _Matrix ();
6176         } else {
6177 
6178           _Formula * conditionalCheck = nil;
6179 
6180           if (p2 && p2->ObjectClass() == STRING) {
6181             conditionalCheck = new _Formula (((_FString*)p2)->get_str(), currentExecutionList ? currentExecutionList->nameSpacePrefix : nil);
6182             if (conditionalCheck->IsEmpty()) {
6183               delete conditionalCheck;
6184               conditionalCheck = nil;
6185             }
6186 
6187             conditionalCheck->Compute();
6188             if (terminate_execution) {
6189               delete conditionalCheck;
6190               return new _Matrix ();
6191             }
6192           }
6193 
6194           _Matrix   * retMatrix = new _Matrix (hDim,vDim,false,true);
6195 
6196           long          stackDepth = 0;
6197           _SimpleList   vIndexAux;
6198           _AVLList      vIndex (&vIndexAux);
6199 
6200           if (f.AmISimple (stackDepth,vIndex) && (!conditionalCheck || conditionalCheck->AmISimple(stackDepth,vIndex))) {
6201             _SimpleFormulaDatum * stack     = new _SimpleFormulaDatum [stackDepth+1],
6202             * varValues = new _SimpleFormulaDatum [vIndex.countitems()];
6203 
6204             bool                constantValue = false;
6205             hyFloat          constantV     = f.Compute()->Value();
6206 
6207             if (f.IsConstant()) {
6208               constantValue = true;
6209               constantV     = f.Compute()->Value();
6210             } else {
6211               f.ConvertToSimple (vIndex);
6212             }
6213 
6214 
6215             if (conditionalCheck) {
6216               conditionalCheck->ConvertToSimple(vIndex);
6217             }
6218 
6219             if (constantValue && !conditionalCheck) {
6220               for (long r=0; r<hDim; r++)
6221                 for (long c=0; c<vDim; c++) {
6222                   retMatrix->Store (r,c,constantV);
6223                 }
6224             } else {
6225 
6226               long rid []= {cr->get_index(),cc->get_index(),cv->get_index()};
6227 
6228               for (long k=0; k<3; k++) {
6229                 rid[k] = vIndexAux.Find(rid[k]);
6230               }
6231 
6232               PopulateArraysForASimpleFormula(vIndexAux, varValues);
6233 
6234               for (long r=0; r<hDim; r++) {
6235 
6236                 if (rid[0]>=0) {
6237                   varValues[rid[0]].value = r;
6238                 }
6239 
6240                 for (long c=0; c<vDim; c++) {
6241                   if (rid[1]>=0) {
6242                     varValues[rid[1]].value = c;
6243                   }
6244 
6245                   if (rid[2]>=0) {
6246                     varValues[rid[2]].value = (*this)(r,c);
6247                   }
6248 
6249                   if (conditionalCheck && CheckEqual(conditionalCheck->ComputeSimple(stack,varValues),0.0)) {
6250                     if (rid[2]>=0) {
6251                       retMatrix->Store (r,c,varValues[rid[2]].value);
6252                     } else {
6253                       retMatrix->Store (r,c, (*this)(r,c));
6254                     }
6255                     continue;
6256                   }
6257 
6258                   if (constantValue) {
6259                     retMatrix->Store (r,c,constantV);
6260                   } else {
6261                     //printf ("Formula eval (stack depth= %d) (%d, %g, %g) %g\n", stackDepth, rid[2], varValues[rid[2]], f.ComputeSimple(stack,varValues));
6262 
6263                     retMatrix->Store (r,c,f.ComputeSimple(stack,varValues));
6264                   }
6265                 }
6266               }
6267 
6268               f.ConvertFromSimple (vIndex);
6269             }
6270             if (conditionalCheck) {
6271               conditionalCheck->ConvertFromSimple(vIndex);
6272             }
6273 
6274             delete  [] stack;
6275             delete  [] varValues;
6276           } else {
6277             for (long r=0; r<hDim; r++) {
6278               cr->CheckAndSet (r,false, NULL);
6279               for (long c=0; c<vDim; c++) {
6280                 cc->CheckAndSet (c,false, NULL);
6281                   cv->CheckAndSet ((*this)(r,c),false, NULL);
6282                 HBLObjectRef fv;
6283 
6284                 if (conditionalCheck) {
6285                   fv = conditionalCheck->Compute();
6286                   if (fv->ObjectClass() == NUMBER)
6287                     if (CheckEqual (fv->Value(), 0.0)) {
6288                       retMatrix->Store (r,c,cv->Value());
6289                       continue;
6290                     }
6291                 }
6292 
6293                 fv = f.Compute();
6294                 if (fv->ObjectClass()==NUMBER) {
6295                   retMatrix->Store (r,c,fv->Value());
6296                 }
6297               }
6298             }
6299           }
6300           retMatrix->AmISparse();
6301           if (conditionalCheck) {
6302             delete conditionalCheck;
6303           }
6304           return retMatrix;
6305         }
6306       }
6307       ReportWarning (_String("Invalid formula expression for element-wise matrix operations: ") & ((_FString*)p)->get_str());
6308       return new _Matrix;
6309     }
6310   }
6311 
6312   long    ind1 = p->Value(),
6313   ind2 = -1;
6314 
6315   if (p2) {
6316     ind2 = p2->Value();
6317     // handle the row/column access operations here i.e. [R][-1] or [-1][R]
6318 
6319     if (ind1 == -1 && ind2 >=0 && ind2 <vDim) { // valid column access
6320       _SimpleList hL (hDim,0,1),
6321       vL (hDim,ind2,0);
6322       return ExtractElementsByEnumeration (&hL,&vL,true);
6323     }
6324 
6325     if (ind2 == -1 && ind1 >=0 && ind1 <hDim) { // valid row access
6326       _SimpleList hL (vDim,ind1,0),
6327       vL (vDim,0,1);
6328       return ExtractElementsByEnumeration (&hL,&vL);
6329     }
6330   }
6331 
6332   if (hDim == 1) {
6333     if (ind2<0) {
6334       ind2 = ind1;
6335     }
6336     ind1=0;
6337   }
6338 
6339   if (vDim == 1) {
6340     ind2 = 0;
6341   }
6342 
6343   if (ind2<0) { // allow direct vectorlike indexing, i.e m[21] = m[3][3] (if the dim is *x6)
6344     ind2  = ind1%vDim;
6345     ind1 /=vDim;
6346   }
6347 
6348   if (ind1<0 || ind1>=hDim || ind2>=vDim) {
6349     MatrixIndexError     (ind1,ind2,hDim,vDim);
6350     return _returnConstantOrUseCache (0., cache);
6351   }
6352 
6353   if (ind2>=0) { // element access
6354     return GetMatrixCell (ind1, ind2, cache);
6355   }
6356 
6357   return _returnConstantOrUseCache (0., cache);
6358 }
6359 
6360 //_____________________________________________________________________________________________
GetMatrixCell(long ind1,long ind2,HBLObjectRef cache) const6361 HBLObjectRef _Matrix::GetMatrixCell (long ind1, long ind2, HBLObjectRef cache) const {
6362     if (is_expression_based()) { // formulas
6363       if (!theIndex) {
6364         _Formula * entryFla = (((_Formula**)theData)[ind1*vDim+ind2]);
6365         if (entryFla) {
6366           return (HBLObjectRef)entryFla->Compute()->makeDynamic();
6367         } else {
6368           return _returnConstantOrUseCache (0., cache);
6369         }
6370       } else {
6371         long p = Hash (ind1, ind2);
6372         if (p<0) {
6373           return _returnConstantOrUseCache (0., cache);
6374         } else {
6375           return (HBLObjectRef)(((_Formula**)theData)[p])->Compute()->makeDynamic();
6376         }
6377       }
6378     } else {
6379       if (is_numeric()) {
6380         if (theIndex) {
6381           return _returnConstantOrUseCache ((*this)(ind1,ind2), cache);
6382         } else {
6383           return _returnConstantOrUseCache (theData[ind1*vDim+ind2], cache);
6384         }
6385 
6386       } else {
6387         _MathObject* cell;
6388         if (!theIndex) {
6389           cell = (_MathObject*)GetMatrixObject (ind1*vDim+ind2)->makeDynamic();
6390         } else {
6391           long p = Hash (ind1, ind2);
6392           if (p<0) {
6393             cell = new _Constant (0.0);
6394           } else {
6395             cell = (_MathObject*)GetMatrixObject (p)->makeDynamic();
6396           }
6397         }
6398         return cell;
6399       }
6400     }
6401 }
6402 
6403 
6404 //_____________________________________________________________________________________________
GetFormula(long ind1,long ind2) const6405 _Formula* _Matrix::GetFormula (long ind1, long ind2) const {
6406 
6407     if (hDim == 1) {
6408         if (ind2<0) {
6409             ind2 = ind1;
6410         }
6411         ind1=0;
6412     }
6413 
6414     if (vDim == 1) {
6415         ind2 = 0;
6416     }
6417 
6418     if (ind2<0) {
6419         ind2 = ind1%vDim;
6420         ind1/=vDim;
6421     }
6422 
6423     if ( ind1<0 || ind1>=hDim || ind2>=vDim) {
6424         MatrixIndexError (ind1,ind2,hDim,vDim);
6425         return nil;
6426     }
6427 
6428 
6429     if (ind2>=0) { // element access
6430         if (storageType == 2) { // formulas
6431             if (!theIndex) {
6432                 return (((_Formula**)theData)[ind1*vDim+ind2]);
6433             } else {
6434                 long p = Hash (ind1, ind2);
6435                 if (p<0) {
6436                     return nil;
6437                 } else {
6438                     return (((_Formula**)theData)[p]);
6439                 }
6440             }
6441         }
6442     }
6443 
6444     return nil;
6445 }
6446 
6447 //_____________________________________________________________________________________________
MCoord(HBLObjectRef p,HBLObjectRef p2,HBLObjectRef cachedResult)6448 HBLObjectRef _Matrix::MCoord (HBLObjectRef p, HBLObjectRef p2, HBLObjectRef cachedResult) {
6449     long ind1 = -1L,
6450          ind2 = -1L;
6451 
6452     if (!p) {
6453         HandleApplicationError ( kErrorStringInvalidMatrixIndex );
6454         return new _MathObject;
6455     }
6456 
6457     ind1 = p->Value();
6458     if (p2) {
6459         ind2 = p2->Value();
6460     }
6461 
6462 
6463     if (hDim == 1L) {
6464         if (ind2<0L) {
6465             ind2 = ind1;
6466         }
6467         ind1=0L;
6468     }
6469 
6470     if (vDim == 1L) {
6471         ind2 = 0L;
6472     }
6473 
6474     if (ind2<0L) { // allow direct vectorlike indexing, i.e m[21] = m[3][3] (if the dim is *x6)
6475         ind2 = ind1%vDim;
6476         ind1 = ind1/vDim;
6477     }
6478     _Matrix * res = nil;
6479     if (cachedResult && cachedResult->ObjectClass() == MATRIX) {
6480         res = (_Matrix*)cachedResult;
6481         if (!(res->is_numeric() && res->check_dimension(1,2))){
6482             res = nil;
6483         }
6484     }
6485     if (!res)
6486         res = new _Matrix (1L,2L,false,true);
6487 
6488     res->theData[0]=ind1;
6489     res->theData[1]=ind2;
6490     return res;
6491 
6492 }
6493 
6494 //_____________________________________________________________________________________________
MResolve(HBLObjectRef p,HBLObjectRef p2,long & ind1,long & ind2)6495 bool _Matrix::MResolve (HBLObjectRef p, HBLObjectRef p2, long& ind1, long& ind2)
6496 {
6497     ind1 = -1;
6498     ind2 = -1;
6499 
6500     if (!p) {
6501         HandleApplicationError ( kErrorStringInvalidMatrixIndex );
6502         return false;
6503     }
6504 
6505     ind1 = p->Value();
6506     if (p2) {
6507         ind2 = p2->Value();
6508     }
6509 
6510     return CheckCoordinates (ind1,ind2);
6511 }
6512 
6513 //_____________________________________________________________________________________________
6514 
CheckCoordinates(long & ind1,long & ind2)6515 bool _Matrix::CheckCoordinates (long& ind1, long& ind2)
6516 {
6517     if (hDim == 1) {
6518         if (ind2<0) {
6519             ind2 = ind1;
6520         }
6521         ind1=0;
6522     }
6523 
6524     if (vDim == 1) {
6525         ind2 = 0;
6526     }
6527 
6528     if (ind2<0) { // allow direct vectorlike indexing, i.e m[21] = m[3][3] (if the dim is *x6)
6529         if (vDim > 1) {
6530             ind2 = ind1%vDim;
6531             ind1/= vDim;
6532         } else {
6533             ind2 = 0;
6534         }
6535     }
6536 
6537     if (ind1<0 || ind1>=hDim || ind2>=vDim) {
6538         MatrixIndexError (ind1,ind2, hDim, vDim);
6539         return false;
6540     }
6541     return true;
6542 }
6543 
6544 
6545 //_____________________________________________________________________________________________
MStore(long ind1,long ind2,_Formula & f,long opCode)6546 void _Matrix::MStore (long ind1, long ind2, _Formula& f, long opCode) {
6547     if (ind2>=0) { // element storage
6548         if (is_expression_based()) { // formulas
6549             if (opCode == HY_OP_CODE_ADD) {
6550                 _Formula * addOn = GetFormula(ind1,ind2);
6551                 if (addOn) {
6552                     StoreFormula (ind1,ind2,*_Formula::PatchFormulasTogether(*addOn, f, HY_OP_CODE_ADD),false);
6553                     return;
6554                 }
6555             }
6556             StoreFormula (ind1,ind2,f);
6557         } else {
6558             if (!f.IsAConstant()) {
6559                 Convert2Formulas();
6560                 StoreFormula (ind1,ind2,f);
6561             } else {
6562                 HBLObjectRef res = f.Compute();
6563                 hyFloat toStore = res->Value();
6564                 if (opCode == HY_OP_CODE_ADD) {
6565                     toStore += (*this)(ind1,ind2);
6566                 }
6567                 Store(ind1,ind2,toStore);
6568             }
6569         }
6570     }
6571 }
6572 
6573 //_____________________________________________________________________________________________
MStore(long ind1,long ind2,HBLObjectRef value,long opCode)6574 void _Matrix::MStore (long ind1, long ind2, HBLObjectRef value, long opCode) {
6575     if (ind2>=0) { // element storage
6576         if (is_expression_based()) { // formulas
6577             value->AddAReference();
6578             if (opCode == HY_OP_CODE_ADD) {
6579                 _Formula * addOn = GetFormula(ind1,ind2);
6580                 if (addOn) {
6581                     StoreFormula (ind1,ind2,*_Formula::PatchFormulasTogether(*addOn, value, HY_OP_CODE_ADD),false);
6582                     return;
6583                 }
6584             }
6585             _Formula * f = new _Formula (value);
6586             StoreFormula (ind1,ind2,*f,false);
6587         } else {
6588             if (value->ObjectClass() != NUMBER) {
6589                 Convert2Formulas();
6590                 value->AddAReference();
6591                 _Formula * f = new _Formula (value);
6592                 StoreFormula (ind1,ind2,*f,false);
6593             } else {
6594                 hyFloat toStore = value->Value();
6595                 if (opCode == HY_OP_CODE_ADD) {
6596                     toStore += (*this)(ind1,ind2);
6597                 }
6598                 Store(ind1,ind2,toStore);
6599             }
6600         }
6601     }
6602 }
6603 
6604 //_____________________________________________________________________________________________
MStore(HBLObjectRef p,HBLObjectRef p2,_Formula & f,long opCode)6605 void _Matrix::MStore (HBLObjectRef p, HBLObjectRef p2, _Formula& f, long opCode)
6606 {
6607     long      ind1, ind2;
6608     if (MResolve (p,p2, ind1,ind2)) {
6609         MStore   (ind1,ind2,f, opCode);
6610     }
6611 }
6612 
6613 //_____________________________________________________________________________________________
MStore(HBLObjectRef p,HBLObjectRef p2,HBLObjectRef poly)6614 void _Matrix::MStore (HBLObjectRef p, HBLObjectRef p2, HBLObjectRef poly)
6615 {
6616     long      ind1, ind2;
6617     if (MResolve (p,p2, ind1,ind2)) {
6618         MStore   (ind1,ind2,poly);
6619     }
6620 
6621 }
6622 //_____________________________________________________________________________________________
MStore(long ind1,long ind2,HBLObjectRef poly)6623 void _Matrix::MStore (long ind1, long ind2, HBLObjectRef poly) {
6624     if (ind2>=0) { // element storage
6625         if (storageType == 0) { // formulas
6626             StoreObject (ind1,ind2,poly,true);
6627             if (AUTO_PAD_DIAGONAL) {
6628                 UpdateDiag (ind1,ind2,poly);
6629             }
6630         } else {
6631             _Polynomial* pp = (_Polynomial*)poly;
6632             poly = pp->IsANumber();
6633             if (!poly) { // just a number
6634                 storageType==1?ConvertNumbers2Poly():ConvertFormulas2Poly();
6635                 StoreObject (ind1,ind2,pp,true);
6636             } else {
6637                 (*this)[Hash(ind1,ind2)] = poly->Value();
6638             }
6639         }
6640     }
6641 }
6642 
6643 
6644 //_____________________________________________________________________________________________
operator [](long i)6645 hyFloat&     _Matrix::operator [] (long i) {
6646     if (is_dense()) {
6647       return theData [i];
6648     }
6649 
6650     unsigned long r = (unsigned long)i / vDim,
6651                   c = i - vDim * r;
6652 
6653     long lIndex = Hash (r, c);
6654     if (lIndex == -1) {
6655         IncreaseStorage();
6656         lIndex = Hash (r, c);
6657     }
6658     if (lIndex<0) {
6659         theIndex[-lIndex-2] = i;
6660         return ((hyFloat*)theData)[-lIndex-2];
6661     } else {
6662         return ((hyFloat*)theData)[lIndex];
6663     }
6664 }
6665 
6666 //_____________________________________________________________________________________________
Store(long i,long j,hyFloat value)6667 void        _Matrix::Store (long i, long j, hyFloat value) {
6668     if (storageType!=1) {
6669         return;
6670     }
6671 
6672     long lIndex;
6673 
6674     if (theIndex) {
6675         lIndex = Hash (i, j);
6676 
6677         if (lIndex == -1) {
6678             IncreaseStorage();
6679             lIndex = Hash (i, j);
6680         }
6681     } else {
6682         lIndex = i*vDim + j;
6683     }
6684 
6685     if (lIndex<0) {
6686         theIndex[-lIndex-2] = i*vDim+j;
6687         ((hyFloat*)theData)[-lIndex-2] = value;
6688     } else {
6689         ((hyFloat*)theData)[lIndex] = value;
6690     }
6691 
6692 }
6693 
6694 //_____________________________________________________________________________________________
StoreObject(long i,long j,_MathObject * value,bool dup)6695 void        _Matrix::StoreObject (long i, long j, _MathObject* value, bool dup) {
6696     if (storageType) {
6697         return;
6698     }
6699 
6700     long lIndex = Hash (i, j);
6701     if (lIndex == -1) {
6702         IncreaseStorage();
6703         lIndex = Hash (i, j);
6704     }
6705 
6706     if (dup) {
6707         value = (_MathObject*) value->makeDynamic();
6708     }
6709     if (lIndex<0) {
6710         theIndex[-lIndex-2] = i*vDim+j;
6711         ((_MathObject**)theData)[-lIndex-2] = value;
6712     } else {
6713         DeleteObject (GetMatrixObject(lIndex));
6714         ((_MathObject**)theData)[lIndex] = value;
6715     }
6716     if (AUTO_PAD_DIAGONAL) { // correct the diagonal entry
6717     }
6718 
6719 }
6720 //_____________________________________________________________________________________________
6721 
UpdateDiag(long i,long j,_MathObject * value)6722 void        _Matrix::UpdateDiag  (long i,long j, _MathObject* value)
6723 {
6724     if (i!=j) {
6725         _MathObject * diagCell = nil, *newCell;
6726         if (!theIndex) {
6727             diagCell = GetMatrixObject(i*hDim+i);
6728         } else {
6729             long lIndex = Hash (i,i);
6730             if (lIndex>=0) {
6731                 diagCell = GetMatrixObject(lIndex);
6732             }
6733         }
6734         if (!diagCell) {
6735             newCell = value->Minus();
6736         } else {
6737             newCell = diagCell->Sub(value);
6738         }
6739         StoreObject(i,i,newCell,false);
6740     }
6741 }
6742 //_____________________________________________________________________________________________
StoreObject(long k,_MathObject * value,bool dup)6743 void        _Matrix::StoreObject (long k, _MathObject* value, bool dup)
6744 {
6745     StoreObject (k/vDim, k%vDim, value, dup);
6746 }
6747 
6748 //_____________________________________________________________________________________________
StoreFormula(long i,long j,_Formula & f,bool copyF,bool simplify)6749 void        _Matrix::StoreFormula (long i, long j, _Formula& f, bool copyF, bool simplify) {
6750 
6751     auto do_simplify = [] (_Formula *f) -> void {
6752         _Polynomial*    is_poly = (_Polynomial*)f->ConstructPolynomial();
6753         if (is_poly) {
6754             _Formula pf (is_poly);
6755             f->Duplicate(&pf);
6756         }
6757         f->SimplifyConstants();
6758     };
6759 
6760     if (is_expression_based()) {
6761         long lIndex = Hash (i, j);
6762         if (lIndex == -1) {
6763             IncreaseStorage();
6764             lIndex = Hash (i, j);
6765         }
6766 
6767         //printf ("In (%d) %s\n",simplify, _String ((_String*)f.toStr (kFormulaStringConversionNormal)).get_str());
6768 
6769         if (lIndex<0) {
6770             theIndex[-lIndex-2] = i*vDim+j;
6771             ((_Formula**)theData)[-lIndex-2] = copyF?(_Formula*)f.makeDynamic():&f;
6772             if (simplify) {
6773                 do_simplify (((_Formula**)theData)[-lIndex-2]);
6774                 //((_Formula**)theData)[-lIndex-2]->SimplifyConstants();
6775             }
6776         } else {
6777             if (((_Formula**)theData)[lIndex]!=(_Formula*)ZEROPOINTER) {
6778                 delete ((_Formula**)theData)[lIndex];
6779             }
6780             ((_Formula**)theData)[lIndex] = copyF?(_Formula*)f.makeDynamic():&f;
6781             if (simplify) {
6782                 do_simplify (((_Formula**)theData)[lIndex]);
6783                 //((_Formula**)theData)[lIndex]->SimplifyConstants();
6784             }
6785         }
6786         //printf ("Stored %s\n", _String ((_String*)(((_Formula**)theData)[lIndex >= 0 ? lIndex : -lIndex-2])->toStr (kFormulaStringConversionNormal)).get_str());
6787 
6788 
6789         CheckIfSparseEnough();
6790     }
6791 }
6792 
6793 //_____________________________________________________________________________________________
6794 
6795 
Swap(_Matrix & m)6796 void        _Matrix::Swap (_Matrix& m){
6797     Exchange(theData,m.theData);
6798     Exchange(hDim,m.hDim);
6799     Exchange(vDim,m.vDim);
6800     Exchange(lDim,m.lDim);
6801     Exchange(theIndex,m.theIndex);
6802     Exchange(storageType,m.storageType);
6803     Exchange(bufferPerRow,m.bufferPerRow);
6804     Exchange(overflowBuffer,m.overflowBuffer);
6805     Exchange(allocationBlock,m.allocationBlock);
6806     Exchange(theValue,m.theValue);
6807     Exchange(cmd,m.cmd);
6808 }
6809 
6810 //_____________________________________________________________________________________________
6811 
AplusBx(_Matrix & B,hyFloat x)6812 void        _Matrix::AplusBx (_Matrix& B, hyFloat x)
6813 {
6814     _Matrix temp (B);
6815     temp *= x;
6816     *this+=temp;
6817 }
6818 
6819 //#define _SLKP_USE_SSE_INTRINSICS
6820 
6821 //_____________________________________________________________________________________________
Sqr(hyFloat * _hprestrict_ stash)6822 hyFloat        _Matrix::Sqr (hyFloat* _hprestrict_ stash) {
6823 
6824     hyFloat diff = 0.;
6825     if (hDim != vDim) {
6826         return diff;
6827     }
6828     // not a square matrix
6829 
6830     if (! (is_dense() && is_numeric()) ) {
6831         // sparse or non-numeric matrix
6832         _Matrix temp (hDim, vDim, storageType==0?theIndex!=nil:false, storageType);
6833         Multiply (temp, *this);
6834         Swap(temp);
6835         return DBL_EPSILON * 1.e4;
6836     } else {
6837         if (hDim==4) {
6838             // special case for nucleotides
6839             for (unsigned long i=0UL, k = 0UL; i<16; i+=4) {
6840                 for (unsigned long j=0UL; j<4UL; j++, k++) {
6841                    hyFloat p1 = theData[i]   * theData [j];
6842                    hyFloat p2 = theData[i+1] * theData [j+4];
6843                    p1 += theData[i+2] * theData [j+8];
6844                    p2 += theData[i+3] * theData [j+12];
6845                    stash[k] = p1+p2;
6846                 }
6847             }
6848         } else {
6849             long loopBound = (vDim >> 2) << 2;
6850             //vDim - vDim % 4;
6851             // loop interchange rocks!
6852 
6853 #ifdef  _SLKP_USE_ARM_NEON
6854     #define DO_GROUP_OP0(X,Y,k) Y = vld1q_f64(theData + col_offset + k); X = vmulq_f64(A4, Y);
6855     #define DO_GROUP_OP1(X,Y,k) X = vld1q_f64(dest + row_offset + k); Y = vld1q_f64(theData + col_offset + k); X = vfmaq_f64 (X,A4,Y);
6856     #define DO_GROUP_OP2(X,k) vst1q_f64 (dest + row_offset + k,X);
6857 
6858         if (true) {
6859             hyFloat  * _hprestrict_ dest = stash;
6860 
6861             long ti = 0L,
6862                  row_offset = 0L;
6863 
6864             if (loopBound == 60UL) {
6865                 if (hDim == 61) { // special case for universal code
6866 
6867                     for (long r = 0; r < 61; r++, row_offset += 61) {
6868                         long col_offset = 0L;
6869                         { // row 1
6870                             float64x2_t A4 = vdupq_n_f64 (theData[ti]);
6871                             float64x2_t X1, X2, X3, X4,
6872                                         Y1, Y2, Y3, Y4;
6873 
6874                             DO_GROUP_OP0 (X1, Y1, 0); DO_GROUP_OP0 (X2, Y2, 2); DO_GROUP_OP0 (X3, Y3, 4); DO_GROUP_OP0 (X4, Y4, 6);
6875                             DO_GROUP_OP2 (X1,0); DO_GROUP_OP2 (X2,2); DO_GROUP_OP2 (X3,4); DO_GROUP_OP2 (X4,6);
6876                             DO_GROUP_OP0 (X1, Y1, 8); DO_GROUP_OP0 (X2, Y2, 10); DO_GROUP_OP0 (X3, Y3, 12); DO_GROUP_OP0 (X4, Y4, 14);
6877                             DO_GROUP_OP2 (X1,8); DO_GROUP_OP2 (X2,10); DO_GROUP_OP2 (X3,12); DO_GROUP_OP2 (X4,14);
6878                             DO_GROUP_OP0 (X1, Y1, 16); DO_GROUP_OP0 (X2, Y2, 18); DO_GROUP_OP0 (X3, Y3, 20); DO_GROUP_OP0 (X4, Y4, 22);
6879                             DO_GROUP_OP2 (X1,16); DO_GROUP_OP2 (X2,18); DO_GROUP_OP2 (X3,20); DO_GROUP_OP2 (X4,22);
6880                             DO_GROUP_OP0 (X1, Y1, 24); DO_GROUP_OP0 (X2, Y2, 26); DO_GROUP_OP0 (X3, Y3, 28); DO_GROUP_OP0 (X4, Y4, 30);
6881                             DO_GROUP_OP2 (X1,24); DO_GROUP_OP2 (X2,26); DO_GROUP_OP2 (X3,28); DO_GROUP_OP2 (X4,30);
6882                             DO_GROUP_OP0 (X1, Y1, 32); DO_GROUP_OP0 (X2, Y2, 34); DO_GROUP_OP0 (X3, Y3, 36); DO_GROUP_OP0 (X4, Y4, 38);
6883                             DO_GROUP_OP2 (X1,32); DO_GROUP_OP2 (X2,34); DO_GROUP_OP2 (X3,36); DO_GROUP_OP2 (X4,38);
6884                             DO_GROUP_OP0 (X1, Y1, 40); DO_GROUP_OP0 (X2, Y2, 42); DO_GROUP_OP0 (X3, Y3, 44); DO_GROUP_OP0 (X4, Y4, 46);
6885                             DO_GROUP_OP2 (X1,40); DO_GROUP_OP2 (X2,42); DO_GROUP_OP2 (X3,44); DO_GROUP_OP2 (X4,46);
6886                             DO_GROUP_OP0 (X1, Y1, 48); DO_GROUP_OP0 (X2, Y2, 50); DO_GROUP_OP0 (X3, Y3, 52); DO_GROUP_OP0 (X4, Y4, 54);
6887                             DO_GROUP_OP2 (X1,48); DO_GROUP_OP2 (X2,50); DO_GROUP_OP2 (X3,52); DO_GROUP_OP2 (X4,54);
6888                             DO_GROUP_OP0 (X1, Y1, 56); DO_GROUP_OP0 (X2, Y2, 58);
6889                             DO_GROUP_OP2 (X1,56); DO_GROUP_OP2 (X2,58);
6890 
6891 
6892                             dest[row_offset + 60] = theData[ti] * theData[60];
6893                             col_offset = 61;
6894                             ti++;
6895                         }
6896 
6897                         for (long c = 1; c < 61; c++, ti++, col_offset += 61) {
6898                             float64x2_t A4 = vdupq_n_f64 (theData[ti]);
6899                             float64x2_t X1, X2, X3, X4,
6900                                         Y1, Y2, Y3, Y4;
6901 
6902                             DO_GROUP_OP1 (X1, Y1, 0); DO_GROUP_OP1 (X2, Y2, 2); DO_GROUP_OP1 (X3, Y3, 4); DO_GROUP_OP1 (X4, Y4, 6);
6903                             DO_GROUP_OP2 (X1,0); DO_GROUP_OP2 (X2,2); DO_GROUP_OP2 (X3,4); DO_GROUP_OP2 (X4,6);
6904                             DO_GROUP_OP1 (X1, Y1, 8); DO_GROUP_OP1 (X2, Y2, 10); DO_GROUP_OP1 (X3, Y3, 12); DO_GROUP_OP1 (X4, Y4, 14);
6905                             DO_GROUP_OP2 (X1,8); DO_GROUP_OP2 (X2,10); DO_GROUP_OP2 (X3,12); DO_GROUP_OP2 (X4,14);
6906                             DO_GROUP_OP1 (X1, Y1, 16); DO_GROUP_OP1 (X2, Y2, 18); DO_GROUP_OP1 (X3, Y3, 20); DO_GROUP_OP1 (X4, Y4, 22);
6907                             DO_GROUP_OP2 (X1,16); DO_GROUP_OP2 (X2,18); DO_GROUP_OP2 (X3,20); DO_GROUP_OP2 (X4,22);
6908                             DO_GROUP_OP1 (X1, Y1, 24); DO_GROUP_OP1 (X2, Y2, 26); DO_GROUP_OP1 (X3, Y3, 28); DO_GROUP_OP1 (X4, Y4, 30);
6909                             DO_GROUP_OP2 (X1,24); DO_GROUP_OP2 (X2,26); DO_GROUP_OP2 (X3,28); DO_GROUP_OP2 (X4,30);
6910                             DO_GROUP_OP1 (X1, Y1, 32); DO_GROUP_OP1 (X2, Y2, 34); DO_GROUP_OP1 (X3, Y3, 36); DO_GROUP_OP1 (X4, Y4, 38);
6911                             DO_GROUP_OP2 (X1,32); DO_GROUP_OP2 (X2,34); DO_GROUP_OP2 (X3,36); DO_GROUP_OP2 (X4,38);
6912                             DO_GROUP_OP1 (X1, Y1, 40); DO_GROUP_OP1 (X2, Y2, 42); DO_GROUP_OP1 (X3, Y3, 44); DO_GROUP_OP1 (X4, Y4, 46);
6913                             DO_GROUP_OP2 (X1,40); DO_GROUP_OP2 (X2,42); DO_GROUP_OP2 (X3,44); DO_GROUP_OP2 (X4,46);
6914                             DO_GROUP_OP1 (X1, Y1, 48); DO_GROUP_OP1 (X2, Y2, 50); DO_GROUP_OP1 (X3, Y3, 52); DO_GROUP_OP1 (X4, Y4, 54);
6915                             DO_GROUP_OP2 (X1,48); DO_GROUP_OP2 (X2,50); DO_GROUP_OP2 (X3,52); DO_GROUP_OP2 (X4,54);
6916                             DO_GROUP_OP1 (X1, Y1, 56); DO_GROUP_OP1 (X2, Y2, 58);
6917                             DO_GROUP_OP2 (X1,56); DO_GROUP_OP2 (X2,58);
6918 
6919                             dest[row_offset + 60] += theData[ti] * theData[col_offset + 60];
6920                         }
6921                     }
6922                 } else {
6923                     for (long r = 0; r < hDim; r++, row_offset += vDim) {
6924                         long col_offset = 0L;
6925                         { // row 1
6926                             float64x2_t A4 = vdupq_n_f64 (theData[ti]);
6927                             float64x2_t X1, X2, X3, X4,
6928                                         Y1, Y2, Y3, Y4;
6929 
6930                             DO_GROUP_OP0 (X1, Y1, 0); DO_GROUP_OP0 (X2, Y2, 2); DO_GROUP_OP0 (X3, Y3, 4); DO_GROUP_OP0 (X4, Y4, 6);
6931                             DO_GROUP_OP2 (X1,0); DO_GROUP_OP2 (X2,2); DO_GROUP_OP2 (X3,4); DO_GROUP_OP2 (X4,6);
6932                             DO_GROUP_OP0 (X1, Y1, 8); DO_GROUP_OP0 (X2, Y2, 10); DO_GROUP_OP0 (X3, Y3, 12); DO_GROUP_OP0 (X4, Y4, 14);
6933                             DO_GROUP_OP2 (X1,8); DO_GROUP_OP2 (X2,10); DO_GROUP_OP2 (X3,12); DO_GROUP_OP2 (X4,14);
6934                             DO_GROUP_OP0 (X1, Y1, 16); DO_GROUP_OP0 (X2, Y2, 18); DO_GROUP_OP0 (X3, Y3, 20); DO_GROUP_OP0 (X4, Y4, 22);
6935                             DO_GROUP_OP2 (X1,16); DO_GROUP_OP2 (X2,18); DO_GROUP_OP2 (X3,20); DO_GROUP_OP2 (X4,22);
6936                             DO_GROUP_OP0 (X1, Y1, 24); DO_GROUP_OP0 (X2, Y2, 26); DO_GROUP_OP0 (X3, Y3, 28); DO_GROUP_OP0 (X4, Y4, 30);
6937                             DO_GROUP_OP2 (X1,24); DO_GROUP_OP2 (X2,26); DO_GROUP_OP2 (X3,28); DO_GROUP_OP2 (X4,30);
6938                             DO_GROUP_OP0 (X1, Y1, 32); DO_GROUP_OP0 (X2, Y2, 34); DO_GROUP_OP0 (X3, Y3, 36); DO_GROUP_OP0 (X4, Y4, 38);
6939                             DO_GROUP_OP2 (X1,32); DO_GROUP_OP2 (X2,34); DO_GROUP_OP2 (X3,36); DO_GROUP_OP2 (X4,38);
6940                             DO_GROUP_OP0 (X1, Y1, 40); DO_GROUP_OP0 (X2, Y2, 42); DO_GROUP_OP0 (X3, Y3, 44); DO_GROUP_OP0 (X4, Y4, 46);
6941                             DO_GROUP_OP2 (X1,40); DO_GROUP_OP2 (X2,42); DO_GROUP_OP2 (X3,44); DO_GROUP_OP2 (X4,46);
6942                             DO_GROUP_OP0 (X1, Y1, 48); DO_GROUP_OP0 (X2, Y2, 50); DO_GROUP_OP0 (X3, Y3, 52); DO_GROUP_OP0 (X4, Y4, 54);
6943                             DO_GROUP_OP2 (X1,48); DO_GROUP_OP2 (X2,50); DO_GROUP_OP2 (X3,52); DO_GROUP_OP2 (X4,54);
6944                             DO_GROUP_OP0 (X1, Y1, 56); DO_GROUP_OP0 (X2, Y2, 58);
6945                             DO_GROUP_OP2 (X1,56); DO_GROUP_OP2 (X2,58);
6946 
6947 
6948                             for (long k = 60; k < vDim; k++) {
6949                                 dest[row_offset + k] = theData[ti] * theData[k];
6950                             }
6951                             col_offset = vDim;
6952                             ti++;
6953                         }
6954 
6955                         for (long c = 1; c < hDim; c++, ti++, col_offset += vDim) {
6956                             float64x2_t A4 = vdupq_n_f64 (theData[ti]);
6957                             float64x2_t X1, X2, X3, X4,
6958                                         Y1, Y2, Y3, Y4;
6959 
6960                             DO_GROUP_OP1 (X1, Y1, 0); DO_GROUP_OP1 (X2, Y2, 2); DO_GROUP_OP1 (X3, Y3, 4); DO_GROUP_OP1 (X4, Y4, 6);
6961                             DO_GROUP_OP2 (X1,0); DO_GROUP_OP2 (X2,2); DO_GROUP_OP2 (X3,4); DO_GROUP_OP2 (X4,6);
6962                             DO_GROUP_OP1 (X1, Y1, 8); DO_GROUP_OP1 (X2, Y2, 10); DO_GROUP_OP1 (X3, Y3, 12); DO_GROUP_OP1 (X4, Y4, 14);
6963                             DO_GROUP_OP2 (X1,8); DO_GROUP_OP2 (X2,10); DO_GROUP_OP2 (X3,12); DO_GROUP_OP2 (X4,14);
6964                             DO_GROUP_OP1 (X1, Y1, 16); DO_GROUP_OP1 (X2, Y2, 18); DO_GROUP_OP1 (X3, Y3, 20); DO_GROUP_OP1 (X4, Y4, 22);
6965                             DO_GROUP_OP2 (X1,16); DO_GROUP_OP2 (X2,18); DO_GROUP_OP2 (X3,20); DO_GROUP_OP2 (X4,22);
6966                             DO_GROUP_OP1 (X1, Y1, 24); DO_GROUP_OP1 (X2, Y2, 26); DO_GROUP_OP1 (X3, Y3, 28); DO_GROUP_OP1 (X4, Y4, 30);
6967                             DO_GROUP_OP2 (X1,24); DO_GROUP_OP2 (X2,26); DO_GROUP_OP2 (X3,28); DO_GROUP_OP2 (X4,30);
6968                             DO_GROUP_OP1 (X1, Y1, 32); DO_GROUP_OP1 (X2, Y2, 34); DO_GROUP_OP1 (X3, Y3, 36); DO_GROUP_OP1 (X4, Y4, 38);
6969                             DO_GROUP_OP2 (X1,32); DO_GROUP_OP2 (X2,34); DO_GROUP_OP2 (X3,36); DO_GROUP_OP2 (X4,38);
6970                             DO_GROUP_OP1 (X1, Y1, 40); DO_GROUP_OP1 (X2, Y2, 42); DO_GROUP_OP1 (X3, Y3, 44); DO_GROUP_OP1 (X4, Y4, 46);
6971                             DO_GROUP_OP2 (X1,40); DO_GROUP_OP2 (X2,42); DO_GROUP_OP2 (X3,44); DO_GROUP_OP2 (X4,46);
6972                             DO_GROUP_OP1 (X1, Y1, 48); DO_GROUP_OP1 (X2, Y2, 50); DO_GROUP_OP1 (X3, Y3, 52); DO_GROUP_OP1 (X4, Y4, 54);
6973                             DO_GROUP_OP2 (X1,48); DO_GROUP_OP2 (X2,50); DO_GROUP_OP2 (X3,52); DO_GROUP_OP2 (X4,54);
6974                             DO_GROUP_OP1 (X1, Y1, 56); DO_GROUP_OP1 (X2, Y2, 58);
6975                             DO_GROUP_OP2 (X1,56); DO_GROUP_OP2 (X2,58);
6976 
6977                             for (long k = 60; k < vDim; k++) {
6978                                 dest[row_offset + k] += theData[ti] * theData[col_offset + k];
6979                             }
6980                         }
6981                     }
6982                 }
6983             } else {
6984                 for (long r = 0; r < hDim; r++, row_offset += vDim) {
6985                     long col_offset = 0L;
6986                     { // row 1
6987                         float64x2_t A4 = vdupq_n_f64 (theData[ti]);
6988                         #pragma GCC unroll 4
6989                         #pragma clang loop vectorize(enable)
6990                         #pragma clang loop interleave(enable)
6991                         #pragma clang loop unroll(enable)
6992                         #pragma GCC ivdep
6993                         #pragma ivdep
6994                         for (long k = 0; k < loopBound; k+=4) {
6995                             float64x2_t D4, B4, X1, X2;
6996                             DO_GROUP_OP0 (D4, B4, k);
6997                             DO_GROUP_OP0 (X1, X2, k+2);
6998                             DO_GROUP_OP2 (D4,k);
6999                             DO_GROUP_OP2 (X1,k+2);
7000                         }
7001 
7002                         for (long k = loopBound; k < vDim; k++) {
7003                             dest[row_offset + k] = theData[ti] * theData[k];
7004                         }
7005                         col_offset = vDim;
7006                         ti++;
7007                     }
7008 
7009                     for (long c = 1; c < hDim; c++, ti++, col_offset += vDim) {
7010                         float64x2_t A4 = vdupq_n_f64 (theData[ti]);
7011                         for (long k = 0; k < loopBound; k+=4) {
7012                             float64x2_t D4, B4, X1, X2;
7013                             DO_GROUP_OP1 (D4, B4, k);
7014                             DO_GROUP_OP1 (X1, X2, k+2);
7015                             DO_GROUP_OP2 (D4,k);
7016                             DO_GROUP_OP2 (X1,k+2);
7017                         }
7018 
7019                         for (long k = loopBound; k < vDim; k++) {
7020                             dest[row_offset + k] += theData[ti] * theData[col_offset + k];
7021                         }
7022                     }
7023                 }
7024             }
7025         } else {
7026 
7027 #endif
7028 
7029 #ifdef  _SLKP_USE_AVX_INTRINSICS
7030             #define DO_GROUP_OP0(X,Y,k) Y = _mm256_loadu_pd(theData + col_offset + k); X = _mm256_mul_pd(A4, Y);
7031             #ifdef _SLKP_USE_FMA3_INTRINSICS
7032                 #define DO_GROUP_OP1(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k); Y = _mm256_loadu_pd(theData + col_offset + k); X = _mm256_fmadd_pd (A4,Y,X);
7033                 #define DO_GROUP_OP2(X,k) _mm256_storeu_pd (dest + row_offset + k,X);
7034             #else
7035                 #define DO_GROUP_OP1(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k); Y = _mm256_loadu_pd(theData + col_offset + k); X = _mm256_add_pd (X, _mm256_mul_pd(A4, Y));
7036                 #define DO_GROUP_OP2(X,k) _mm256_storeu_pd (dest + row_offset + k,X);
7037            #endif
7038            if (true) {
7039                hyFloat  * _hprestrict_ dest = stash;
7040 
7041                long ti = 0L,
7042                     row_offset = 0L;
7043 
7044                if (loopBound == 60UL) { // codons
7045                    if (hDim == 61) { // special case universal genetic code
7046                        for (long r = 0; r < 61; r++, row_offset += 61) {
7047                            long col_offset = 0L;
7048 
7049                            {  // handle first row separately to zero out the dest entries
7050                                __m256d A4 = _mm256_set1_pd(theData[ti]);
7051                                __m256d D4_1, D4_2, D4_3, D4_4;
7052                                __m256d B4_1, B4_2, B4_3, B4_4;
7053 
7054                                DO_GROUP_OP0 (D4_1,B4_1,0);
7055                                DO_GROUP_OP0 (D4_2,B4_2,4);
7056                                DO_GROUP_OP0 (D4_3,B4_3,8);
7057                                DO_GROUP_OP0 (D4_4,B4_4,12);
7058                                DO_GROUP_OP2 (D4_1,0);
7059                                DO_GROUP_OP2 (D4_2,4);
7060                                DO_GROUP_OP2 (D4_3,8);
7061                                DO_GROUP_OP2 (D4_4,12);
7062 
7063                                DO_GROUP_OP0 (D4_1,B4_1,16);
7064                                DO_GROUP_OP0 (D4_2,B4_2,20);
7065                                DO_GROUP_OP0 (D4_3,B4_3,24);
7066                                DO_GROUP_OP0 (D4_4,B4_4,28);
7067                                DO_GROUP_OP2 (D4_1,16);
7068                                DO_GROUP_OP2 (D4_2,20);
7069                                DO_GROUP_OP2 (D4_3,24);
7070                                DO_GROUP_OP2 (D4_4,28);
7071 
7072                                DO_GROUP_OP0 (D4_1,B4_1,32);
7073                                DO_GROUP_OP0 (D4_2,B4_2,36);
7074                                DO_GROUP_OP0 (D4_3,B4_3,40);
7075                                DO_GROUP_OP0 (D4_4,B4_4,44);
7076                                DO_GROUP_OP2 (D4_1,32);
7077                                DO_GROUP_OP2 (D4_2,36);
7078                                DO_GROUP_OP2 (D4_3,40);
7079                                DO_GROUP_OP2 (D4_4,44);
7080 
7081                                DO_GROUP_OP0 (D4_1,B4_1,48);
7082                                DO_GROUP_OP0 (D4_2,B4_2,52);
7083                                DO_GROUP_OP0 (D4_3,B4_3,56);
7084                                DO_GROUP_OP2 (D4_1,48);
7085                                DO_GROUP_OP2 (D4_2,52);
7086                                DO_GROUP_OP2 (D4_3,56);
7087 
7088                                dest[row_offset + 60] = theData[ti] * theData[60];
7089                                ti++;
7090                                col_offset = vDim;
7091                            }
7092                            for (long c = 1; c < 61; c++, ti++, col_offset += 61) {
7093                                __m256d A4 = _mm256_set1_pd(theData[ti]);
7094 
7095                                __m256d D4_1, D4_2, D4_3, D4_4;
7096                                __m256d B4_1, B4_2, B4_3, B4_4;
7097 
7098                                DO_GROUP_OP1 (D4_1,B4_1,0);
7099                                DO_GROUP_OP1 (D4_2,B4_2,4);
7100                                DO_GROUP_OP1 (D4_3,B4_3,8);
7101                                DO_GROUP_OP1 (D4_4,B4_4,12);
7102                                DO_GROUP_OP2 (D4_1,0);
7103                                DO_GROUP_OP2 (D4_2,4);
7104                                DO_GROUP_OP2 (D4_3,8);
7105                                DO_GROUP_OP2 (D4_4,12);
7106 
7107                                DO_GROUP_OP1 (D4_1,B4_1,16);
7108                                DO_GROUP_OP1 (D4_2,B4_2,20);
7109                                DO_GROUP_OP1 (D4_3,B4_3,24);
7110                                DO_GROUP_OP1 (D4_4,B4_4,28);
7111                                DO_GROUP_OP2 (D4_1,16);
7112                                DO_GROUP_OP2 (D4_2,20);
7113                                DO_GROUP_OP2 (D4_3,24);
7114                                DO_GROUP_OP2 (D4_4,28);
7115 
7116                                DO_GROUP_OP1 (D4_1,B4_1,32);
7117                                DO_GROUP_OP1 (D4_2,B4_2,36);
7118                                DO_GROUP_OP1 (D4_3,B4_3,40);
7119                                DO_GROUP_OP1 (D4_4,B4_4,44);
7120                                DO_GROUP_OP2 (D4_1,32);
7121                                DO_GROUP_OP2 (D4_2,36);
7122                                DO_GROUP_OP2 (D4_3,40);
7123                                DO_GROUP_OP2 (D4_4,44);
7124 
7125                                DO_GROUP_OP1 (D4_1,B4_1,48);
7126                                DO_GROUP_OP1 (D4_2,B4_2,52);
7127                                DO_GROUP_OP1 (D4_3,B4_3,56);
7128                                DO_GROUP_OP2 (D4_1,48);
7129                                DO_GROUP_OP2 (D4_2,52);
7130                                DO_GROUP_OP2 (D4_3,56);
7131 
7132                                dest[row_offset + 60] += theData[ti] * theData[col_offset + 60];
7133 
7134                            }
7135                        }
7136 
7137                    } else {
7138                        for (long r = 0; r < hDim; r++, row_offset += vDim) {
7139                            long col_offset = 0L;
7140 
7141                            {  // handle first row separately to zero out the dest entries
7142                                __m256d A4 = _mm256_set1_pd(theData[ti]);
7143                                __m256d D4_1, D4_2, D4_3, D4_4;
7144                                __m256d B4_1, B4_2, B4_3, B4_4;
7145 
7146                                DO_GROUP_OP0 (D4_1,B4_1,0);
7147                                DO_GROUP_OP0 (D4_2,B4_2,4);
7148                                DO_GROUP_OP0 (D4_3,B4_3,8);
7149                                DO_GROUP_OP0 (D4_4,B4_4,12);
7150                                DO_GROUP_OP2 (D4_1,0);
7151                                DO_GROUP_OP2 (D4_2,4);
7152                                DO_GROUP_OP2 (D4_3,8);
7153                                DO_GROUP_OP2 (D4_4,12);
7154 
7155                                DO_GROUP_OP0 (D4_1,B4_1,16);
7156                                DO_GROUP_OP0 (D4_2,B4_2,20);
7157                                DO_GROUP_OP0 (D4_3,B4_3,24);
7158                                DO_GROUP_OP0 (D4_4,B4_4,28);
7159                                DO_GROUP_OP2 (D4_1,16);
7160                                DO_GROUP_OP2 (D4_2,20);
7161                                DO_GROUP_OP2 (D4_3,24);
7162                                DO_GROUP_OP2 (D4_4,28);
7163 
7164                                DO_GROUP_OP0 (D4_1,B4_1,32);
7165                                DO_GROUP_OP0 (D4_2,B4_2,36);
7166                                DO_GROUP_OP0 (D4_3,B4_3,40);
7167                                DO_GROUP_OP0 (D4_4,B4_4,44);
7168                                DO_GROUP_OP2 (D4_1,32);
7169                                DO_GROUP_OP2 (D4_2,36);
7170                                DO_GROUP_OP2 (D4_3,40);
7171                                DO_GROUP_OP2 (D4_4,44);
7172 
7173                                DO_GROUP_OP0 (D4_1,B4_1,48);
7174                                DO_GROUP_OP0 (D4_2,B4_2,52);
7175                                DO_GROUP_OP0 (D4_3,B4_3,56);
7176                                DO_GROUP_OP2 (D4_1,48);
7177                                DO_GROUP_OP2 (D4_2,52);
7178                                DO_GROUP_OP2 (D4_3,56);
7179 
7180                                for (long k = loopBound; k < vDim; k++) {
7181                                    dest[row_offset + k] = theData[ti] * theData[k];
7182                                }
7183                                ti++;
7184                                col_offset = vDim;
7185                            }
7186                            for (long c = 1; c < hDim; c++, ti++, col_offset += vDim) {
7187                                __m256d A4 = _mm256_set1_pd(theData[ti]);
7188 
7189                                __m256d D4_1, D4_2, D4_3, D4_4;
7190                                __m256d B4_1, B4_2, B4_3, B4_4;
7191 
7192                                DO_GROUP_OP1 (D4_1,B4_1,0);
7193                                DO_GROUP_OP1 (D4_2,B4_2,4);
7194                                DO_GROUP_OP1 (D4_3,B4_3,8);
7195                                DO_GROUP_OP1 (D4_4,B4_4,12);
7196                                DO_GROUP_OP2 (D4_1,0);
7197                                DO_GROUP_OP2 (D4_2,4);
7198                                DO_GROUP_OP2 (D4_3,8);
7199                                DO_GROUP_OP2 (D4_4,12);
7200 
7201                                DO_GROUP_OP1 (D4_1,B4_1,16);
7202                                DO_GROUP_OP1 (D4_2,B4_2,20);
7203                                DO_GROUP_OP1 (D4_3,B4_3,24);
7204                                DO_GROUP_OP1 (D4_4,B4_4,28);
7205                                DO_GROUP_OP2 (D4_1,16);
7206                                DO_GROUP_OP2 (D4_2,20);
7207                                DO_GROUP_OP2 (D4_3,24);
7208                                DO_GROUP_OP2 (D4_4,28);
7209 
7210                                DO_GROUP_OP1 (D4_1,B4_1,32);
7211                                DO_GROUP_OP1 (D4_2,B4_2,36);
7212                                DO_GROUP_OP1 (D4_3,B4_3,40);
7213                                DO_GROUP_OP1 (D4_4,B4_4,44);
7214                                DO_GROUP_OP2 (D4_1,32);
7215                                DO_GROUP_OP2 (D4_2,36);
7216                                DO_GROUP_OP2 (D4_3,40);
7217                                DO_GROUP_OP2 (D4_4,44);
7218 
7219                                DO_GROUP_OP1 (D4_1,B4_1,48);
7220                                DO_GROUP_OP1 (D4_2,B4_2,52);
7221                                DO_GROUP_OP1 (D4_3,B4_3,56);
7222                                DO_GROUP_OP2 (D4_1,48);
7223                                DO_GROUP_OP2 (D4_2,52);
7224                                DO_GROUP_OP2 (D4_3,56);
7225 
7226                                for (long k = loopBound; k < vDim; k++) {
7227                                    dest[row_offset + k] += theData[ti] * theData[col_offset + k];
7228                                }
7229                            }
7230                        }
7231                    }
7232                } else { // something else
7233                    for (long r = 0; r < hDim; r++, row_offset += vDim) {
7234                        long col_offset = 0L;
7235 
7236                        {
7237                            __m256d A4 = _mm256_set1_pd(theData[ti]);
7238                            for (long k = 0; k < loopBound; k+=4) {
7239                                __m256d D4, B4;
7240                                DO_GROUP_OP0 (D4, B4, k);
7241                                DO_GROUP_OP2 (D4,k);
7242                            }
7243 
7244                            for (long k = loopBound; k < vDim; k++) {
7245                                dest[row_offset + k] = theData[ti] * theData[k];
7246                            }
7247                            col_offset = vDim;
7248                            ti++;
7249                        }
7250 
7251                        for (long c = 1; c < hDim; c++, ti++, col_offset += vDim) {
7252                            __m256d A4 = _mm256_set1_pd(theData[ti]);
7253                            for (long k = 0; k < loopBound; k+=4) {
7254                                __m256d D4, B4;
7255                                DO_GROUP_OP1 (D4, B4, k);
7256                                DO_GROUP_OP2 (D4,k);
7257                            }
7258 
7259                            for (long k = loopBound; k < vDim; k++) {
7260                                dest[row_offset + k] += theData[ti] * theData[col_offset + k];
7261                            }
7262                        }
7263                    }
7264                }
7265            } else {
7266 
7267 #endif
7268 
7269 
7270             hyFloat  * _hprestrict_ column = stash+lDim;
7271             hyFloat const  * source = theData;
7272 
7273             for (long j = 0; j < vDim; j++) {
7274                 for (long c = 0; c < vDim; c++) {
7275                     column[c] = source[j + c * vDim];
7276                 }
7277 
7278 #ifdef _SLKP_USE_AVX_INTRINSICS
7279                 if (vDim == 61UL) {
7280                   for (unsigned long i = 0; i < lDim; i += 61) {
7281                     hyFloat * _hprestrict_ row = theData + i;
7282 
7283 
7284                      __m256d   sum256;
7285 
7286 #ifdef _SLKP_USE_FMA3_INTRINSICS
7287 
7288 
7289                         sum256 = _mm256_fmadd_pd (_mm256_loadu_pd (row), _mm256_loadu_pd (column),
7290                                                  _mm256_fmadd_pd (_mm256_loadu_pd (row+4), _mm256_loadu_pd (column+4),
7291                                          _mm256_mul_pd (_mm256_loadu_pd (row+8), _mm256_loadu_pd (column+8))));
7292 
7293                         for (unsigned long k = 12UL; k < 60UL; k += 12UL) {
7294                             sum256 =  _mm256_fmadd_pd (_mm256_loadu_pd (row+k), _mm256_loadu_pd (column+k),
7295                                                         _mm256_fmadd_pd (_mm256_loadu_pd (row+k+4), _mm256_loadu_pd (column+k+4),
7296                                                         _mm256_fmadd_pd (_mm256_loadu_pd (row+k+8), _mm256_loadu_pd (column+k+8), sum256))
7297                                                        );
7298                         }
7299 #else
7300 
7301                     sum256 = _mm256_setzero_pd();
7302                     for (unsigned long k = 0UL; k < 60UL; k += 12UL) {
7303                       __m256d term0 = _mm256_mul_pd (_mm256_loadu_pd (row+k), _mm256_loadu_pd (column+k));
7304                       __m256d term1 = _mm256_mul_pd (_mm256_loadu_pd (row+k+4), _mm256_loadu_pd (column+k+4));
7305                       __m256d term2 = _mm256_mul_pd (_mm256_loadu_pd (row+k+8), _mm256_loadu_pd (column+k+8));
7306 
7307                       __m256d sum01 = _mm256_add_pd(term0,term1);
7308                       __m256d plus2 = _mm256_add_pd(term2, sum256);
7309 
7310                       sum256 = _mm256_add_pd (sum01, plus2);
7311                     }
7312 #endif
7313 
7314                     stash[i+j] = _avx_sum_4(sum256) + row[60] * column [60];
7315 
7316                   }
7317 
7318                 } else {
7319                   for (unsigned long i = 0; i < lDim; i += vDim) {
7320                       hyFloat * row = theData + i;
7321 
7322                       __m256d   sum256 = _mm256_setzero_pd();
7323 
7324                       long k;
7325 
7326                       for (k = 0; k < loopBound; k += 4) {
7327 #ifdef _SLKP_USE_FMA3_INTRINSICS
7328                           sum256 = _mm256_fmadd_pd (_mm256_loadu_pd (row+k), _mm256_loadu_pd (column+k), sum256);
7329 #else
7330                           sum256 = _mm256_add_pd (_mm256_mul_pd (_mm256_loadu_pd (row+k), _mm256_loadu_pd (column+k)), sum256);
7331 #endif
7332                       }
7333 
7334                       hyFloat result = _avx_sum_4(sum256);
7335 
7336                       for (; k < vDim; k++) {
7337                           result += row[k] * column [k];
7338                       }
7339 
7340                       stash[i+j] = result;
7341 
7342                   }
7343                 }
7344 #ifdef  _SLKP_USE_AVX_INTRINSICS
7345             }
7346 #endif
7347 #else
7348                 for (long i = 0; i < lDim; i += vDim) {
7349                     hyFloat * row    = theData + i,
7350                                  buffer [4] = {0.,0.,0.,0.};
7351 
7352 
7353                     unsigned long        k;
7354 
7355                     for (k = 0UL; k < loopBound; k += 4UL) {
7356                         buffer [0] += row[k] * column [k];
7357                         buffer [1] += row[k+1] * column [k+1];
7358                         buffer [2] += row[k+2] * column [k+2];
7359                         buffer [3] += row[k+3] * column [k+3];
7360                     }
7361 
7362                     for (; k < vDim; k++) {
7363                         buffer[0] += row[k] * column [k];
7364                     }
7365 
7366                     stash[i+j] = (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]);
7367                 }
7368 #endif
7369            }
7370         }
7371 #ifdef  _SLKP_USE_ARM_NEON
7372         }
7373 #endif
7374 
7375         long lDimmod4 = (lDim >> 2) << 2;
7376         hyFloat diffs[4] = {0.0,0.0,0.0,0.0};
7377 
7378         for (long s = 0; s < lDimmod4; s+=4) {
7379             hyFloat d1 = fabs (theData[s  ] - stash[s  ]);
7380             hyFloat d2 = fabs (theData[s+1] - stash[s+1]);
7381             hyFloat d3 = fabs (theData[s+2] - stash[s+2]);
7382             hyFloat d4 = fabs (theData[s+3] - stash[s+3]);
7383             if (d1 > diffs[0]) diffs[0] = d1;
7384             if (d2 > diffs[1]) diffs[1] = d2;
7385             if (d3 > diffs[2]) diffs[2] = d3;
7386             if (d4 > diffs[3]) diffs[3] = d4;
7387         }
7388 
7389         for (long s = lDimmod4; s < lDim; s++) {
7390             hyFloat d1 = fabs (theData[s] - stash[s]);
7391             if (d1 > diffs[0]) diffs[0] = d1;
7392         }
7393 
7394         diff = MAX (MAX (diffs[0], diffs[1]), MAX (diffs[2], diffs[3]));
7395 
7396         memcpy (theData, stash, lDim * sizeof (hyFloat));
7397 
7398         /*for (long s = 0; s < lDim; s++) {
7399             StoreIfGreater(diff, fabs (theData[s] - stash[s]));
7400             theData[s] = stash[s];
7401         }*/
7402     }
7403     return diff;
7404 }
7405 //_____________________________________________________________________________________________
AgreeObjects(_Matrix & m)7406 void        _Matrix::AgreeObjects (_Matrix& m)
7407 {
7408   if (storageType==2) {
7409     if (toPolyOrNot!=0.0) {
7410       ConvertFormulas2Poly ();
7411     } else {
7412       Evaluate(true);
7413     }
7414   }
7415 
7416   if (m.storageType==2) {
7417     if (toPolyOrNot!=0.0) {
7418       m.ConvertFormulas2Poly ();
7419     } else {
7420       m.Evaluate(true);
7421     }
7422   }
7423 
7424   if (storageType!=m.storageType) {
7425     if (toPolyOrNot) {
7426       if (storageType == 1) {
7427         ConvertNumbers2Poly ();
7428       } else {
7429         m.ConvertNumbers2Poly ();
7430       }
7431     } else {
7432       if (storageType == 1) {
7433         m.Evaluate (true);
7434       } else {
7435         Evaluate ();
7436       }
7437     }
7438   }
7439 }
7440 //_____________________________________________________________________________________________
ConvertFormulas2Poly(bool force2numbers)7441 void        _Matrix::ConvertFormulas2Poly (bool force2numbers)
7442 {
7443     bool conversionFlag = true;
7444     _MathObject** tempStorage = (_MathObject**)MatrixMemAllocate(sizeof(void*)*lDim);
7445 
7446     long i;
7447 
7448     for (i=0; i<lDim; i++) {
7449         tempStorage[i]=ZEROPOINTER;
7450     }
7451 
7452 
7453     if (theIndex) { // sparse
7454         for (i=0; i<lDim; i++) {
7455             if (IsNonEmpty(i)) {
7456                 HBLObjectRef polyCell = ((_Formula**)theData)[i]->ConstructPolynomial();
7457                 if (polyCell) { // valid polynomial conversion
7458                     tempStorage[i] = (HBLObjectRef)polyCell;
7459                     polyCell->AddAReference();
7460                 } else {
7461                     conversionFlag = false;
7462                     break;
7463                 }
7464             }
7465         }
7466         if (conversionFlag) {
7467             // check for "*" entries
7468             for (i=0; i<lDim; i++) {
7469                 if (IsNonEmpty(i)) {
7470                     if (((_Formula**)theData)[i]->IsEmpty()) { // "*" entry
7471                         long r = theIndex[i]/vDim, c = theIndex[i]%vDim;
7472                         _Polynomial diag;
7473                         for (long j=0; j<vDim; j++) {
7474                             if (j==c) {
7475                                 continue;
7476                             }
7477                             long h = Hash (r,j);
7478                             if (h>=0) {
7479                                 _Polynomial * temp = (_Polynomial *)diag.Sub(tempStorage[h], nil);
7480                                 diag.Duplicate (temp);
7481                                 DeleteObject (temp);
7482                             }
7483                         }
7484                         DeleteObject(tempStorage[i]);
7485                         tempStorage[i]=(_Polynomial*)diag.makeDynamic();
7486                     }
7487                 }
7488             }
7489         }
7490     } else {
7491         for (long i=0; i<lDim; i++) {
7492             _Formula* f = ((_Formula**)theData)[i];
7493             if (f->IsEmpty()) {
7494                 continue;
7495             }
7496             HBLObjectRef polyCell = f->ConstructPolynomial();
7497             if (polyCell) { // valid polynomial conversion
7498                 tempStorage[i] = (HBLObjectRef)polyCell;
7499                 polyCell->AddAReference();
7500             } else {
7501                 conversionFlag = false;
7502                 break;
7503             }
7504         }
7505         if (conversionFlag) {
7506             // check for "*" entries
7507             for (long i=0; i<lDim; i++) {
7508                 if (((_Formula**)theData)[i]->IsEmpty()) { // "*" entry
7509                     long r = i/vDim;
7510                     _Polynomial diag;
7511                     for (long j=vDim*r; j<vDim*(r+1); j++) {
7512                         if (j==i) {
7513                             continue;
7514                         }
7515                         _Polynomial * temp = (_Polynomial *)diag.Sub(tempStorage[j], nil);
7516                         diag.Duplicate (temp);
7517                         DeleteObject (temp);
7518                     }
7519                     DeleteObject(tempStorage[i]);
7520                     tempStorage[i]=(_Polynomial*)diag.makeDynamic();
7521                 }
7522             }
7523         }
7524     }
7525 
7526     if (conversionFlag) { // successful conversion
7527         ClearFormulae();
7528         MatrixMemFree (theData);
7529         theData = (hyFloat*) tempStorage;
7530         storageType = 0;
7531         if (!theIndex) {
7532             _Polynomial zero_polynomial;
7533             for (i=0; i<lDim; i++)
7534                 if (!GetMatrixObject (i)) {
7535                     StoreObject (i,&zero_polynomial,true);
7536                 }
7537         }
7538     } else {
7539         for (long i=0; i<lDim; i++) {
7540             DeleteObject (tempStorage[i]);
7541         }
7542         MatrixMemFree (tempStorage);
7543         if (force2numbers) {
7544             Evaluate(true);
7545         }
7546     }
7547 
7548 }
7549 
7550 //_____________________________________________________________________________________________
ConvertNumbers2Poly(void)7551 void        _Matrix::ConvertNumbers2Poly (void)
7552 {
7553     _MathObject ** tempStorage = (_MathObject**)MatrixMemAllocate (lDim*sizeof (void*));
7554     if (!theIndex) {
7555         for (long i=0; i<lDim; i++) {
7556             tempStorage[i]=new _Polynomial (theData[i]);
7557         }
7558     } else {
7559         for (long i=0; i<lDim; i++)
7560             if (IsNonEmpty (i)) {
7561                 tempStorage[i]=new _Polynomial (theData[i]);
7562             } else {
7563                 tempStorage[i] = nil;
7564             }
7565     }
7566     MatrixMemFree (theData);
7567     theData = (hyFloat*) tempStorage;
7568     storageType = 0;
7569 }
7570 
7571 
7572 //_____________________________________________________________________________________________
operator +=(_Matrix & m)7573 void        _Matrix::operator += (_Matrix& m)
7574 {
7575     AgreeObjects (m);
7576     if ((!m.theIndex) && theIndex) {
7577         CheckIfSparseEnough(true);
7578     }
7579     AddMatrix (*this,m);
7580 }
7581 
7582 //______________________________________________________________
7583 
CompareRows(const long row1,const long row2)7584 long    _Matrix::CompareRows (const long row1, const long row2) {
7585     for (long column_id = 0; column_id < vDim; column_id ++) {
7586         hyFloat v1 = theData[row1*vDim+column_id],
7587                    v2 = theData[row2*vDim+column_id];
7588         if (!CheckEqual (v1,v2)) {
7589             return (v1 < v2)?-1L:1L;
7590         }
7591     }
7592     return 0L;
7593 }
7594 
7595 //______________________________________________________________
7596 
SwapRows(const long row1,const long row2)7597 void    _Matrix::SwapRows (const long row1, const long row2) {
7598     long idx1 = row1*vDim,
7599          idx2 = row2*vDim;
7600     for (long column_id = 0; column_id < vDim; column_id ++) {
7601         hyFloat t = theData[idx1];
7602         theData[idx1++] = theData[idx2];
7603         theData[idx2++] = t;
7604     }
7605 }
7606 //______________________________________________________________
7607 
RecursiveIndexSort(long from,long to,_SimpleList * index)7608 void    _Matrix::RecursiveIndexSort (long from, long to, _SimpleList* index) {
7609     long            middle          = (from+to) >> 1,
7610                     bottommove      = 1L,
7611                     topmove         = 1L;
7612 
7613     /*
7614         Use '+' to denote an element that is gretae than 'M' (the 'middle' element)
7615         and '-' to denote an element than is less than 'M'
7616 
7617         Initially we may have something like
7618 
7619         --++--+M--+++--++-
7620         and we want to end up with
7621         ---------M+++++++
7622 
7623         Initially, we arrange the elements as
7624 
7625         ----+++M-----++++++, and then swap 'bottommove' pluses (of which there are 3 in this case)
7626                             with 'topmove' minuses (of which there are 5)
7627 
7628      */
7629 
7630 
7631     if (middle)
7632         while (middle-bottommove>=from && CompareRows (middle-bottommove, middle) > 0L) {
7633             bottommove++;
7634         }
7635     if (from<to)
7636         while (middle+topmove<=to && CompareRows (middle+topmove,middle) < 0L) {
7637             topmove++;
7638         }
7639 
7640     for (long i=from; i<middle-bottommove; i++)
7641         if (CompareRows (i, middle) >= 0L) {
7642             SwapRows (middle-bottommove, i);
7643             index->Swap(middle-bottommove,i);
7644             bottommove++;
7645 
7646             while (middle-bottommove>=from && CompareRows (middle-bottommove, middle) > 0L) {
7647                 bottommove++;
7648             }
7649         }
7650 
7651     {
7652         for (long i=middle+topmove+1; i<=to; i++)
7653             if (CompareRows(i,middle) <= 0L) {
7654                 SwapRows   (i, middle+topmove);
7655                 index->Swap(i, middle+topmove);
7656 
7657                 topmove++;
7658                 while (middle+topmove<=to && CompareRows (middle+topmove,middle) < 0L) {
7659                    topmove++;
7660                 }
7661             }
7662     }
7663 
7664     if (topmove==bottommove) {
7665         for (long i=1; i<bottommove; i++) {
7666             SwapRows(middle+i, middle-i);
7667             index->Swap (middle+i, middle-i);
7668         }
7669     } else if (topmove>bottommove) {
7670         long shift = topmove-bottommove;
7671         // in the example above, shift = 2
7672 
7673         for (long i=1; i<bottommove; i++) {
7674              SwapRows (middle-i, middle+i+shift);
7675              index->Swap(middle-i, middle+i+shift);
7676         }
7677         // at the end of this loop, the example above will look like
7678         // -------M--+++++++++, so now if we swap 'M' with the last '-', we'll arrive at the desired configuration
7679 
7680         SwapRows    (middle, middle+shift);
7681         index->Swap (middle, middle+shift);
7682         middle+=shift;
7683 
7684     } else {
7685         long shift = bottommove-topmove;
7686         for (long i=1; i<topmove; i++) {
7687             SwapRows (middle+i, middle-i-shift);
7688             index->Swap (middle+i, middle-i-shift);
7689         }
7690 
7691         SwapRows    (middle, middle-shift);
7692         index->Swap (middle, middle-shift);
7693         middle-=shift;
7694     }
7695 
7696     if (to>middle+1) {
7697         RecursiveIndexSort (middle+1,to, index);
7698     }
7699     if (from<middle-1) {
7700         RecursiveIndexSort (from,middle-1, index);
7701     }
7702 }
7703 
7704 //_____________________________________________________________________________________________
SortMatrixOnColumn(HBLObjectRef mp,HBLObjectRef cache)7705 HBLObjectRef       _Matrix::SortMatrixOnColumn (HBLObjectRef mp, HBLObjectRef cache)
7706 {
7707     if (storageType!=1) {
7708         HandleApplicationError  ("Only numeric matrices can be sorted");
7709         return new _MathObject();
7710     }
7711 
7712     if (theData == nil) {
7713         return new _Matrix (0,0);
7714     }
7715 
7716     _SimpleList sortOn;
7717 
7718     if (mp->ObjectClass () != NUMBER || mp->Value() < 0.0 || mp->Value () > GetVDim()-1) {
7719         bool goodMe = false;
7720         if (mp->ObjectClass () == MATRIX) {
7721             _Matrix * sortOnM = (_Matrix*)((_Matrix*)mp)->ComputeNumeric();
7722             long sortBy      = sortOnM->GetHDim()*sortOnM->GetVDim(),
7723                  maxColumnID = GetVDim();
7724 
7725             for (long k=0; k<sortBy; k=k+1) {
7726                 long idx = (*sortOnM)[k];
7727                 if (idx < 0 || idx >= maxColumnID) {
7728                     HandleApplicationError (_String("Invalid column index to sort on in call to ") & __PRETTY_FUNCTION__ & " : " & idx);
7729                     return new _MathObject();
7730                 }
7731                 sortOn << idx;
7732             }
7733             goodMe = sortOn.lLength;
7734         }
7735         if (!goodMe) {
7736             HandleApplicationError  (_String ("Invalid column index to sort the matrix on:") & _String((_String*)mp->toStr()).Enquote());
7737             return new _MathObject;
7738         }
7739     } else {
7740         sortOn << mp->Value();
7741     }
7742 
7743     // TODO SLKP 20111109 -- replace with a generic sort function
7744                      // the code below is BROKEN
7745 
7746     _SimpleList             idx (hDim,0,1);
7747     _Matrix theColumn   (hDim,sortOn.lLength,false,true);
7748 
7749     for (unsigned long col2Sort = 0; col2Sort < sortOn.lLength; col2Sort++) {
7750         long colIdx = sortOn.list_data[col2Sort];
7751 
7752         if (theIndex)
7753             for (long k=0; k<hDim; k++) {
7754                 theColumn.theData[col2Sort+k*sortOn.lLength] = (*this)(k, colIdx);
7755             }
7756         else
7757             for (long k=0, j = colIdx; k<hDim; k++, j+=vDim) {
7758                 theColumn.theData[col2Sort+k*sortOn.lLength] = theData[j];
7759             }
7760 
7761     }
7762 
7763     theColumn.RecursiveIndexSort (0, hDim-1, &idx);
7764     /*for (long i = 1; i < hDim; i++) {
7765         if (theColumn.theData[i-1] > theColumn.theData[i]) {
7766             HandleApplicationError("Resulting matrix is not properly sorted");
7767         }
7768     }*/
7769 
7770 
7771     _Matrix                 *result     = (_Matrix*)_returnMatrixOrUseCache(hDim, vDim, _NUMERICAL_TYPE, theIndex, cache);
7772 
7773     if (theIndex) {
7774         _SimpleList    revIdx (hDim,0,1);
7775         SortLists (&idx, &revIdx);
7776         for (long r=0; r<lDim; r++) {
7777             long oi = theIndex[r];
7778 
7779             if (oi >= 0) {
7780                 long     v  = oi%vDim,
7781                          h  = oi/vDim,
7782                          ni = revIdx.list_data[h]*vDim+v;
7783 
7784                 (*result)[ni] = theData[r];
7785             }
7786         }
7787     } else
7788         for (long r=0; r<hDim; r++) {
7789             long remapped = idx.list_data[r];
7790             remapped *= vDim;
7791             for (long c=r*vDim; c<r*vDim+vDim; c++, remapped++) {
7792                 result->theData[c] = theData[remapped];
7793             }
7794         }
7795 
7796 
7797     return result;
7798 }
7799 
7800 //_____________________________________________________________________________________________
PoissonLL(HBLObjectRef mp,HBLObjectRef cache)7801 HBLObjectRef       _Matrix::PoissonLL (HBLObjectRef mp, HBLObjectRef cache)
7802 {
7803     if (!is_numeric()) {
7804         HandleApplicationError ("Only numeric matrices can be passed to Poisson Log-Likelihood");
7805         return new _MathObject;
7806     }
7807 
7808     if (mp->ObjectClass () != NUMBER || mp->Value() < 0.0) {
7809         HandleApplicationError  (_String ("Invalid Poisson distribution parameter") & (_String((_String*)mp->toStr())).Enquote());
7810         return new _MathObject;
7811     }
7812 
7813     hyFloat     loglik = 0.0,
7814                    *logFactorials = new hyFloat [101],
7815     lambda        = mp->Value(),
7816     logLambda     = log (lambda),
7817     log2p         = log (sqrt(8.*atan(1.)));
7818 
7819 
7820     logFactorials[0] = 0.;
7821     logFactorials[1] = 0.;
7822 
7823     long           maxFactorialDone = 1;
7824 
7825     for (long idx = 0; idx < lDim; idx++) {
7826         long  cellValue = 0;
7827         if (theIndex) {
7828             cellValue = theIndex[idx];
7829             if (cellValue<0) {
7830                 continue;
7831             }
7832 
7833             cellValue = theData[cellValue];
7834         } else {
7835             cellValue = theData[idx];
7836         }
7837 
7838         if (cellValue>=0) {
7839             if (maxFactorialDone>=cellValue) {
7840                 loglik += logLambda * cellValue - lambda - logFactorials [cellValue];
7841             } else {
7842                 if (cellValue<=100) {
7843                     for (long idx2 = maxFactorialDone+1; idx2 <= cellValue; idx2++) {
7844                         logFactorials[idx2] = logFactorials[idx2-1]+log((hyFloat)idx2);
7845                     }
7846                     loglik += logLambda * cellValue - lambda - logFactorials [cellValue];
7847                     maxFactorialDone = cellValue;
7848                 } else
7849                     // use Stirling's formula
7850                 {
7851                     loglik += logLambda * cellValue - lambda + cellValue - (cellValue+0.5)*log((hyFloat)cellValue)-log2p;
7852                 }
7853             }
7854         }
7855     }
7856 
7857     delete      [] logFactorials;
7858 
7859     return _returnConstantOrUseCache(loglik, cache);
7860 }
7861 
7862 
7863 //_____________________________________________________________________________________________
PathLogLikelihood(HBLObjectRef mp,HBLObjectRef cache)7864 HBLObjectRef       _Matrix::PathLogLikelihood (HBLObjectRef mp, HBLObjectRef cache) {
7865     try {
7866         _Matrix                 *m          = nil;
7867 
7868         if (! is_numeric() || hDim != 3) {
7869             throw (_String("First argument must be a numeric 3xN matrix"));
7870         } else {
7871             //errMsg = "Second argument in call to < (PathLogLikelihood) must be a square matrix";
7872             if (mp->ObjectClass () == MATRIX) {
7873                 m = (_Matrix*)mp->Compute();
7874                 if (m->GetHDim() != m->GetVDim()) {
7875                     throw (_String("Second argument must be a square matrix"));
7876                 }
7877             } else {
7878                 throw (_String("Second argument must be a matrix"));
7879             }
7880         }
7881 
7882 
7883         CheckIfSparseEnough     (true);
7884 
7885         hyFloat              res     = 0.0;
7886         long                    maxDim  = m->GetHDim();
7887 
7888         for (unsigned long step = 0UL; step < vDim; step++) {
7889 
7890             long        i1 = get (0,step),
7891                         i2 = get (1,step);
7892             hyFloat     t  = get (2,step);
7893 
7894             if (i1<0 || i2 < 0 || i1 >= maxDim || i2 >= maxDim || t<0.0) {
7895                 throw (_String ("An invalid transition in step ") & _String ((long)(step+1L)) & " of the chain: " & i1 & " to " & i2 & " in time " & t);
7896             }
7897 
7898             _Matrix         rateMx (*m);
7899             rateMx *= t;
7900             _Matrix   * tMatrix = rateMx.Exponentiate ();
7901             t = tMatrix->theData[maxDim*i1+i2];
7902             DeleteObject (tMatrix);
7903 
7904             if (t>0.0) {
7905                 res += log (t);
7906             } else {
7907                 return _returnConstantOrUseCache(-1.e300, cache);
7908             }
7909         }
7910         return _returnConstantOrUseCache(res, cache);
7911     } catch (const _String& err) {
7912         HandleApplicationError  (err);
7913         return new _MathObject;
7914     }
7915 
7916 }
7917 
7918 //_____________________________________________________________________________________________
pFDR(HBLObjectRef classes,HBLObjectRef cache)7919 HBLObjectRef       _Matrix::pFDR (HBLObjectRef classes, HBLObjectRef cache) {
7920     try {
7921         long            steps     = 20,
7922                         iter_count = 500;
7923 
7924         hyFloat         p_value = 0.0,
7925                         max_lambda = 0.0;
7926 
7927 
7928         if (theIndex) {
7929             CheckIfSparseEnough (true);
7930         }
7931 
7932         if (!is_numeric()) {
7933             throw _String("Only numeric matrices can be passed to && (pFDR)");
7934         } else {
7935             if (!(is_column() || is_row()) || is_empty())   {
7936                 throw _String("The first argument of && (pFDR) must be an Nx1/1xN matrix.");
7937             } else if (classes->ObjectClass () != NUMBER || classes->Value() > 1. || (p_value = classes->Value()) < 0.0) {
7938                 throw _String ("Invalid baseline p-value (must be in (0,1)):") & _String((_String*)classes->toStr());
7939             } else {
7940                 for (unsigned long i=0UL; i<lDim; i++) {
7941                     hyFloat p_count = theData[i];
7942                     if (p_count < 0.0 || p_count > 1.0) {
7943                         throw _String ("Invalid p-value entry in matrix passed to pFDR (must be a positive integer):") & p_count;
7944                     }
7945                     StoreIfGreater(max_lambda, p_count);
7946                 }
7947             }
7948         }
7949 
7950 
7951         _Matrix        lamdbaRange (steps,1,false,true),
7952                        pFDRs       (steps,1,false,true);
7953 
7954         hyFloat     anLamdba           = 0.0,
7955                        minPFDR          = 5.0,
7956                        uberPFDR        = 0.0,
7957                        uberPFDRUpperLimit = 0.0,
7958                        minMSE             = 1.e100,
7959                        aStep            = 1.0/steps;
7960 
7961 
7962         unsigned long k = 0;
7963         while (anLamdba<1.0) {
7964             lamdbaRange.theData[k] = anLamdba;
7965 
7966             if ((pFDRs.theData[k] = computePFDR (anLamdba, p_value))<minPFDR) {
7967                 minPFDR = pFDRs.theData[k];
7968             }
7969 
7970             k++;
7971             anLamdba += aStep;
7972         }
7973 
7974         for (unsigned long k=0UL; k<steps; k++) {
7975             hyFloat mse    = 0.0;
7976             _Matrix    ITpDFR (iter_count,1,false,true);
7977 
7978             for (unsigned long it = 0; it < iter_count; it++) {
7979                 _Matrix         sampledPs (lDim,1,false,true);
7980                 _SimpleList     sample    (lDim,0,1);
7981                 sample.PermuteWithReplacement (1);
7982 
7983                 for (long el = 0; el < lDim; el++) {
7984                     sampledPs.theData[el] = theData[sample.list_data[el]];
7985                 }
7986 
7987                 ITpDFR.theData[it] = sampledPs.computePFDR (lamdbaRange.theData[k], p_value);
7988                 mse += (ITpDFR.theData[it]-minPFDR)*(ITpDFR.theData[it]-minPFDR);
7989             }
7990 
7991             mse /= iter_count;
7992 
7993             if (mse < minMSE) {
7994                 minMSE = mse;
7995                 uberPFDR = pFDRs.theData[k];
7996                 _Constant  zer (0.0);
7997                 _Matrix* sorted = (_Matrix*)ITpDFR.SortMatrixOnColumn (&zer, nil);
7998                 uberPFDRUpperLimit = sorted->theData[((long)(0.95*iter_count))];
7999                 DeleteObject (sorted);
8000             }
8001         }
8002 
8003         _Matrix * resMx = (_Matrix *) _returnMatrixOrUseCache (2,1,_NUMERICAL_TYPE,false, cache);
8004         resMx->theData[0] = uberPFDR;
8005         resMx->theData[1] = uberPFDRUpperLimit;
8006         return resMx;
8007     } catch (const _String& err) {
8008         HandleApplicationError  (err);
8009         return new _MathObject;
8010     }
8011 }
8012 
8013 //_____________________________________________________________________________________________
computePFDR(hyFloat lambda,hyFloat gamma)8014 hyFloat      _Matrix::computePFDR (hyFloat lambda, hyFloat gamma)
8015 // assumes a non-sparse row/column matrix
8016 {
8017     long        rejected    = 0,
8018                 null         = 0;
8019 
8020     for (long idx = 0; idx < lDim; idx++) {
8021         if (theData[idx] <= gamma) {
8022             rejected++;
8023         }
8024         if (theData[idx] > lambda) {
8025             null++;
8026         }
8027     }
8028 
8029     if (null) {
8030         hyFloat pi_0 = null/(lDim*(1.-lambda)),
8031                    pr_p = 0;
8032 
8033         if (rejected) {
8034             pr_p = rejected/(hyFloat)lDim;
8035         } else {
8036             pr_p = 1./(hyFloat)lDim;
8037         }
8038 
8039         return pi_0 * gamma / (pr_p /** (1.-exp(log(1.-gamma)*lDim))*/);
8040 
8041     } else {
8042         return 1;
8043     }
8044 }
8045 
8046 //_____________________________________________________________________________________________
8047 
Random(HBLObjectRef kind,HBLObjectRef cache)8048 HBLObjectRef _Matrix::Random (HBLObjectRef kind, HBLObjectRef cache) {
8049 
8050     try {
8051         long columns = GetVDim(),
8052              rows    = GetHDim();
8053 
8054         if (kind->ObjectClass() == NUMBER) {
8055             bool    resample = (kind->Compute()->Value()>0);
8056 
8057             _SimpleList     remapped (columns,0,1);
8058 
8059             if (resample) {
8060                 remapped.PermuteWithReplacement(1);
8061             } else {
8062                 remapped.Permute(1);
8063             }
8064 
8065 
8066             if (is_numeric()) {   // numeric matrix
8067                 _Matrix * res = (_Matrix *)_returnMatrixOrUseCache(rows, columns,_NUMERICAL_TYPE, theIndex != nil,cache);
8068 
8069                 if (is_dense())
8070                     for (unsigned long vv = 0; vv<lDim; vv+=columns)
8071                         for (unsigned long k2=0; k2<remapped.lLength; k2++) {
8072                             res->theData[vv+k2] = theData[vv+remapped.list_data[k2]];
8073                         }
8074                 else {
8075                     for (unsigned long vv = 0; vv< rows; vv++)
8076                         for (unsigned long k=0; k<remapped.lLength; k++) {
8077                             long ki = remapped.list_data[k];
8078                             if ((ki = Hash (vv,ki)) >= 0) {
8079                                 res->Store (vv,k,theData[ki]);
8080                             }
8081                         }
8082                 }
8083                 return res;
8084             } else {            // formula matrix
8085                 if (is_expression_based()) {
8086                     _Matrix * res = new _Matrix (rows, columns,theIndex != nil,false);
8087 
8088                     for (unsigned long vv = 0UL; vv< rows; vv++)
8089                         for (unsigned long k=0UL; k<remapped.lLength; k++) {
8090                             _Formula * ff = GetFormula (vv,remapped.get (k));
8091                             if (ff) {
8092                                 res->StoreFormula (vv, k, *ff);
8093                             }
8094                         }
8095                     return res;
8096                 }
8097             }
8098         }
8099 
8100         else if (kind->ObjectClass() == ASSOCIATIVE_LIST) {
8101             //ReportWarning (_String("_Matrix::Random() with associative list as first argument."));
8102 
8103             // Associative list should contain following arguments:
8104             //  "PDF" - string corresponding to p.d.f. ("Gamma", "Normal")
8105             //  "ARG0" ... "ARGn" - whatever parameter arguments (matrices) are required for the p.d.f.
8106 
8107             _AssociativeList    * pdfArgs   = (_AssociativeList *)kind;
8108             _List               * keys      = pdfArgs->GetKeys();
8109             _String             pdfkey      ("PDF"),
8110                                 * arg0      = (_String *)pdfArgs->GetByKey(pdfkey,STRING);
8111             DeleteObject (keys);
8112 
8113 
8114             if (arg0) {
8115                 _String     pdf ((_String*)arg0->toStr()),
8116                             arg ("ARG0");
8117 
8118                 long        pdfCode = _HY_MatrixRandomValidPDFs.GetValueFromString (pdf);
8119 
8120                  switch (pdfCode) {
8121                     case _HY_MATRIX_RANDOM_DIRICHLET:
8122                         return (_Matrix *) DirichletDeviate();
8123                     case _HY_MATRIX_RANDOM_GAUSSIAN:
8124                         return (_Matrix *) GaussianDeviate (*(_Matrix *) pdfArgs->GetByKey (arg, MATRIX));
8125                     case _HY_MATRIX_RANDOM_WISHART:
8126                         return (_Matrix *) WishartDeviate (*(_Matrix *) pdfArgs->GetByKey (arg, MATRIX));
8127                     case _HY_MATRIX_RANDOM_INVERSE_WISHART:
8128                         return (_Matrix *) InverseWishartDeviate (*(_Matrix *) pdfArgs->GetByKey (arg, MATRIX));
8129                     case _HY_MATRIX_RANDOM_MULTINOMIAL:
8130                         return (_Matrix *) MultinomialSample ((_Constant *) pdfArgs->GetByKey (arg, NUMBER));
8131                     default:
8132                         throw _String("String argument passed to Random not a supported PDF: ") & pdf.Enquote();
8133                 }
8134             } else {
8135                 throw _String("Expecting 'PDF' key in associative list argument passed to Random(), received: ") & *arg0;
8136             }
8137 
8138         } else if (kind->ObjectClass () == STRING) {
8139             _String key = ((_FString*)kind->Compute())->get_str();
8140             if (key == _String("LHS")) {
8141                 // latin hypercube sampling: samples are in ROWS
8142                 _Matrix * lhc = new _Matrix ( rows, columns, false, true);
8143 
8144                 _SimpleList permutation ( rows ,0,1);
8145 
8146                 for (unsigned long c = 0; c < columns; c++) {
8147                     permutation.Permute (1);
8148                     for (long r = 0; r < rows ; r++) {
8149                         lhc->set(r,c) = get (permutation.get(r),c);
8150                     }
8151                 }
8152 
8153                 return lhc;
8154             }
8155             throw _String ("Invalid string argument passed to matrix Random :") & key;
8156         } else {
8157             throw _String ("Invalid argument passes to matrix Random (should be a number, an associative list or a string):") & _String((_String*)kind->toStr());
8158         }
8159     } catch (_String const& err) {
8160         HandleApplicationError (err);
8161     }
8162     return new _Matrix (1,1);
8163 }
8164 
8165 //_____________________________________________________________________________________________
K_Means(HBLObjectRef classes,HBLObjectRef cache)8166 HBLObjectRef       _Matrix::K_Means (HBLObjectRef classes, HBLObjectRef cache) {
8167     // K-means clustering on scalar data
8168     /*
8169 
8170      this    : Nx2 matrix {{value1, count1}{value2, count 2}...}}
8171      classes : 2x1 matrix {{cluster count}{number of random restarts}}
8172 
8173      reutn   : 2 x Max (cluster count, 2)  {{cluster mean 1, cluster mean 2, ... , cluster mean N}{total L^2 error, how many restarts hit the min}}
8174 
8175      */
8176 
8177     // TODO: 20171026 SLKP revised. check correctness
8178 
8179      try {
8180         _Matrix     *   arg;
8181         long            cluster_count,
8182                         iter_count,
8183                         sample_count = 0L;
8184 
8185         if (theIndex) {
8186             CheckIfSparseEnough (true);
8187         }
8188 
8189         if (!is_numeric()) {
8190             throw _String("Only numeric matrices can be passed to <= (K-means)");
8191         } else {
8192             if (GetVDim () != 2) {
8193                 throw _String("The first argument of <= (K-means) must be an Nx2 matrix, with samples in the first column, and counts in the 2nd.");
8194             } else if (classes->ObjectClass () != MATRIX) {
8195                 throw _String ("Invalid number of clusters is call to K-means (must be >=1):") & _String((_String*)classes->toStr());
8196             } else {
8197                 arg = (_Matrix*)classes->Compute();
8198                 if (!arg->check_dimension (1,2) || (cluster_count=arg->theData[0]) < 1 || (iter_count = arg->theData[1]) < 1) {
8199                     throw _String ("Invalid second argument is call to K-means (must be a 2x1 matrix of positive integers specifying cluster_count and maximum number of random restarts. Had ") & _String((_String*)classes->toStr()).Enquote();
8200                 } else {
8201                     for (unsigned long i=1UL; i<lDim; i+=2UL) {
8202                         long pCount = theData[i];
8203                         if (pCount <= 0L) {
8204                              throw _String ("Invalid count entry in matrix passed to K-means (must be a positive integer):") & pCount;
8205                         }
8206                         sample_count += pCount;
8207                     }
8208                 }
8209             }
8210         }
8211 
8212         if (cluster_count > sample_count) {
8213             throw _String ("More clusters requested than available data points");
8214         }
8215 
8216         _Matrix * res = (_Matrix*)_returnMatrixOrUseCache(2, cluster_count, _NUMERICAL_TYPE, false, cache);
8217 
8218         if (cluster_count == 1L) {
8219             hyFloat sampleMean    = 0.,
8220                     errorEstimate = 0.;
8221 
8222             for (unsigned long c1=0UL, c2=1UL; c1 < 2*hDim; c1+=2UL, c2+=2UL) {
8223                 sampleMean +=  theData[c1] * theData[c2];
8224             }
8225 
8226             sampleMean /= sample_count;
8227 
8228             for (unsigned long c1=0UL, c2=1UL; c1 < 2*hDim; c1+=2UL, c2+=2UL) {
8229                 hyFloat locErr = theData[c1] - sampleMean;
8230                 errorEstimate += locErr*locErr*theData[c2];
8231             }
8232 
8233             res->theData[0] = sampleMean;
8234             res->theData[1] = errorEstimate;
8235         } else {
8236 
8237             hyFloat  minError    = 1.e100;
8238 
8239 
8240             _SimpleList full_copy_list    ((unsigned long)sample_count,0,0);
8241 
8242             for (unsigned long c1 = 0UL, overall = 0UL; c1 < hDim; c1++) {
8243                 unsigned long copies = get (c1, 1);
8244 
8245                 for (unsigned long c2 = 0; c2 < copies; c2++, overall++) {
8246                     full_copy_list.list_data[overall] = c1;
8247                 }
8248             }
8249 
8250 
8251             long        hit_min_error;
8252             _Matrix     best_cluster_means;
8253 
8254             for (unsigned long sampleCount = 0UL; sampleCount < iter_count; sampleCount ++) {
8255                 // choose N random cluster centers to start with
8256                 _SimpleList       chosen_means        = full_copy_list.Sample(cluster_count),
8257                                   cluster_assignments (hDim, 0, 0);
8258 
8259                 _Matrix           cluster_means        (cluster_count,2,false,true);
8260 
8261 
8262 
8263                 for (long cc = 0; cc < cluster_count; cc = cc+1) {
8264                     cluster_means.set (cc, 0) = get(chosen_means.get(cc), 0);
8265                 }
8266 
8267                 hyFloat            last_error_estimate = 1.e100,
8268                                    error_estimate      = 0.;
8269 
8270 
8271                 for (unsigned long cIters = 0UL; cIters < hDim * 5; cIters ++) {
8272 
8273                         bool moved = false;
8274                         // assign each point to the nearest cluster centroid
8275                         for (unsigned long data_point = 0UL; data_point < hDim; data_point++) {
8276 
8277                             unsigned long best_cluster = cluster_assignments.get (data_point);
8278 
8279                             hyFloat         this_value              = get  (data_point, 0),
8280                                             current_min_distance    = fabs (this_value-cluster_means.get (cluster_assignments.get (data_point),0));
8281 
8282                             for (unsigned long cluster_id = 0UL; cluster_id < cluster_count; cluster_id++) {
8283                                 if (StoreIfLess (current_min_distance, fabs (this_value-cluster_means.get (cluster_id,0)))) {
8284                                     best_cluster = cluster_id;
8285                                 }
8286                             }
8287 
8288                             if (best_cluster != cluster_assignments.get (data_point)) {
8289                                 moved = true;
8290                                 cluster_assignments[data_point] = best_cluster;
8291                             }
8292                         }
8293 
8294                         if (moved) {
8295                             for (long cc = 0; cc < cluster_count; cc = cc+1) {
8296                                 cluster_means.set (cc,0) = 0.;
8297                                 cluster_means.set (cc,1) = 0.;
8298                             }
8299                             for (unsigned long data_point = 0UL; data_point < hDim; data_point++) {
8300                                 cluster_means.set (cluster_assignments.get (data_point), 0) += get (data_point,0);
8301                                 cluster_means.set (cluster_assignments.get (data_point), 1) += get (data_point,1);
8302                            }
8303                            for (long cc = 0; cc < cluster_count; cc = cc+1) {
8304                                if (cluster_means.get (cc, 1) != .0) {
8305                                    cluster_means.set (cc,0) /= cluster_means.get (cc,1);
8306                                }
8307                            }
8308                         } else {
8309                             break;
8310                         }
8311 
8312 
8313                 }
8314 
8315                 if (minError == 0.0 || fabs((minError-error_estimate)/minError) < 0.001) {
8316                     hit_min_error ++;
8317                 } else if (error_estimate < minError) {
8318                     hit_min_error = 1;
8319                     minError    = error_estimate;
8320                     best_cluster_means = cluster_means;
8321                 }
8322             }
8323 
8324               for (long k2 = 0; k2 < cluster_count; k2++) {
8325                 res->theData[k2] = best_cluster_means.get (k2, 0);
8326             }
8327 
8328             res->theData[cluster_count]   = minError;
8329             res->theData[cluster_count+1] = hit_min_error;
8330             return res;
8331         }
8332     } catch (_String const& err) {
8333         HandleApplicationError (err);
8334     }
8335 
8336    return new _Matrix;
8337 }
8338 
8339 
8340 //_____________________________________________________________________________________________
PopulateConstantMatrix(hyFloat v)8341 void            _Matrix::PopulateConstantMatrix (hyFloat v) {
8342     if (is_numeric()) {
8343         InitializeArray(theData, lDim, (hyFloat&&)v);
8344     }
8345 }
8346 
8347 //_____________________________________________________________________________________________
AddObj(HBLObjectRef mp,HBLObjectRef cache)8348 HBLObjectRef       _Matrix::AddObj (HBLObjectRef mp, HBLObjectRef cache)
8349 {
8350     if (_Matrix::ObjectClass()!=mp->ObjectClass()) {
8351         if (mp->ObjectClass () == STRING) {
8352             _FormulaParsingContext def;
8353             _Matrix * convMatrix = new _Matrix (((_FString*)mp)->get_str(), false, def),
8354             * res;
8355             res = (_Matrix*)AddObj (convMatrix, cache);
8356             DeleteObject (convMatrix);
8357             return res;
8358         }
8359         if (mp->ObjectClass () == NUMBER) {
8360             _Matrix* aNum = (_Matrix*)ComputeNumeric ();
8361 
8362             hyFloat pValue = mp->Value();
8363 
8364             if (aNum->is_numeric()) {
8365                 return ApplyScalarOperation ([=] (hyFloat h) -> hyFloat {return h + pValue;}, cache);
8366             }
8367         }
8368 
8369         HandleApplicationError ( kErrorStringIncompatibleOperands );
8370         return new _Matrix (1,1);
8371     }
8372 
8373     _Matrix * m = (_Matrix*)mp;
8374     AgreeObjects (*m);
8375     _Matrix * result = (_Matrix *)_returnMatrixOrUseCache( hDim, vDim, storageType, theIndex && m->theIndex, cache);
8376     AddMatrix (*result,*m);
8377     return result;
8378 }
8379 
8380 //_____________________________________________________________________________________________
operator -=(_Matrix & m)8381 void        _Matrix::operator -= (_Matrix& m)
8382 {
8383     AgreeObjects (m);
8384     if ((!m.theIndex)&&theIndex) {
8385         CheckIfSparseEnough(true);
8386     }
8387     Subtract (*this,m);
8388 }
8389 
8390 //_____________________________________________________________________________________________
NonZeroEntries(_SimpleList & target)8391 void       _Matrix::NonZeroEntries (_SimpleList& target) {
8392     if (theIndex && storageType == 1) {
8393         target.Clear();
8394         target.RequestSpace(lDim);
8395         for (long elementID = 0; elementID < lDim; elementID ++) {
8396             if (theIndex[elementID] >= 0) {
8397                 target << theIndex[elementID];
8398             }
8399         }
8400         target.Sort();
8401     }
8402 }
8403 
8404 //_____________________________________________________________________________________________
Equal(HBLObjectRef mp)8405 bool       _Matrix::Equal(HBLObjectRef mp)
8406 {
8407     if (mp->ObjectClass()!=ObjectClass()) {
8408         return false;
8409     }
8410 
8411     _Matrix * m = (_Matrix*)mp;
8412 
8413     if (m->storageType == storageType && m->hDim == hDim && m->vDim == vDim) {
8414         if (is_numeric()) {
8415             if (theIndex || m->theIndex) {
8416                 for (long r = 0L; r < hDim; r ++) {
8417                     for (long c = 0L; c < vDim; c++) {
8418                         if (!CheckEqual((*this)(r,c), (*m)(r,c))) {
8419                             return false;
8420                         }
8421                     }
8422                 }
8423 
8424             } else {
8425                 for (long elementID = 0; elementID < lDim; elementID ++) {
8426                     if (!CheckEqual(theData[elementID], m->theData[elementID])) {
8427                         return false;
8428                     }
8429                 }
8430             }
8431 
8432             return true;
8433         } else {
8434             if (IsAStringMatrix() && m->IsAStringMatrix()) {
8435                 for (long r = 0L; r < hDim; r ++) {
8436                     for (long c = 0L; c < vDim; c++) {
8437                         _Formula * f1 = GetFormula(r,c),
8438                                  * f2 = GetFormula(r,c);
8439 
8440                         if (f1 && f2) {
8441                             if (((_FString*)f1->Compute())->get_str() != ((_FString*)f2->Compute())->get_str()) {
8442                                 return false;
8443                             }
8444                         } else {
8445                             if (f1 || f2) {
8446                                 return false;
8447                             }
8448                         }
8449                     }
8450                 }
8451                 return true;
8452             }
8453         }
8454     }
8455 
8456     return false;
8457 }
8458 
8459 
8460 //_____________________________________________________________________________________________
SubObj(HBLObjectRef mp,HBLObjectRef cache)8461 HBLObjectRef       _Matrix::SubObj (HBLObjectRef mp, HBLObjectRef cache)
8462 {
8463     if (mp->ObjectClass()!=ObjectClass()) {
8464         HandleApplicationError ( kErrorStringIncompatibleOperands );
8465         return new _Matrix (1,1);
8466     }
8467 
8468     _Matrix * m = (_Matrix*)mp;
8469     AgreeObjects (*m);
8470     _Matrix * result = (_Matrix*) _returnMatrixOrUseCache(hDim, vDim, storageType,theIndex && m->theIndex, cache);
8471     Subtract (*result,*m);
8472     return result;
8473 }
8474 
8475 //_____________________________________________________________________________________________
operator *=(hyFloat c)8476 void        _Matrix::operator *= (hyFloat c) {
8477     Multiply (*this,c);
8478 }
8479 
8480 //_____________________________________________________________________________________________
operator *(hyFloat c)8481 _Matrix     _Matrix::operator * (hyFloat c) {
8482     _Matrix result (*this);
8483     Multiply (result,c);
8484     return result;
8485 }
8486 
8487 //_____________________________________________________________________________________________
operator *=(_Matrix & m)8488 void        _Matrix::operator *= (_Matrix& m) {
8489     if (CheckDimensions     (m)) {
8490         AgreeObjects        (m);
8491         _Matrix   result    (hDim, m.vDim, false, storageType);
8492         Multiply            (result,m);
8493         //if ((theIndex!=nil)||(m.theIndex!=nil)) result.AmISparse();
8494         if (theIndex!=nil && m.theIndex!=nil) {
8495             result.AmISparse();
8496         }
8497         Swap                (result);
8498     }
8499 }
8500 
8501 //long count_sparse_successes = 0L;
8502 //long total_multbys = 0L;
8503 
8504 //_____________________________________________________________________________________________
MultbyS(_Matrix & m,bool leftMultiply,_Matrix * externalStorage,hyFloat * stash)8505 void        _Matrix::MultbyS (_Matrix& m, bool leftMultiply, _Matrix* externalStorage, hyFloat* stash) {
8506     _Matrix * result = nil;
8507     if (!externalStorage) {
8508         result = new _Matrix (hDim, m.vDim, false, storageType);
8509     }
8510 
8511     _Matrix * receptacle = (externalStorage?externalStorage:result);
8512 
8513     if (leftMultiply) {
8514         m.Multiply (*receptacle,*this);
8515     } else {
8516         Multiply   (*receptacle,m);
8517     }
8518 
8519     if (theIndex&&m.theIndex) {
8520         //total_multbys++;
8521         // 20200928: speculatively compress the matrix; convert back to dense if storage is too large
8522         /*CompressSparseMatrix(false,stash);
8523         if (lDim >= hDim * vDim *_Matrix::switchThreshold/100) {
8524             Swap            (*receptacle);
8525         }*/
8526 
8527         if (receptacle->AmISparseFast(*this) == false) {
8528             Swap            (*receptacle);
8529         } /*else {
8530             CompressSparseMatrix(false,stash);
8531         }*/
8532 
8533         //if (total_multbys % 100) {
8534         //    printf ("%ld / %ld\n", count_sparse_successes, total_multbys);
8535         //}
8536     } else { // both dense
8537         Swap            (*receptacle);
8538     }
8539 
8540 
8541     if (!externalStorage) {
8542         DeleteObject (result);
8543     } else {
8544         if (!externalStorage->CheckIfSparseEnough (true, false)) { // no conversion took place; reset memory
8545             memset (externalStorage->theData, 0, sizeof (hyFloat)*externalStorage->lDim);
8546         }
8547         //for (long s = 0; s < externalStorage->lDim; s++) externalStorage->theData[s] = 0.0;
8548     }
8549 }
8550 
8551 //_____________________________________________________________________________________________
MultObj(HBLObjectRef mp,HBLObjectRef cache)8552 HBLObjectRef       _Matrix::MultObj (HBLObjectRef mp, HBLObjectRef cache) {
8553 
8554   if (mp->ObjectClass()!=ObjectClass()) {
8555     if (mp->ObjectClass()!=NUMBER) {
8556       HandleApplicationError ( kErrorStringIncompatibleOperands );
8557       return new _Matrix (1,1);
8558     } else {
8559       hyFloat theV = mp->Value();
8560       return (HBLObjectRef)((*this)*theV).makeDynamic();
8561     }
8562   }
8563 
8564   _Matrix*        m = (_Matrix*)mp;
8565   if (!CheckDimensions (*m)) return new _MathObject;
8566   AgreeObjects    (*m);
8567 
8568   _Matrix*      result = (_Matrix*) _returnMatrixOrUseCache(hDim, m->vDim, storageType, false, cache);
8569   Multiply      (*result,*m);
8570   return        result;
8571 
8572 }
8573 
8574 //_____________________________________________________________________________________________
MultElements(HBLObjectRef mp,bool elementWiseDivide,HBLObjectRef cache)8575 HBLObjectRef       _Matrix::MultElements (HBLObjectRef mp, bool elementWiseDivide, HBLObjectRef cache) {
8576 
8577     if (mp->ObjectClass()!=ObjectClass()) {
8578         HandleApplicationError ( kErrorStringIncompatibleOperands );
8579         return new _Matrix (1,1);
8580     }
8581 
8582     _Matrix* m = (_Matrix*)mp;
8583 
8584     bool by_column = false;
8585     // if the second argument has dimension 1xcolumns of the first matrix, then
8586     // result [i][j] is assigned this [i][j] * / argument [0][j]
8587     // in other words, divide or multiply each column
8588 
8589     bool by_row    = false;
8590     // if the first argument has dimension rows of the second matrix x 1 then
8591     // result [i][j] is assigned argument [i][j] * / this [i][0]
8592     // in other words, divide or multiply each row
8593 
8594 
8595     if ( GetHDim()!=m->GetHDim()  || GetVDim()!=m->GetVDim()) {
8596         if (GetVDim() == m->GetVDim() && m->GetHDim () == 1) {
8597             by_column = true;
8598         } else {
8599             if (GetHDim() == m->GetHDim() && GetVDim () == 1) {
8600                 by_row = true;
8601             } else {
8602                 HandleApplicationError ("Element-wise multiplication/division requires matrixes of the same dimension, or (NxM) $ (1xM) or (Nx1) $ (NxM) matrices ");
8603                 return new _Matrix (1,1);
8604             }
8605         }
8606     }
8607 
8608     if (! is_numeric() || ! m->is_numeric() ) {
8609         HandleApplicationError ("Element-wise multiplication/division only works on numeric matrices");
8610         return new _Matrix (1,1);
8611     }
8612 
8613     _Matrix*      result = (_Matrix*) _returnMatrixOrUseCache(GetHDim(), m->GetVDim(), _NUMERICAL_TYPE, false, cache);
8614 
8615     if (theIndex || m->theIndex) {
8616         auto operation = elementWiseDivide ? DivNumbers : MultNumbers;
8617 
8618         long index = 0L;
8619         if (by_row) {
8620             for (long row = 0; row < hDim; row++) {
8621                 for (long column = 0; column < m->vDim; column++, index++) {
8622                     result->theData[index] = operation ( (*this)(row,0), (*m)(row,column));
8623                 }
8624             }
8625         } else {
8626             if (by_column) {
8627                 for (long row = 0; row < hDim; row++) {
8628                     for (long column = 0; column < m->vDim; column++, index++) {
8629                         result->theData[index] = operation ( (*this)(row,column), (*m)(0,column));
8630                     }
8631                 }
8632             }
8633             else {
8634                 for (long row = 0; row < hDim; row++) {
8635                     for (long column = 0; column < m->vDim; column++, index++) {
8636                         result->theData[index] = operation ( (*this)(row,column), (*m)(row,column));
8637                     }
8638                 }
8639             }
8640         }
8641     } else {
8642         if (elementWiseDivide) {
8643             long index = 0L;
8644             if (by_row) {
8645                 for (long row = 0; row < hDim; row++) {
8646                     for (long column = 0; column < m->vDim; column++, index++) {
8647                         result->theData[index] = theData[row] / m->theData [index];
8648                     }
8649                 }
8650             } else {
8651                 if (by_column) {
8652                     for (long row = 0; row < hDim; row++) {
8653                         for (long column = 0; column < m->vDim; column++, index++) {
8654                             result->theData[index] = theData[index] / m->theData [column];
8655                         }
8656                     }
8657                 }
8658                 else {
8659                     for (long row = 0; row < hDim; row++) {
8660                         for (long column = 0; column < m->vDim; column++, index++) {
8661                             result->theData[index] = theData[index] / m->theData [index];
8662                         }
8663                     }
8664                 }
8665             }
8666         } else {
8667             long index = 0L;
8668             if (by_row) {
8669                 for (long row = 0; row < hDim; row++) {
8670                     for (long column = 0; column < m->vDim; column++, index++) {
8671                         result->theData[index] = theData[row] * m->theData [index];
8672                     }
8673                 }
8674             } else {
8675                 if (by_column) {
8676                     for (long row = 0; row < hDim; row++) {
8677                         for (long column = 0; column < m->vDim; column++, index++) {
8678                             result->theData[index] = theData[index] * m->theData [column];
8679                         }
8680                     }
8681                 }
8682                 else {
8683                     for (long row = 0; row < hDim; row++) {
8684                         for (long column = 0; column < m->vDim; column++, index++) {
8685                             result->theData[index] =theData[index] * m->theData [index];
8686                         }
8687                     }
8688                 }
8689             }
8690         }
8691     }
8692 
8693     if (theIndex||m->theIndex) {
8694         result->AmISparse();
8695     }
8696 
8697     return  result;
8698 }
8699 
8700 //_____________________________________________________________________________________________
CheckDimensions(_Matrix & secondArg) const8701 bool    _Matrix::CheckDimensions (_Matrix& secondArg) const {
8702 // check matrix dimensions to ensure that they are multipliable
8703     if (vDim!=secondArg.hDim) {
8704         if (hDim == 1 && secondArg.hDim==1 && vDim == secondArg.vDim) { // handle scalar product separately
8705             secondArg.Transpose();
8706         } else {
8707             char str[255];
8708             snprintf (str, sizeof(str),"Incompatible matrix dimensions in call to CheckDimension: %ldx%ld and %ldx%ld\n",hDim,vDim,secondArg.hDim,secondArg.vDim);
8709             HandleApplicationError (str);
8710             return false;
8711         }
8712     }
8713     return true;
8714 }
8715 
8716 //_____________________________________________________________________________________________
operator *(_Matrix & m)8717 _Matrix     _Matrix::operator * (_Matrix& m)
8718 {
8719     if (!CheckDimensions (m)) {
8720         _Matrix d;
8721         return d;
8722     }
8723 
8724     AgreeObjects (m);
8725     _Matrix result (hDim, m.vDim, false, storageType);
8726     Multiply (result,m);
8727     if ((theIndex!=nil)||(m.theIndex!=nil)) {
8728         result.AmISparse();
8729     }
8730     return result;
8731 
8732 }
8733 //_____________________________________________________________________________________________
operator +(_Matrix & m)8734 _Matrix     _Matrix::operator + (_Matrix& m)
8735 {
8736     AgreeObjects (m);
8737     _Matrix result (hDim, vDim, bool((theIndex!=nil)&&(m.theIndex!=nil)), storageType);
8738     AddMatrix (result,m);
8739     return result;
8740 
8741 }
8742 //_____________________________________________________________________________________________
operator -(_Matrix & m)8743 _Matrix     _Matrix::operator - (_Matrix& m)
8744 {
8745     AgreeObjects (m);
8746     _Matrix result (hDim, vDim, bool((theIndex!=nil)&&(m.theIndex!=nil)), storageType);
8747     Subtract (result,m);
8748     return result;
8749 }
8750 
8751 //_________________________________________________________
internal_to_str(_StringBuffer * string,FILE * file,unsigned long padding)8752 void    _Matrix::internal_to_str (_StringBuffer* string, FILE * file, unsigned long padding) {
8753 
8754     StringFileWrapper res (string, file);
8755    _String padder (" ", padding);
8756 
8757     static const _String kUseJSONForMatrix ("USE_JSON_FOR_MATRIX");
8758 
8759     bool is_numeric_mx = is_numeric ();
8760     bool directly_printable  = is_numeric_mx || IsAStringMatrix ();
8761 
8762     if (directly_printable) {
8763 
8764         long digs         = -1L;
8765 
8766         bool doJSON = hy_env::EnvVariableTrue(kUseJSONForMatrix);
8767 
8768         char openBracket  = doJSON ? '[' : '{',
8769              closeBracket = doJSON ? ']' : '}';
8770 
8771         if (is_numeric_mx) {
8772             digs = MIN (print_digit_specification = hy_env::EnvVariableGetDefaultNumber(hy_env::print_float_digits), 15);
8773         }
8774         res << padder << openBracket << kStringFileWrapperNewLine;
8775 
8776         if (is_numeric_mx) {
8777 
8778             _String formatStr = _String("%") &_String(digs+6)&'.'&_String(digs)&'g';
8779 
8780              char  number_buffer [256];
8781 
8782             for (long i = 0L; i<hDim; i++) {
8783                 if (i) {
8784                     res << padder;
8785                 }
8786                 res << openBracket;
8787 
8788                 for (long j = 0L; j<vDim; j++) {
8789                     if (j) {
8790                         res << ", ";
8791                     }
8792                     parameterToCharBuffer ((*this)(i,j), number_buffer, 255, doJSON);
8793                     res << number_buffer;
8794                 }
8795                 res << closeBracket << (doJSON && i != hDim -1 ? ',' : ' ') << kStringFileWrapperNewLine;
8796              }
8797         } else {
8798             for (long i = 0L; i<hDim; i++) {
8799                 if (i) {
8800                     res << padder;
8801                 }
8802                 res << openBracket;
8803 
8804                 for (long j = 0L; j<vDim; j++) {
8805                     if (j) {
8806                         res << ", ";
8807                     }
8808                     res << '"';
8809 
8810                     _Formula * f = GetFormula (i,j);
8811                     if (f) {
8812                         HBLObjectRef fv = f->Compute();
8813                         if (fv) {
8814                           res << _String ((_String*)fv->toStr());
8815                           //;((_FString*)fv)->get_str();
8816                         }
8817                     }
8818                     res << '"';
8819 
8820                 }
8821                 res << closeBracket << (doJSON && i != hDim -1  ? ',' : ' ') << kStringFileWrapperNewLine;
8822             }
8823         }
8824         res << padder << closeBracket;
8825     } else if (storageType==_POLYNOMIAL_TYPE) {
8826         ANALYTIC_COMPUTATION_FLAG  = hy_env::EnvVariableTrue (ANAL_COMP_FLAG);
8827         if (!ANALYTIC_COMPUTATION_FLAG) {
8828             ((_Matrix*)Compute())->internal_to_str (string, file, padding);
8829             return;
8830         }
8831         for (long i = 0; i<hDim; i++) {
8832             res << "\n[";
8833             for (long j = 0; j<vDim; j++) {
8834                 long p = Hash (i,j);
8835                 if (j) {
8836                     res << ",";
8837                 }
8838                 if (p>=0) {
8839                     _String *sp = (_String*) GetMatrixObject (p)->toStr();
8840                      res << *sp;
8841                      DeleteObject (sp);
8842                 } else {
8843                     res << "0.";
8844                 }
8845             }
8846             res << "]\n";
8847         }
8848     } else {
8849         _Matrix* eval = (_Matrix*)(storageType==3?EvaluateSimple():Evaluate(false));
8850         eval->internal_to_str(string, file, padding);
8851         DeleteObject (eval);
8852     }
8853 }
8854 //_________________________________________________________
toFileStr(FILE * dest,unsigned long padding)8855 void    _Matrix::toFileStr (FILE*dest, unsigned long padding){
8856     internal_to_str(nil, dest, padding);
8857 }
8858 //_____________________________________________________________________________________________
8859 
toStr(unsigned long padding)8860 BaseRef _Matrix::toStr(unsigned long padding) {
8861     _StringBuffer * serialized = new _StringBuffer (2048L);
8862     internal_to_str (serialized, nil, padding);
8863     return serialized;
8864 }
8865 
8866 //_____________________________________________________________________________________________
8867 
Serialize(_StringBuffer & res,_String & myID)8868 void     _Matrix::Serialize (_StringBuffer& res, _String& myID) {
8869     if (storageType != _POLYNOMIAL_TYPE) {
8870         res << '\n';
8871         res <<  myID;
8872         if (is_numeric()) {
8873             res << '=';
8874             res.AppendNewInstance((_String*)toStr());
8875             res << ';';
8876         } else if (is_expression_based()) {
8877             res << (_String ("={") & hDim & ',' & vDim & "};\n");
8878             for (long h=0L; h<hDim; h++) {
8879                 for (long v=0L; v<vDim; v++) {
8880                     _Formula *theCell = GetFormula (h,v);
8881                     if (theCell&& !theCell->IsEmpty()) {
8882                         res << myID << '[' << _String(h) << "][" << _String(v) << "]:=";
8883                         res.AppendNewInstance((_String*)theCell->toStr(kFormulaStringConversionNormal));
8884                         res << ";\n";
8885                     }
8886                 }
8887             }
8888         }
8889     }
8890 }
8891 
8892 
8893 //_____________________________________________________________________________________________
8894 
SetIncrement(int m)8895 void    SetIncrement (int m) {
8896     _Matrix::storageIncrement = m;
8897 }
8898 //_____________________________________________________________________________________________
InitMxVar(_SimpleList & mxVariables,hyFloat glValue)8899 void    _Matrix::InitMxVar (_SimpleList& mxVariables, hyFloat glValue) {
8900     mxVariables.Each ([&] (long value, unsigned long) -> void {
8901         LocateVar(value)->SetValue (new _Constant (glValue), false,true, NULL);
8902     });
8903 }
8904 //_____________________________________________________________________________________________
ImportMatrixExp(FILE * theSource)8905 bool    _Matrix::ImportMatrixExp (FILE* theSource) {
8906     // TODO: SLKP 20171027, need to review and possibly deprecate
8907     long mDim=0,i,k=0,j,m;
8908     char buffer[255],fc=0;
8909     buffer[0]=0;
8910     while(1) {
8911         buffer[mDim]=fgetc(theSource);
8912         if (feof(theSource)) {
8913             return false;
8914         }
8915         if (buffer[mDim]==',') {
8916             break;
8917         }
8918         mDim++;
8919     }
8920     buffer[mDim] = 0;
8921     mDim = atol (buffer); // matrix dimension
8922     Clear();
8923     CreateMatrix (this,mDim,mDim,false,false);
8924     // read in the variables
8925     i = 0;
8926     _SimpleList varList,c1,c2;
8927     while (fc!=';') {
8928         fc = fgetc (theSource);
8929         if ((fc==',')||(fc==';')) {
8930             buffer [i] = 0;
8931             _String varName (buffer);
8932 
8933             _Variable * ppv = CheckReceptacle (&varName, kEmptyString, true);
8934             varList << ppv->get_index();
8935             i = 0;
8936         } else {
8937             buffer[i]=fc;
8938             i++;
8939         }
8940         if (feof(theSource)) {
8941             return false;
8942         }
8943     }
8944     do {
8945         fc = fgetc (theSource);
8946         if (feof(theSource)) {
8947             return false;
8948         }
8949     } while (fc!=';');
8950 
8951     k = 0; // term counter
8952 
8953     while (k<mDim*mDim) {
8954         i = 0;
8955         while (fc!='{') {
8956             fc = fgetc (theSource);
8957             buffer[i] = fc;
8958             i++;
8959             if (feof(theSource)) {
8960                 return false;
8961             }
8962         }
8963         _Polynomial* thisCell = new _Polynomial (varList);
8964         m = atol (buffer);
8965         hyFloat* theCoeffs = (hyFloat*)MatrixMemAllocate(m*sizeof(hyFloat));
8966         j = 0;
8967         while (fc!='}') {
8968             i = 0;
8969             do {
8970                 buffer[i] = fc = fgetc (theSource);
8971                 i++;
8972                 if (feof(theSource)) {
8973                     DeleteObject (thisCell);
8974                     return false;
8975                 }
8976             } while ((fc!=',')&&(fc!='}'));
8977             buffer[i]=0;
8978             theCoeffs[j]=atof (buffer);
8979             j++;
8980             if (j>m) {
8981                 DeleteObject (thisCell);
8982                 return false;
8983             }
8984         }
8985         fc = fgetc(theSource);
8986         if (fc != '{') {
8987             DeleteObject (thisCell);
8988             return false;
8989         }
8990         _PolynomialData *pd = new _PolynomialData (varList.countitems(),j,theCoeffs);
8991         MatrixMemFree (theCoeffs);
8992         c1.Clear();
8993         while (fc!='}') {
8994             i = 0;
8995             do {
8996                 buffer[i] = fc = fgetc (theSource);
8997                 i++;
8998                 if (feof(theSource)) {
8999                     DeleteObject (thisCell);
9000                     DeleteObject (pd);
9001                     return false;
9002                 }
9003             } while ((fc!=',')&&(fc!='}'));
9004             buffer[i]=0;
9005             c1<<atol (buffer);
9006         }
9007         fc = fgetc(theSource);
9008         if (fc != '{') {
9009             DeleteObject (thisCell);
9010             DeleteObject (pd);
9011             return false;
9012         }
9013         c2.Clear();
9014         while (fc!='}') {
9015             i = 0;
9016             do {
9017                 buffer[i] = fc = fgetc (theSource);
9018                 i++;
9019                 if (feof(theSource)) {
9020                     DeleteObject (thisCell);
9021                     DeleteObject (pd);
9022                     return false;
9023                 }
9024             } while ((fc!=',')&&(fc!='}'));
9025             buffer[i]=0;
9026             c2<<atol (buffer);
9027         }
9028         thisCell->SetTheTerms(pd);
9029         thisCell->SetCLists (c1,c2);
9030         StoreObject(k,thisCell);
9031         k++;
9032     }
9033 
9034     return true;
9035 }
9036 
9037 //_____________________________________________________________________________________________
9038 
ExportMatrixExp(_Matrix * theBase,FILE * theDump)9039 void    _Matrix::ExportMatrixExp (_Matrix* theBase, FILE* theDump)
9040 // TODO: SLKP 20171027, need to review and possibly deprecate
9041 
9042 // export the matrix's computational form in the following format
9043 // matrix dimension followed by a comma
9044 // a comma separated list of variable names followed by a semicolon
9045 // a list of (precision, maxcap) followed by a semicolon
9046 // for each matrix entry
9047 // number of coeffs,
9048 // a {} enclosed list of coefficients
9049 // a {} enclosed first computational list
9050 // a {} enclosed second computational list
9051 // followed by a comma
9052 {
9053     // write out the preliminaries
9054     if (storageType!=0) {
9055         HandleApplicationError ( kErrorStringMatrixExportError );
9056         return;
9057     }
9058     fprintf (theDump,"%ld,",hDim);
9059     _SimpleList mxVariables;
9060     {
9061         _AVLList        mxA (&mxVariables);
9062         ScanForVariables(mxA,true);
9063         mxA.ReorderList();
9064     }
9065 
9066 
9067     long k, i=0;
9068     hyFloat* varPool = (hyFloat*)MatrixMemAllocate (mxVariables.countitems()*sizeof(hyFloat));
9069     for (k=0; k<mxVariables.countitems(); k++) {
9070         fprintf (theDump,"%s",LocateVar(mxVariables(k))->GetName()->get_str());
9071         if (k<mxVariables.countitems()-1) {
9072             fprintf (theDump,"%c",',');
9073         } else {
9074             fprintf (theDump,"%c",';');
9075         }
9076         varPool[k]=topPolyCap;
9077     }
9078 
9079     // begin by computing the actual "numerical exponential"
9080     // initialize all the variables to the polycap value
9081 
9082     InitMxVar   (mxVariables, topPolyCap);
9083 
9084     _Matrix     *dummy = (_Matrix*)theBase->Evaluate(false);
9085     _Matrix     *numExp = (_Matrix*)(dummy->Exponentiate());
9086 
9087     DeleteObject(dummy);
9088     checkParameter (ANAL_MATRIX_TOLERANCE,analMatrixTolerance,1e-6);
9089     fprintf (theDump,"%g,%g;",analMatrixTolerance,topPolyCap);
9090 
9091     // now loop thru the cells and check the precision term by term
9092     for (k=0; k<lDim; k++) {
9093         _SimpleList termRank, termIndex,c1,c2;
9094         _Polynomial* thisCell = ((_Polynomial**)theData)[k];
9095         long nTerms = thisCell->GetTheTerms()->NumberOfTerms(),
9096              step = nTerms/10+1, upTo = step, tup,j;
9097         hyFloat* coeffHolder =  (hyFloat*)MatrixMemAllocate (nTerms*sizeof(hyFloat)), error, bestError = 1;
9098 
9099         thisCell->RankTerms(&termRank);
9100         for (i=0; i<nTerms; i++) {
9101             termIndex<<i;
9102         }
9103         SortLists (&termRank,&termIndex);
9104         termRank.Clear();
9105         for (i=0; i<nTerms; i++) {
9106             termRank<<(nTerms-termIndex.Find(i)-1);
9107         }
9108         bestError = 1;
9109         while(upTo<nTerms+step) {
9110             if (upTo<nTerms) {
9111                 tup = upTo;
9112             } else {
9113                 tup = nTerms-1;
9114             }
9115             termIndex.Clear();
9116             for (i=0,j=0; (i<nTerms)&&(j<=tup); i++) {
9117                 if (termRank.list_data[i]<=tup) {
9118                     coeffHolder[j]=thisCell->GetTheTerms()->GetCoeff(i);
9119                     j++;
9120                     termIndex<<i;
9121                 }
9122             }
9123             thisCell->Convert2ComputationForm(&c1,&c2,&termIndex);
9124             error = fabs(thisCell->ComputeP(varPool,coeffHolder,thisCell->GetNoVariables()+1,c1.countitems(),c1.quickArrayAccess(),
9125                                             c2.quickArrayAccess())-numExp->directIndex(k));
9126             if (error<bestError) {
9127                 bestError = error;
9128             }
9129             if (bestError<=analMatrixTolerance) {
9130                 break;
9131             }
9132             upTo+=step;
9133         }
9134 
9135         if (bestError>analMatrixTolerance) {
9136             char be[100];
9137             snprintf (be, sizeof(be),"%g",bestError);
9138             _String wm ("Polynomial Matrix Exp approximation failed tolerance test in cell (");
9139             wm = wm&_String(k/hDim)&","&_String(k%hDim)&"). Tolerance achieved is:"&be;
9140             ReportWarning (wm);
9141         }
9142         fprintf(theDump,"%ld{",tup+1);
9143         for (i=0; i<=tup; i++) {
9144             if (i) {
9145                 fprintf(theDump,",%18.16g",coeffHolder[i]);
9146             } else {
9147                 fprintf(theDump,"%18.16g",coeffHolder[i]);
9148             }
9149         }
9150         fprintf(theDump,"}%ld",tup);
9151         c1.toFileStr(theDump);
9152         c2.toFileStr(theDump);
9153         MatrixMemFree (coeffHolder);
9154 
9155     }
9156     MatrixMemFree (varPool);
9157     DeleteObject (numExp);
9158 }
9159 
9160 //_____________________________________________________________________________________________
9161 
ExpNumberOfSubs(_Matrix * freqs,bool mbf)9162 hyFloat  _Matrix::ExpNumberOfSubs  (_Matrix* freqs, bool mbf) {
9163     // TODO SLKP 20171027 SLKP reviewed and edited; check correctness
9164 
9165     if (!is_square_numeric(false) || !freqs->is_numeric()) {
9166         return 0.0;
9167     }
9168 
9169     hyFloat      result      =   0.0;
9170     _Matrix      *stencil    =   BranchLengthStencil();
9171 
9172     if ( freqs->is_dense() == false ) {
9173         freqs->CheckIfSparseEnough(true);
9174     }
9175 
9176     if (stencil) {
9177         if (mbf) {
9178             ForEachCellNumeric ([&] (hyFloat value, unsigned long index, unsigned long row, unsigned long column) -> void {
9179                 if (row != column && stencil->theData[index]) {
9180                     result += value * freqs->theData[row] * freqs->theData[column];
9181                 }
9182             });
9183         } else {
9184             ForEachCellNumeric ([&] (hyFloat value, unsigned long index, unsigned long row, unsigned long column) -> void {
9185                 if (row != column && stencil->theData[index]) {
9186                     result += value * freqs->theData[row];
9187                 }
9188             });
9189         }
9190     } else {
9191         if (mbf) {
9192             ForEachCellNumeric ([&] (hyFloat value, unsigned long index, unsigned long row, unsigned long column) -> void {
9193                 if (row != column) {
9194                     result += value * freqs->theData[row] * freqs->theData[column];
9195                 }
9196             });
9197         } else {
9198             ForEachCellNumeric ([&] (hyFloat value, unsigned long index, unsigned long row, unsigned long column) -> void {
9199                 if (row != column) {
9200                     result += value * freqs->theData[row];
9201                 }
9202             });
9203         }
9204     }
9205     return result;
9206 }
9207 
9208 //_____________________________________________________________________________________________
ComputeRowAndColSums(void)9209 _List*      _Matrix::ComputeRowAndColSums (void) {
9210 // the first entry is the matrix with row sums
9211 // the second - the entry with column sums
9212 // the third  - a constant with the total sum
9213     if ((storageType == 1) && (hDim >= 1) && (vDim >= 1)) {
9214         _List*      resList = new _List;
9215         _Matrix     *rowSums     = new _Matrix (hDim,1,false,true),
9216         *columnSums  = new _Matrix (vDim,1,false,true);
9217 
9218 
9219         hyFloat totals = 0.0;
9220 
9221         if (theIndex) {
9222             for (long item = 0; item < lDim; item ++) {
9223                 long idx = theIndex[item];
9224                 if (idx>=0) {
9225 
9226                     hyFloat      v = theData[idx];
9227 
9228                     rowSums->theData[idx/vDim] += v;
9229                     columnSums->theData[idx%vDim] += v;
9230                     totals += v;
9231                 }
9232             }
9233         } else {
9234             for (long rows = 0; rows < hDim; rows++) {
9235                 hyFloat rowSum = 0.;
9236 
9237                 for (long columns = 0; columns < vDim; columns ++) {
9238                     rowSum += theData[rows*vDim+columns];
9239                 }
9240 
9241                 rowSums->theData[rows] = rowSum;
9242                 totals += rowSum;
9243             }
9244 
9245             for (long columns = 0; columns < vDim; columns++) {
9246                 hyFloat colSum = 0.;
9247 
9248                 for (long rows = 0; rows < hDim; rows ++) {
9249                     colSum += theData[rows*vDim+columns];
9250                 }
9251 
9252                 columnSums->theData[columns] = colSum;
9253             }
9254         }
9255 
9256         (*resList) < rowSums
9257                    < columnSums
9258                    < new _Constant (totals);
9259 
9260         return resList;
9261 
9262     }
9263     return nil;
9264 }
9265 
9266 //_____________________________________________________________________________________________
9267 
NeighborJoin(bool methodIndex,HBLObjectRef cache)9268 _Matrix* _Matrix::NeighborJoin (bool methodIndex, HBLObjectRef cache) {
9269     long          specCount = GetHDim();
9270 
9271     if (storageType != 1 ||  specCount!= GetVDim() || specCount < 4) {
9272         HandleApplicationError ("NeigborJoin needs a square numeric matrix of dimension >= 4");
9273         return    new _Matrix;
9274     }
9275 
9276     CheckIfSparseEnough (true);
9277 
9278     _Matrix              netDivergence (specCount,1,false,true);
9279     _SimpleList          useColumn     (specCount,0,1),
9280                          columnIndex   (specCount,0,1);
9281 
9282     _Matrix*             res = (_Matrix* )_returnMatrixOrUseCache((specCount+1)*2,3,_NUMERICAL_TYPE,false,cache);
9283 
9284     for (long k=0; k<specCount ; k++) {
9285         for (long j=0; j<k; j++) {
9286             hyFloat d = theData[j*specCount+k];
9287 
9288             netDivergence.theData[k] += d;
9289             netDivergence.theData[j] += d;
9290 
9291         }
9292         res->theData[k*3+2] = 1;
9293     }
9294 
9295     long   cladesMade = 1;
9296 
9297     while (cladesMade < specCount) {
9298         hyFloat      min = 1.e100;
9299 
9300         long            minIndex  = -1,
9301                         minIndex2 = -1,
9302                         minIndexR = -1,
9303                         minIndexC = -1,
9304                         k = specCount-1-cladesMade;
9305 
9306         hyFloat      recRemaining = 1./k;
9307 
9308         if (cladesMade == specCount-1) {
9309             minIndex = useColumn.list_data[1];
9310 
9311             hyFloat d = theData[minIndex];
9312 
9313             if ((d<0)&&methodIndex) {
9314                 d = 0;
9315             }
9316 
9317             k = columnIndex.list_data[1];
9318 
9319             if (k>=specCount+cladesMade-2) {
9320                 k = columnIndex[0];
9321             }
9322 
9323             long    m = specCount+cladesMade-2;
9324 
9325             res->theData[k*3+1]  = d;
9326             res->theData[k*3]    = m;
9327             res->theData[3*m+2] += res->theData[3*k+2];
9328             res->theData[3*m]    = -1;
9329 
9330             break;
9331         }
9332 
9333         for (long i=1; i<useColumn.lLength; i++) {
9334             long c1 = useColumn.list_data[i];
9335 
9336             for (long j=0; j<i; j=j+1) {
9337                 long c2 = useColumn.list_data[j];
9338 
9339                 //if (c2>=c1)
9340                 //break;
9341 
9342                 hyFloat d = theData[c2*specCount+c1]-(netDivergence.theData[c1]+netDivergence.theData[c2])*recRemaining;
9343 
9344                 if (d<min) {
9345                     min         = d;
9346                     minIndex    = c2;
9347                     minIndex2   = c1;
9348                     minIndexR   = j;
9349                     minIndexC   = i;
9350                 }
9351             }
9352         }
9353 
9354         if (minIndex < 0 || minIndex2 < 0 || minIndexR < 0 || minIndexC < 0) {
9355             _String err = _String ("Invalid distance matrix passed to NeighborJoin. Matrices written onto ") & hy_messages_log_name;
9356             ReportWarning ((_String*)toStr());
9357             ReportWarning (_String((_String*)netDivergence.toStr()));
9358             ReportWarning (_String((_String*)useColumn.toStr()));
9359             HandleApplicationError (err);
9360             DeleteObject (res);
9361             return new _Matrix;
9362         }
9363 
9364         hyFloat      D  = theData[minIndex*specCount+minIndex2],
9365                         d  = (D - (netDivergence.theData[minIndex2]-netDivergence.theData[minIndex])*recRemaining)*0.5,
9366                         d2 = D - d;
9367 
9368         if (methodIndex) {
9369             if (d<0) {
9370                 d = 0.0;
9371                 d2 = D;
9372             }
9373             if (d2<0) {
9374                 d2 = 0.0;
9375                 d = D;
9376                 if (d<0) {
9377                     d = 0;
9378                 }
9379             }
9380         }
9381 
9382         long    m = columnIndex.list_data [minIndexC],
9383                 n = columnIndex.list_data [minIndexR];
9384 
9385         k       = specCount+cladesMade-1;
9386 
9387         res->theData[n*3]       =   k;
9388         res->theData[n*3+1]     =   d;
9389 
9390         res->theData[m*3]       =   k;
9391         res->theData[m*3+1]     =   d2;
9392 
9393         res->theData[k*3+2] = res->theData[n*3+2]+res->theData[m*3+2]+1;
9394 
9395         d = theData[minIndex*specCount+minIndex2];
9396 
9397         netDivergence.theData[minIndex]  = 0;
9398         netDivergence.theData[minIndex2] = 0;
9399 
9400         useColumn.Delete(minIndexC);
9401         columnIndex.Delete(minIndexC);
9402 
9403         for (k=0; k<useColumn.lLength; k++) {
9404             long  k2 = useColumn.list_data[k];
9405 
9406             if (k2>=minIndex) {
9407                 if (k2 == minIndex) {
9408                     k++;
9409                 }
9410                 break;
9411             }
9412 
9413             hyFloat d2 = theData[k2*specCount+minIndex]+theData[k2*specCount+minIndex2],
9414                        t  =  (d2-d)*.5;
9415 
9416             netDivergence.theData  [k2]               += t-d2;
9417             theData [k2*specCount+minIndex]            = t;
9418             netDivergence.theData[minIndex]           += t;
9419 
9420         }
9421 
9422         for (; k<useColumn.lLength; k++) {
9423             long  k2 = useColumn.list_data[k];
9424             if (k2 >= minIndex2) {
9425                 if (k2 == minIndex2) {
9426                     k++;
9427                 }
9428                 break;
9429             }
9430 
9431             hyFloat  d2 = theData[minIndex*specCount+k2]+theData[k2*specCount+minIndex2],
9432                         t =  (d2-d)*.5;
9433 
9434             netDivergence.theData [k2]                  += t-d2;
9435             theData[minIndex*specCount+k2]               = t;
9436             netDivergence.theData[minIndex]             += t;
9437 
9438         }
9439 
9440         //for (k=minIndex2+1;k<ds.species; k=k+1)
9441         for (; k<useColumn.lLength; k++) {
9442             long  k2 = useColumn.list_data[k];
9443 
9444             hyFloat  d2 = theData[minIndex*specCount+k2]+theData[minIndex2*specCount+k2],
9445                         t =  (d2-d)*.5;
9446 
9447             netDivergence.theData [k2]                   += t-d2;
9448             theData[minIndex*specCount+k2]                = t;
9449             netDivergence.theData[minIndex]              += t;
9450         }
9451 
9452         columnIndex.list_data[minIndexR] = specCount+cladesMade-1;
9453         {
9454             for (long i=0; i<minIndex2; i++) {
9455                 theData[i*specCount+minIndex2] = 0;
9456             }
9457         }
9458         {
9459             for (long i=minIndex2+1; i<specCount; i++) {
9460                 theData[minIndex2*specCount+i]=0;
9461             }
9462         }
9463 
9464         cladesMade ++;
9465     }
9466 
9467 
9468     //_Matrix    *tree  = res->MakeTreeFromParent (specCount);
9469     //DeleteObject (res);
9470     //return tree;
9471     return res;
9472 }
9473 
9474 //_____________________________________________________________________________________________
MakeTreeFromParent(long specCount,HBLObjectRef cache)9475 _Matrix*        _Matrix::MakeTreeFromParent (long specCount, HBLObjectRef cache) {
9476     if (is_empty()) {
9477         return new _Matrix;
9478     }
9479 
9480     try {
9481 
9482         if (specCount<0L ) {
9483             throw (_String ("Parameter to ") & __PRETTY_FUNCTION__ & " must be greater than or equal to 0");
9484         }
9485 
9486         if (GetVDim () != 3) {
9487             throw (_String ("Expected a matrix with 3 columns"));
9488         }
9489         if (GetHDim () <= 2*specCount + 1) {
9490             throw (_String ("Expected a matrix with at least ") & (2*specCount + 1) & " rows");
9491         }
9492 
9493         const long result_rows = 2*(specCount+1);
9494         _Matrix     *tree = (_Matrix* )_returnMatrixOrUseCache(result_rows,5,_NUMERICAL_TYPE,false,cache),
9495                     CI  (2*(specCount+1),1,false,true);
9496 
9497 
9498         for (long kk = 0; kk < specCount-1; kk++) {
9499             tree->theData[kk*5+4] = -1; // set parent records to
9500         }
9501 
9502         long cladesMade = 0L;
9503 
9504         for (long nodeID2 = 0L; nodeID2 < specCount; nodeID2 ++) {
9505             long        nodeID       = nodeID2,
9506                         nodeDepth    = 0,
9507                         saveNodeID   = nodeID,
9508                         parentID     = theData[nodeID*3],
9509                         layoutOffset = cladesMade,
9510                         m,
9511                         n;
9512 
9513             while (parentID>=0) {
9514                 long idx = parentID-specCount;
9515                 if (idx < 0 || idx >= result_rows) {
9516                     throw (_String ("Invalid parent index in row ") & nodeID2);
9517                 }
9518                 n = tree->theData[idx*5+4];
9519                 if (n >= 0) {
9520                     layoutOffset = n+tree->theData[idx*5+3];
9521                     break;
9522                 }
9523                 parentID  = theData[parentID*3];
9524             }
9525 
9526             parentID   = theData[nodeID*3];
9527 
9528             while (parentID>=0) {
9529                 n = parentID-specCount;
9530                 if (n < 0 || n >= result_rows) {
9531                     throw (_String ("Invalid parent index in row ") & nodeID);
9532                 }
9533                 m = theData[nodeID*3+2];
9534 
9535                 if (tree->theData[n*5+4] < 0)
9536                     /* this node hasn't been laid out yet */
9537                 {
9538                     if (theData[parentID*3]>=0) {
9539                         tree->theData[n*5+4] = layoutOffset; /* where the layout for the clade begins */
9540                         tree->theData[n*5+3]   = m; /* offset for that layout */
9541                     }
9542 
9543                     m += layoutOffset - 1;
9544 
9545                     tree->theData[m*5]   = nodeID;
9546                     tree->theData[m*5+2] = theData[nodeID*3+1];
9547 
9548                     CI.theData[nodeID] = m;
9549                 } else
9550                     /* it has been laid out */
9551                 {
9552                     m += tree->theData[n*5+3]+tree->theData[n*5+4] - 1;
9553 
9554                     tree->theData[m*5]   = nodeID;
9555                     tree->theData[m*5+2] = theData[nodeID*3+1];
9556 
9557                     tree->theData[n*5+3] = m + theData[nodeID*3+2];
9558 
9559                     CI.theData[nodeID]   = m;
9560                     nodeDepth ++;
9561 
9562                     break;
9563                 }
9564                 nodeDepth++;
9565                 nodeID    = parentID;
9566                 parentID  = theData[nodeID*3];
9567             }
9568 
9569             /* update levels of nodes */
9570 
9571             if (parentID<0) {
9572                 nodeID   = saveNodeID;
9573                 parentID = theData[nodeID*3];
9574 
9575                 while (parentID>=0) {
9576                     m = CI.theData[nodeID];
9577                     if (m < 0 || m >= result_rows) {
9578                         throw (_String ("Invalid parent index in row ") & nodeID);
9579                     }
9580                     tree->theData[m*5+1] = nodeDepth;
9581                     nodeDepth --;
9582                     saveNodeID = nodeID;
9583                     nodeID     = parentID;
9584                     parentID   = theData[nodeID*3];
9585                 }
9586 
9587                 cladesMade += theData[3*saveNodeID+2];
9588             } else {
9589                 m = CI.theData[parentID];
9590 
9591                 n = tree->theData[m*5+1];/* depth of the parent */
9592 
9593                 nodeID   = saveNodeID;
9594 
9595                 while (nodeDepth >= 0) {
9596                     m = CI.theData[nodeID];
9597 
9598                     tree->theData[m*5+1] = nodeDepth+n;
9599 
9600                     nodeDepth --;
9601                     nodeID  = theData[nodeID*3];
9602                 }
9603             }
9604         }
9605         tree->theData[cladesMade*5]      = 2*specCount-2;
9606         tree->theData[cladesMade*5+1]    = 0;
9607         tree->theData[(specCount-2)*5+4] = 0;
9608         return tree;
9609     } catch (const _String& error) {
9610         HandleApplicationError(error);
9611         return new _Matrix (1,1,false,true);
9612     }
9613 }
9614 
9615 
9616 //_____________________________________________________________________________________________
FisherExact(hyFloat p1,hyFloat p2,hyFloat p3)9617 hyFloat      _Matrix::FisherExact (hyFloat p1, hyFloat p2, hyFloat p3)
9618 {
9619     if ((hDim>=1)&&(vDim>=1)&&(hDim+vDim>2)) {
9620         if (vDim<hDim) {
9621             _Matrix temp (*this);
9622             temp.Transpose();
9623             return  temp.FisherExact (p1,p2,p3);
9624         }
9625         _Matrix *  numericMx = (_Matrix*)ComputeNumeric();
9626 
9627         double     prob,
9628                    pval;
9629 
9630         numericMx->CheckIfSparseEnough (true);
9631 
9632         double        *tempArray = new double [numericMx->lDim];
9633 
9634         for (long i=0; i<hDim; i++)
9635             for (long j=0; j<vDim; j++) {
9636                 tempArray[j*hDim+i] = numericMx->theData[i*vDim+j];
9637             }
9638 
9639         fexact_ (hDim,vDim,tempArray,p1,p2,p3,&prob,&pval);
9640         delete  []  tempArray;
9641         return pval;
9642 
9643     }
9644     return 1.;
9645 }
9646 
9647 //_____________________________________________________________________________________________
9648 
SimplexHelper1(long rowIndex,_SimpleList & columnList,long columnCount,bool useAbsValue,long & maxIndex,hyFloat & maxValue)9649 void        _Matrix::SimplexHelper1 (long rowIndex, _SimpleList& columnList, long columnCount, bool useAbsValue, long& maxIndex, hyFloat& maxValue)
9650 // find the maximum element (using absolute value of not) in row rowIndex+1 of this matrix,
9651 // over first columnCount columns indexed by columnList
9652 //  column indexing is offset by + 1 to account for the first column not being eligible for pivoting
9653 {
9654     if (columnCount <= 0) {
9655         maxValue = 0.0;
9656     } else {
9657         rowIndex = (rowIndex+1)*vDim;
9658         maxIndex = columnList.list_data[0];
9659         maxValue = theData[rowIndex+maxIndex+1];
9660         for (long k=1; k<columnCount; k++) {
9661             hyFloat t = useAbsValue?
9662                            (fabs(theData[rowIndex+columnList.list_data[k]+1])-fabs(maxValue))
9663                            :(theData[rowIndex+columnList.list_data[k]+1]-maxValue);
9664             if (t>0.) {
9665                 maxValue = theData[rowIndex+columnList.list_data[k]+1];
9666                 maxIndex = columnList.list_data[k];
9667             }
9668         }
9669     }
9670 }
9671 
9672 //_____________________________________________________________________________________________
9673 
SimplexHelper2(long & pivotIndex,long columnToExamine,hyFloat eps)9674 void        _Matrix::SimplexHelper2 (long& pivotIndex, long columnToExamine, hyFloat eps)
9675 {
9676     long            m = hDim-2,
9677                     n = vDim-1,
9678                     i = 0;
9679 
9680     hyFloat      q1,
9681                     q;
9682 
9683     pivotIndex = -1;
9684     for (; i<m; i++)
9685         if (theData[(i+1)*vDim+columnToExamine+1] < -eps) {
9686             break;
9687         }
9688     if (i>=m) {
9689         return;    // function is unbounded
9690     }
9691     q1              = -theData[(i+1)*vDim]/theData[(i+1)*vDim+columnToExamine+1];
9692     pivotIndex      = i;
9693     for (i=pivotIndex+1; i<m; i++) {
9694         if (theData[(i+1)*vDim+columnToExamine+1] < -eps) {
9695             q = -theData[(i+1)*vDim]/theData[(i+1)*vDim+columnToExamine+1];
9696             if (q<q1) {
9697                 pivotIndex = i;
9698                 q1         = q;
9699             } else {
9700                 hyFloat q0, qp;
9701                 if (q==q1) { // degeneracy
9702                     for (long k=0; k<n; k++) {
9703                         qp = -theData[(pivotIndex+1)*vDim + k + 1]/theData[(pivotIndex+1)*vDim+columnToExamine+1];
9704                         q0 = -theData[(i+1)*vDim + k + 1]/theData[(i+1)*vDim+columnToExamine+1];
9705                         if (q0!=qp) {
9706                             break;
9707                         }
9708                     }
9709                     if (q0 < qp) {
9710                         pivotIndex = i;
9711                     }
9712                 }
9713             }
9714         }
9715     }
9716 
9717 }
9718 
9719 //_____________________________________________________________________________________________
9720 
SimplexHelper3(long i1,long k1,long ip,long kp)9721 void        _Matrix::SimplexHelper3 (long i1, long k1, long ip, long kp)
9722 {
9723     hyFloat piv = 1./theData[(ip+1)*vDim+kp+1];
9724     for (long i=0; i<=i1+1; i++)
9725         if (i-1 != ip) { // not the pivot row
9726             theData[i*vDim+kp+1] *= piv;
9727             for (long k=0; k<=k1+1; k++)
9728                 if (k-1 != kp) {
9729                     theData[i*vDim+k] -= theData[(ip+1)*vDim+k] * theData[i*vDim+kp+1];
9730                 }
9731         }
9732     for (long k=0; k<=k1+1; k++)
9733         if (k-1 != kp)  {
9734             theData[(ip+1)*vDim+k] *= -piv;
9735         }
9736     theData[(ip+1)*vDim+kp+1] = piv;
9737 }
9738 
9739 //_____________________________________________________________________________________________
SimplexSolve(hyFloat desiredPrecision)9740 _Matrix*    _Matrix::SimplexSolve (hyFloat desiredPrecision ) {
9741 // this function is adapted from the Num. Recipes in C version; but with 0 indexing
9742 // hyphy primitives
9743 // and without goto labels
9744 
9745 // the of dimension RxC is interpreted as follows
9746 // R-1 constraints
9747 // C-2 variables
9748 
9749 // the first row:
9750 //      cell   0      - current value of the objective function
9751 //      cells  1-C-2  - coefficient of variable x_k in the objective function
9752 //      cell   C-1    - if >=0. then maximize the function
9753 //                    - if <0   then minimize the function
9754 
9755 // other rows
9756 // constraint j written in the form
9757 // b_j - a_j1 x_1 - a_j2 x_2 - ... -a_jk x_k
9758 // last cell is what type of constraint it is:
9759 // < 0: <= inequality
9760 // > 0: >= inequality
9761 // = 0 equality
9762 
9763 // upon return, will contain a row matrix of either C-1 cells:
9764 // extreme value of the objective function in the first cell
9765 // variable values in the same order as originally supplied
9766 // if an kEmptyString matrix is returned - no feasible solution could be found
9767 // if a 1x1 matrix is returned - the objective function is unbounded
9768 
9769     try {
9770 
9771         long n = vDim-2, // number of variables
9772              m = hDim-1; // number of constraints
9773 
9774         if (is_numeric() && n>0 && m>0) {
9775             while (1) // artificial construct used to break out on error
9776                 // an to avoid goto statements
9777             {
9778                 bool        doMaximize = (*this)(0,n+1) >= 0.0;
9779 
9780                 // allocate temporary storage
9781                 _Matrix     tempMatrix (m+2,n+1,false,true);
9782                 // first, copy the objective function row
9783                 for (long i=0; i<=n; i++) {
9784                     tempMatrix.Store(0,i,doMaximize?(*this)(0,i):-(*this)(0,i));
9785                 }
9786 
9787                 // now, count the number of constraints of each type and reorder things
9788 
9789                 long    m1 = 0, // <= constraints
9790                         m2 = 0, // >= constraints
9791                         m3 = 0; // == constraints
9792 
9793                 {
9794                     for (long i=1; i<=m; i++) {
9795                         hyFloat t = (*this)(i,n+1);
9796                         if (t<0.0) {
9797                             m1++;
9798                         } else if (t>0.0) {
9799                             m2++;
9800                         } else {
9801                             m3++;
9802                         }
9803                         if ((*this)(i,0) < 0.0) {
9804                             throw _String("Negative values are not allowed in the first column of the simplex tableau");
9805                          }
9806                     }
9807                 }
9808 
9809 
9810                 // copy coefficients into the temp matrix, sorting the constraints in the <=, >= and == order
9811                 {
9812                     for (long i=1, t1=0, t2=0, t3=0; i<=m; i++) {
9813                         hyFloat t   = (*this)(i,n+1);
9814                         long       idx;
9815                         if (t<0.0) {
9816                             idx=1+t1++;
9817                         } else if (t>0.0) {
9818                             idx=1+m1+t2++;
9819                         } else {
9820                             idx=1+m1+m2+t3++;
9821                         }
9822                         for (long j=0; j<=n; j++) {
9823                             tempMatrix.Store(idx,j,(*this)(i,j));
9824                         }
9825 
9826                     }
9827                 }
9828                 // allocate temporary storage
9829 
9830                 _SimpleList l1      (n+1,0,1),
9831                             l3       (m,0,0),
9832                             izrov    (n,0,1),
9833                             iposv    (m,n,1);
9834 
9835                 long        nl1     = n;
9836 
9837                 if (m2+m3) { // >= and == constraints exist; origin is not a feasible solution
9838                     for (long i=0; i<m2; l3.list_data[i] = 1,i++) ; // slack variables in the list of 'basis' variables
9839                     for (long k=0; k<=n; k++) { // compute the auxiliary objective function
9840                         hyFloat q = 0.;
9841                         for (long k2 = m1+1; k2<=m; k2++) {
9842                             q += tempMatrix(k2,k);
9843                         }
9844                         tempMatrix.Store (m+1,k,-q);
9845                     }
9846                     while (1) { // initial artifical construct
9847                         long        pivotColumn,
9848                                     ip;
9849                         hyFloat  pivotValue;
9850 
9851                         tempMatrix.SimplexHelper1 (m,l1,nl1,false,pivotColumn, pivotValue);
9852                         if (pivotValue <= desiredPrecision && tempMatrix(m+1,0) < -desiredPrecision)
9853                             // aux objective is still negative and can't be improved
9854                             // no feasible solution
9855                         {
9856                             return new _Matrix;
9857                         }
9858                         if (pivotValue <= desiredPrecision && tempMatrix(m+1,0) <= desiredPrecision)
9859                             // aux objective is zero and can't be improved
9860                             // found a feasible solution; clean up artificial variables and move on to phase 2
9861                         {
9862                             for (ip = m1+m2; ip < m; ip++) {
9863                                 if (iposv.list_data[ip] == ip + n) {
9864                                     tempMatrix.SimplexHelper1 (ip,l1,nl1,true,pivotColumn, pivotValue);
9865                                     if (pivotValue > desiredPrecision) {
9866                                         goto one;
9867                                     }
9868                                 }
9869                             }
9870                             for (long i = m1; i<m1+m2; i++)
9871                                 if (l3.list_data[i-m1] == 1)
9872                                     for (long k=0; k<=n; k++) {
9873                                         tempMatrix.Store (i+1,k,-tempMatrix(i+1,k));
9874                                     }
9875 
9876                             break;
9877                         }
9878 
9879                         tempMatrix.SimplexHelper2 (ip,pivotColumn,desiredPrecision);
9880                         if (ip<0) {
9881                             return new _Matrix (1,1,false,true);    // unbounded function
9882                         }
9883 
9884     one:
9885                         tempMatrix.SimplexHelper3 (m,n-1,ip,pivotColumn);
9886                         if (iposv.list_data[ip] >= n+m1+m2) {
9887                             long k = 0;
9888                             for (k=0; k<nl1; k++)
9889                                 if (l1.list_data[k] == pivotColumn) {
9890                                     break;
9891                                 }
9892                             nl1--;
9893                             for (long i2=k; i2<nl1; i2++) {
9894                                 l1.list_data[i2] = l1.list_data[i2+1];
9895                             }
9896                         } else {
9897                             long k2 = iposv.list_data[ip] - m1 - n;
9898                             if (k2 >= 0 && l3.list_data[k2]) {
9899                                 l3.list_data[k2] = 0;
9900                                 tempMatrix.theData[(m+1)*tempMatrix.vDim + pivotColumn + 1] ++;
9901                                 for (long i=0; i<m+2; i++) {
9902                                     tempMatrix.theData[i*tempMatrix.vDim + pivotColumn + 1] *= -1.0;
9903                                 }
9904                             }
9905                         }
9906                         long s = izrov.list_data[pivotColumn];
9907                         izrov.list_data[pivotColumn] = iposv.list_data[ip];
9908                         iposv.list_data[ip] = s;
9909                     }// end of phase 1
9910                 }
9911 
9912                 while (1) {
9913                     long            pivotColumn,
9914                                     pivotRow;
9915                     hyFloat      pivotValue;
9916 
9917                     tempMatrix.SimplexHelper1 (-1,l1,nl1,false,pivotColumn,pivotValue);
9918                     if (pivotValue < desiredPrecision) { // done!
9919                         // produce the final solution
9920                         _Matrix * resMatrix = new _Matrix (1,n+1,false,true);
9921                         resMatrix->Store(0,0,doMaximize?tempMatrix(0,0):-tempMatrix(0,0));
9922                         for (long k=0; k<iposv.lLength; k++)
9923                             if (iposv.list_data[k]<n) {
9924                                 resMatrix->Store(0,iposv.list_data[k]+1,tempMatrix(k+1,0));
9925                             }
9926                         return resMatrix;
9927                     }
9928                     tempMatrix.SimplexHelper2 (pivotRow,pivotColumn,desiredPrecision);
9929                     if (pivotRow<0) {
9930                         return new _Matrix (1,1,false,true);
9931                     }
9932                     tempMatrix.SimplexHelper3 (m-1,n-1,pivotRow,pivotColumn);
9933                     long s = izrov.list_data[pivotColumn];
9934                     izrov.list_data[pivotColumn] = iposv.list_data[pivotRow];
9935                     iposv.list_data[pivotRow] = s;
9936                 }
9937 
9938             }
9939         } else {
9940             throw _String("SimplexSolve requires a numeric matrix with > 1 row and > 2 columns");
9941         }
9942 
9943 
9944     } catch (_String const& err) {
9945         HandleApplicationError (err);
9946     }
9947     return new _Matrix;
9948 }
9949 
9950 //_____________________________________________________________________________________________
9951 
CopyABlock(_Matrix * source,long startRow,long startColumn,long rowSpan,long colSpan)9952 void    _Matrix::CopyABlock (_Matrix * source, long startRow, long startColumn, long rowSpan, long colSpan)
9953 {
9954     long indexTarget = startRow*vDim + startColumn,
9955          indexSource = 0,
9956          sourceHDim  = rowSpan<=0?source->hDim:rowSpan,
9957          sourceVDim  = colSpan<=0?source->vDim:colSpan,
9958          maxRow         = MIN (hDim, startRow    + sourceHDim),
9959          maxColumn   = MIN (vDim, startColumn + sourceVDim);
9960 
9961     for  (long r = startRow; r < maxRow; r++) {
9962         for (long c = startColumn, c2 = 0; c < maxColumn; c++, c2++) {
9963             theData[indexTarget+c2] = source->theData[indexSource+c2];
9964         }
9965 
9966         indexSource += sourceVDim;
9967         indexTarget += vDim;
9968     }
9969 }
9970 
9971 
9972 //_____________________________________________________________________________________________
DirichletDeviate(void)9973 HBLObjectRef   _Matrix::DirichletDeviate (void)
9974 {
9975     /* -----------------------------------------------------------
9976         DirichletDeviate()
9977             Generate vector of random deviates from the Dirichlet
9978             distribution defined by contents of this matrix as
9979             hyperparameters (a > 0).
9980        ----------------------------------------------------------- */
9981     try {
9982 
9983         long        dim;
9984 
9985         hyFloat  denom   = 0.;
9986 
9987         _Matrix     res (1, dim = GetHDim()*GetVDim(), false, true);    // row vector
9988 
9989 
9990         if (!is_numeric()) {
9991             throw _String("Only numeric vectors can be passed to DirichletDeviate");
9992         }
9993 
9994         if (is_row() || is_column ()) {
9995             // generate a random deviate from gamma distribution for each hyperparameter
9996 
9997             for (long i = 0L; i < dim; i++) {
9998                 if (theData[i] < 0.) {
9999                     throw _String("Dirichlet not defined for negative parameter values.");
10000                 }
10001 
10002                 res.Store (0, i, gammaDeviate(theData[i]));
10003                 denom += res(0,i);
10004             }
10005 
10006             // normalize by sum
10007             for (long i = 0; i < dim; i++) {
10008                 res.Store (0, i, res(0,i)/denom);
10009             }
10010 
10011             return (HBLObjectRef) res.makeDynamic();
10012         } else {
10013             throw _String("Argument must be a row- or column-vector.");
10014         }
10015     } catch (_String const& err) {
10016         HandleApplicationError (err);
10017     }
10018     return new _Matrix (1,1,false,true);
10019 }
10020 
10021 
10022 
10023 //_____________________________________________________________________________________________
GaussianDeviate(_Matrix & cov)10024 HBLObjectRef   _Matrix::GaussianDeviate (_Matrix & cov)
10025 {
10026     /* ------------------------------------------------------
10027         GaussianDeviate()
10028             Generate vector of random deviates from k-
10029             dimensional Gaussian distribution given contents
10030             of this matrix as mean parameters, and argument
10031             as covariance matrix.
10032 
10033             Use algorithm described in Numerical Recipes
10034             3rd ed., p.379
10035        ------------------------------------------------------ */
10036 
10037     //ReportWarning (_String("Entered _Matrix::GaussianDeviate() with cov = ") & (_String *)(cov.toStr()));
10038 
10039     try {
10040 
10041         if (storageType != 1 || GetHDim() > 1) {
10042             HandleApplicationError (_String("ERROR in _Matrix::GaussianDeviate(), expecting to be called on numeric row vector matrix, current dimensions: ") & GetHDim() & "x" & GetVDim());
10043             return new _Matrix;
10044         }
10045 
10046         long kdim = GetVDim();    // number of entries in this _Matrix object as vector of means
10047 
10048         if (cov.check_dimension(kdim, kdim)) {
10049             _Matrix* cov_cd = (_Matrix *) cov.CholeskyDecompose(),
10050                     * gaussvec = new _Matrix (1, kdim, false, true);
10051 
10052             //ReportWarning (_String("\nCholesky decomposition of cov = ") & (_String *) cov_cd->toStr());
10053 
10054             // fill column vector with independent standard normal deviates
10055             for (long i = 0L; i < kdim; i++) {
10056                 gaussvec->Store (0, i, gaussDeviate());
10057             }
10058 
10059             //ReportWarning (_String ("\nvector of gaussian deviates = ") & (_String *) gaussvec.toStr());
10060 
10061             // left multiply vector by Cholesky decomposition of covariance matrix
10062             *gaussvec *= *cov_cd;
10063 
10064             // shift mean
10065             for (long i = 0L; i < kdim; i++) {
10066                 gaussvec->Store (0, i, (*gaussvec)(0,i) + theData[i]);
10067             }
10068 
10069             DeleteObject (cov_cd);
10070             return gaussvec;
10071         } else {
10072             throw (_String("Error in _Matrix::GaussianDeviate(), incompatible dimensions in covariance matrix: ") & cov.GetHDim() & "x" & cov.GetVDim());
10073 
10074         }
10075     } catch (const _String& err) {
10076         HandleApplicationError (err);
10077     }
10078 
10079     return new _Matrix;
10080 }
10081 
10082 
10083 //_____________________________________________________________________________________________
MultinomialSample(_Constant * replicates)10084 HBLObjectRef   _Matrix::MultinomialSample (_Constant *replicates) {
10085 
10086     try {
10087         _List         reference_manager;
10088 
10089         long          values      = GetHDim();
10090         unsigned long samples     = replicates?replicates->Value ():0;
10091 
10092         _Matrix     *eval    = (_Matrix*)Compute (),
10093                     * sorted = nil,
10094                     * result = nil;
10095 
10096         if (samples == 0UL) {
10097             throw _String ("Expected a numerical (>=1) value for the number of replicates");
10098         } else if ( ! eval->is_numeric() || GetVDim() != 2 || values < 2) {
10099             throw _String ("Expecting numerical Nx2 (with N>=1) matrix.");
10100         } else {
10101             _Constant one (1.);
10102             sorted = (_Matrix*) eval->SortMatrixOnColumn(&one, nil);
10103             reference_manager < sorted;
10104             hyFloat      sum = 0.;
10105 
10106             for (long n = 1L; n < 2*values; n+=2L) {
10107                 hyFloat v = sorted->theData[n];
10108                 if (v < 0.) {
10109                     sum = 0.;
10110                     break;
10111                 }
10112                 sum += v;
10113             }
10114             if (CheckEqual (sum, 0.)) {
10115                 throw _String ("The probabilities (second column) cannot add to 0 or be negative");
10116             } else {
10117                 sum = 1./sum;
10118 
10119                 _Matrix     *raw_result  = new _Matrix (1, values, false, true),
10120                 *normalized  = new _Matrix (1, values, false, true);
10121 
10122                 reference_manager <raw_result;
10123                 reference_manager <normalized;
10124 
10125 
10126                 for (long v = 0; v < values; v++) {
10127                     normalized->theData[values-1-v] = sorted->theData[1+2*v] * sum;
10128                 }
10129 
10130 
10131                  hyFloat  seconds_accumulator = .0,
10132                             temp;
10133 
10134                 for (unsigned long it = 0UL; it < samples; it++) {
10135                      raw_result->theData[DrawFromDiscrete(normalized->theData, values)] += 1.;
10136                 }
10137 
10138                 result = new _Matrix (values, 2, false, true);
10139 
10140                 for (long v = 0; v < values; v++) {
10141                     result->theData[2*v]   = (long)sorted->theData[2*(values-1-v)];
10142                     result->theData[2*v+1] = raw_result->theData[v];
10143                 }
10144 
10145 
10146                 return result;
10147             }
10148         }
10149     }
10150     catch (_String const& err) {
10151         HandleApplicationError (err);
10152     }
10153     return new _Matrix;
10154 }
10155 
10156 //_____________________________________________________________________________________________
InverseWishartDeviate(_Matrix & df)10157 HBLObjectRef   _Matrix::InverseWishartDeviate (_Matrix & df)
10158 {
10159     /* ---------------------------------------------------
10160         InverseWishartDeviate()
10161             Generates a random matrix whose inverse
10162             has the Wishart distribution with this matrix
10163             supplying the covariance matrix parameter and
10164             a degrees of freedom vector argument.
10165        --------------------------------------------------- */
10166 
10167     try {
10168         long        n       = GetHDim();
10169 
10170 
10171         if (!is_square_numeric()) {
10172             throw _String("Expecting a numerical square matrix.");
10173         }
10174 
10175         else if (!df.is_numeric() || !df.check_dimension(n,1)) {
10176             throw _String("Expecting numerical column vector for second argument (degrees of freedom).");
10177         } else {
10178             // compute Cholesky factor for this matrix inverse, extract the diagonal
10179             _List   reference_manager;
10180 
10181             _Matrix * inv       = (_Matrix *) Inverse(nil);
10182             _Matrix * invCD     = (_Matrix *) (inv->CholeskyDecompose());
10183 
10184             DeleteObject (inv);
10185             reference_manager < invCD;
10186 
10187             return WishartDeviate (df, *invCD);
10188         }
10189     } catch (const _String& err) {
10190         HandleApplicationError (err);
10191     }
10192     return new _Matrix;
10193 }
10194 
10195 //_____________________________________________________________________________________________
WishartDeviate(_Matrix & df)10196 HBLObjectRef   _Matrix::WishartDeviate (_Matrix & df) {
10197     _Matrix     diag;   // calls default constructor
10198     return WishartDeviate (df, diag);
10199 }
10200 
10201 //_____________________________________________________________________________________________
10202 
WishartDeviate(_Matrix & df,_Matrix & decomp)10203 HBLObjectRef   _Matrix::WishartDeviate (_Matrix & df, _Matrix & decomp) {
10204     /* ---------------------------------------------------
10205      WishartDeviate()
10206         Generates a random matrix following the Wishart
10207         distribution with this matrix supplying the
10208         covariance matrix parameter.
10209 
10210         First argument: degrees of freedom vector.
10211         Second argument (optional):
10212             Diagonal of Cholesky decomposition of
10213             covariance matrix, overrides this matrix.
10214      --------------------------------------------------- */
10215 
10216 
10217     try {
10218 
10219         long        n   = GetHDim();
10220 
10221         _Matrix     rdeviates (n, n, false, true),
10222                     rd_transpose;
10223 
10224 
10225         if (!(df.is_row () || df.is_column())) {
10226             throw _String("Expecting row vector for degrees of freedom argument.");
10227         } else if (df.is_column()) {
10228             df.Transpose(); // convert column vector to row vector
10229         }
10230 
10231         if (decomp.is_empty()) {    // no second argument, perform Cholesky decomposition
10232             if (!is_square_numeric()) {
10233                 throw _String("Expecting square numeric matrix.");
10234             } else {
10235                 _Matrix     * cholesky = (_Matrix *) CholeskyDecompose();
10236 
10237                 if (cholesky->GetHDim() > 0) {
10238                     decomp = *cholesky;
10239                     DeleteObject (cholesky);
10240                 } else {
10241                     return cholesky;  // empty _Matrix from error in CholeskyDecompose()
10242                 }
10243             }
10244         }
10245 
10246 
10247         // populate diagonal with square root of i.i.d. chi-square random deviates
10248         for (unsigned long i = 0UL; i < n; i++) {
10249             rdeviates.Store (i, i, sqrt(chisqDeviate(df(0,i)-i+1)) );
10250 
10251             // populate upper triagonal with i.i.d. standard normal N(0,1) deviates
10252             for (unsigned long j = i+1UL; j < n; j++) {
10253                 rdeviates.Store (i, j, gaussDeviate());
10254             }
10255         }
10256 
10257 
10258         // result is obtained from D^T B D, where B = A^T A, ^T is matrix transpose
10259         rd_transpose = rdeviates;
10260         rd_transpose.Transpose();
10261         rd_transpose *= rdeviates;  // A^T A
10262         rd_transpose *= decomp; // A^T A D
10263 
10264         decomp.Transpose();
10265         decomp *= rd_transpose; // D^T A^T A D
10266 
10267         return (HBLObjectRef) decomp.makeDynamic();
10268     } catch (const _String& err) {
10269         HandleApplicationError(err);
10270     }
10271     return new _Matrix;
10272 }
10273 
10274 //-----------------------------------------------------------------------------------------------------------------
10275 
_returnMatrixOrUseCache(long nrow,long ncol,long type,bool is_sparse,HBLObjectRef cache)10276 HBLObjectRef _returnMatrixOrUseCache (long nrow, long ncol, long type, bool is_sparse, HBLObjectRef cache) {
10277     if (cache && cache->ObjectClass() == MATRIX) {
10278         _Matrix *cached_mx = (_Matrix*)cache;
10279         if (cached_mx->check_dimension(nrow, ncol) && cached_mx->has_type (type) && cached_mx->is_dense() == !is_sparse) {
10280             cached_mx->Clear(false);
10281         } else {
10282             cached_mx->Clear();
10283             _Matrix::CreateMatrix (cached_mx, nrow, ncol, is_sparse, type == _NUMERICAL_TYPE ? true : false);
10284         }
10285         //cached_mx->AddAReference();
10286         return cached_mx;
10287     }
10288     return new _Matrix (nrow, ncol, is_sparse, type == _NUMERICAL_TYPE ? true : false);
10289 }
10290 
10291