1 /*
2
3 HyPhy - Hypothesis Testing Using Phylogenies.
4
5 Copyright (C) 1997-now
6 Core Developers:
7 Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
8 Art FY Poon (apoon42@uwo.ca)
9 Steven Weaver (sweaver@temple.edu)
10
11 Module Developers:
12 Lance Hepler (nlhepler@gmail.com)
13 Martin Smith (martin.audacis@gmail.com)
14
15 Significant contributions from:
16 Spencer V Muse (muse@stat.ncsu.edu)
17 Simon DW Frost (sdf22@cam.ac.uk)
18
19 Permission is hereby granted, free of charge, to any person obtaining a
20 copy of this software and associated documentation files (the
21 "Software"), to deal in the Software without restriction, including
22 without limitation the rights to use, copy, modify, merge, publish,
23 distribute, sublicense, and/or sell copies of the Software, and to
24 permit persons to whom the Software is furnished to do so, subject to
25 the following conditions:
26
27 The above copyright notice and this permission notice shall be included
28 in all copies or substantial portions of the Software.
29
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37
38 */
39
40 #include <string.h>
41 #include <stdio.h>
42 #include <time.h>
43 #include <float.h>
44 #include <math.h>
45 #include <limits.h>
46
47 #include "matrix.h"
48 #include "polynoml.h"
49 #include "associative_list.h"
50 #include "batchlan.h"
51 #include "likefunc.h"
52
53 #include "function_templates.h"
54 #include "mersenne_twister.h"
55 #include "global_things.h"
56 #include "string_file_wrapper.h"
57
58
59 //#include "profiler.h"
60
61 using namespace hy_global;
62
63 #define MEMORYERROR "Out of Memory"
64 #define ZEROOBJECT 0.0
65 #define ZEROPOINTER nil
66
67
68 _String MATRIX_AGREEMENT = "CONVERT_TO_POLYNOMIALS",
69 ANAL_COMP_FLAG = "ANALYTIC_COMPUTATIONS",
70 ANAL_MATRIX_TOLERANCE = "ANAL_MATRIX_TOLERANCE",
71 CACHE_FORMULA_DEPENDANCY = "CACHE_FORMULA_DEPENDANCY";
72
73
74 int _Matrix::precisionArg = 0;
75 int _Matrix::storageIncrement = 16;
76 int _Matrix::switchThreshold = 35;
77
78 hyFloat _Matrix::truncPrecision = 1e-16;
79 #define MatrixMemAllocate(X) MemAllocate(X, false, 64)
80 #define MatrixMemFree(X) free(X)
81 #define MX_ACCESS(a,b) theData[(a)*hDim+(b)]
82
83
84 hyFloat analMatrixTolerance = 1e-6,
85 zero = 0,
86 AUTO_PAD_DIAGONAL = 1,
87 toPolyOrNot=0.0,
88 toMorNot2M=1.0,
89 _log2 = log (2.0);
90
91 long ANALYTIC_COMPUTATION_FLAG = 0;
92
93 _Trie _HY_MatrixRandomValidPDFs;
94
95
96
97 //__________________________________________________________________________________
98
99 int fexact_ (long , long , double *, double , double , double , double *, double *);
100 void MatrixIndexError (long, long, long, long);
101
102
103 // function prototypes
104
105
106 #ifdef _SLKP_USE_AVX_INTRINSICS
echo_avx_sum_4(__m256d const x)107 void echo_avx_sum_4 (__m256d const x) {
108 double a[4];
109 _mm256_storeu_pd(a, x);
110 printf ("%g|%g|%g|%g\n", a[0], a[1], a[2], a[3]);
111 }
112 #endif
113
114
115
116
117 //__________________________________________________________________________________
118
MatrixIndexError(long hPos,long vPos,long hDim,long vDim)119 void MatrixIndexError (long hPos, long vPos, long hDim, long vDim) {
120 HandleApplicationError (
121 kErrorStringInvalidMatrixIndex & _String ((long)hPos).Enquote('[',']') & _String ((long)vPos).Enquote('[',']') &
122 " in an " &_String (hDim) & " by " &_String (vDim) & " matrix.");
123 }
124
125 //_____________________________________________________________________________________________
126
_Matrix()127 _Matrix::_Matrix () { // default constructor, doesn't do much
128 Initialize();
129 }
130 //_____________________________________________________________________________________________
131
Initialize(bool)132 void _Matrix::Initialize (bool) { // default constructor, doesn't do much
133 theData = nil;
134 theIndex = nil;
135 vDim = hDim = lDim = bufferPerRow = overflowBuffer = 0L;
136 storageType = _NUMERICAL_TYPE;
137 allocationBlock = 1;
138 theValue = nil;
139 compressedIndex = nil;
140
141 }
142
143 //_____________________________________________________________________________________________
144
_Matrix(_String const & s,bool isNumeric,_FormulaParsingContext & fpc,bool use_square_brackets)145 _Matrix::_Matrix (_String const& s, bool isNumeric, _FormulaParsingContext & fpc, bool use_square_brackets) {
146 // takes two separate formats
147 // 1st : {{i11,...,i1n}{i21,...,i2n}....{in1,...,inn}} // all elements must be explicitly specified
148 // 2st : {hor dim, <vert dim>,{hor index, vert index, value or formula}{...}...}
149
150 bool doErrors = fpc.errMsg() == nil,
151 compute_keys_values = fpc.buildComplexObjects();
152
153 _VariableContainer const* theP = fpc.formulaScope();
154
155 Initialize();
156
157 bool isAConstant = true; // is this a matrix of numbers, or formulas
158 char cc;
159
160
161 long i=s.FirstNonSpaceIndex(),
162 j=s.FirstNonSpaceIndex(i+1),
163 k=0,
164 hPos = 0,
165 vPos = 0;
166
167 char open_terminator = use_square_brackets ? '[' : '{',
168 close_terminator = use_square_brackets ? ']' : '}';
169
170 bool terminators [256] {false};
171 terminators [(unsigned char)','] = true;
172 terminators [(unsigned char)close_terminator] = true;
173
174
175 try {
176 auto handle_numeric_parameter = [&] (_String& term) -> long {
177 if (compute_keys_values) {
178 return round (ProcessNumericArgument(&term, theP));
179 }
180 else {
181 _String err_msg;
182 _Formula exp (term,theP, &err_msg);
183 if (exp.IsConstant()) {
184 return round(exp.Compute()->Value());
185 }
186 return 1;
187 }
188 };
189
190 if (j>i && s.length()>4) { // non-empty string
191 _String term;
192 if (s.char_at (i) == open_terminator && s.char_at (j) == open_terminator) { // first type
193 i = j+1;
194 // read the dimensions first
195
196 while (i<s.length()) {
197 long i2 = s.FindTerminator (i, terminators);
198 if (i2 == kNotFound) {
199 HandleApplicationError (kErrorStringUnterminatedMatrix & PrepareErrorContext(s, i));
200 }
201 i = i2;
202 cc = s.char_at (i);
203
204 if (cc==close_terminator) {
205 break;
206 }
207
208 if (cc==',') {
209 vDim++;
210 }
211 i++;
212 }
213
214 vDim++;
215 hDim = 1;
216
217 for (i = i + 1L; i<s.length()-1; i++) {
218 i = s.ExtractEnclosedExpression (i,open_terminator,close_terminator,fExtractRespectQuote | fExtractRespectEscape);
219 if (i < 0) {
220 break;
221 }
222 hDim ++;
223 }
224
225 if ( hDim<=0 || vDim<=0) {
226 return;
227 }
228
229 if (isNumeric) {
230 CreateMatrix (this, hDim, vDim, false, true, false);
231 } else {
232 CreateMatrix (this, hDim, vDim, false, false, true);
233 }
234
235 // scan the elements one-by-one
236
237 for (i=1; i<s.length()-1; i++) {
238 if (s.char_at(i) == open_terminator) {
239 while (s.char_at(i) != close_terminator) {
240 i++;
241 j = s.FindTerminator (i, terminators);
242
243 if (j<0) {
244 if (doErrors) {
245 HandleApplicationError (kErrorStringUnterminatedMatrix & PrepareErrorContext(s, i));
246 }
247 return;
248 }
249
250 _String lterm (s,s.FirstNonSpaceIndex(i,j-1,kStringDirectionForward),j-1); // store the term in a string
251
252 //printf ("%s\n", lterm.sData);
253
254 if (isNumeric) {
255 if (lterm.length() == 1 && lterm.char_at(0) =='*') {
256 lterm = kEmptyString; // dummy element in probability matrix
257 }
258
259 theData[vDim*hPos+vPos] = lterm.to_float ();
260 } else {
261 if (lterm.length() == 1 && lterm.char_at(0) =='*') {
262 lterm = kEmptyString; // dummy element in probability matrix
263 }
264
265 _String errMsg;
266 _Formula* theTerm;
267
268 if (doErrors) {
269 theTerm = new _Formula (lterm, theP);
270 } else {
271 theTerm = new _Formula (lterm, theP, fpc.errMsg());
272 }
273
274 isAConstant = isAConstant && theTerm->IsAConstant() && theTerm->ObjectClass() == NUMBER;
275 ((_Formula**)theData)[vDim*hPos+vPos] = theTerm;
276 }
277
278 vPos++;
279 if (vPos>vDim) {
280 throw _String ("Rows of unequal lengths in matrix definition in ") & PrepareErrorContext(lterm, 0);
281 }
282
283 i=j;
284 }
285 }
286 if (s[i]==close_terminator) {
287 if (vPos!=vDim) {
288 throw kErrorStringBadMatrixDefinition & PrepareErrorContext (s,i-16);
289 }
290 hPos++;
291 vPos = 0;
292 if (hPos>hDim) {
293 throw kErrorStringBadMatrixDefinition & PrepareErrorContext (s,i-16);
294 }
295 }
296 }
297 if (hPos!=hDim) {
298 throw kErrorStringBadMatrixDefinition & PrepareErrorContext (s,i-16);
299 }
300 } else { // second type of input
301 for (i=j,j=0; s.char_at (i) !=open_terminator && s.char_at (i) !=close_terminator && i<s.length(); i++) {
302 if (s.char_at(i)==',') { // neither hDim nore vDim have been specified
303 if (j > 0) {
304 break;
305 }
306 term = s.Cut(1,i-1);
307 hDim = handle_numeric_parameter (term);
308 j = i+1;
309 }
310 }
311
312 if (j) { // both hDim and vDim specified
313 term = s.Cut(j,i-1);
314 vDim = handle_numeric_parameter (term);
315 } else { // only one dim specified, matrix assumed to be square
316 term = s.Cut(1,i-1);
317 hDim = handle_numeric_parameter (term);
318 vDim = hDim;
319 }
320
321 if (hDim<=0 || vDim<=0) {
322 return;
323 }
324
325 if (isNumeric) {
326 CreateMatrix (this, hDim, vDim, true, true, false);
327 } else {
328 CreateMatrix (this, hDim, vDim, true, false, true);
329 }
330
331 // read the terms now
332
333 for (; i<s.length(); i++) {
334 if (s.char_at (i) ==open_terminator) {
335 hPos = -1;
336 vPos = -1;
337 k = i+1;
338
339 for (j=i+1; j<s.length () && s.char_at (j) !=close_terminator; j++) {
340 long j2 = s.FindTerminator (j, terminators);
341
342 if (j2<0) {
343 throw (kErrorStringUnterminatedMatrix & PrepareErrorContext(s,j));
344 }
345 j = j2;
346
347 if (s.char_at (j) ==',') {
348 term = s.Cut (s.FirstNonSpaceIndex(k,j-1,kStringDirectionForward),j-1);
349 _Formula coordF (term,theP);
350 hyFloat coordV = coordF.Compute()->Value();
351 if (hPos == -1) {
352 hPos = coordV;
353 } else {
354 vPos = coordV;
355 }
356 k = j+1;
357 } else {
358 j--;
359 }
360 }
361
362 if (hPos <0 || vPos<0 || hPos>=hDim || vPos>=vDim)
363 // bad index
364 {
365 MatrixIndexError (hPos,vPos,hDim,vDim);
366 return;
367 }
368
369 term = s.Cut(k,j-1); // read the element
370
371 if (isNumeric) {
372 if (term.length() == 1UL && term.get_char (0)=='*') {
373 term = kEmptyString; // dummy element in probability matrix
374 }
375
376 (*this)[vDim*hPos+vPos];
377 k = Hash (hPos,vPos);
378 theData[k]=term.to_float ();
379 } else {
380 if (term.length() == 1UL && term.get_char (0)=='*') {
381 term = kEmptyString; // dummy element in probability matrix
382 }
383
384 _Formula * theTerm = new _Formula (term,theP);
385 isAConstant = isAConstant && theTerm->IsAConstant();
386
387 (*this)[vDim*hPos+vPos];
388 k = Hash (hPos,vPos);
389 ((_Formula**)theData)[k]=theTerm;
390 }
391 i = j;
392 }
393 }
394 } // end else
395
396 if (!isNumeric) {
397 storageType = 2; // formula elements
398 checkParameter (ANAL_COMP_FLAG, ANALYTIC_COMPUTATION_FLAG, 0L);
399 if ((ANALYTIC_COMPUTATION_FLAG)&&!isAConstant) {
400 _Matrix::ConvertFormulas2Poly (false);
401 }
402
403 if (isAConstant) { // a matrix of numbers - store as such
404 _Matrix::Evaluate ();
405 }
406 _Matrix::AmISparse();
407 }
408 }
409 } catch (const _String& err) {
410 if (doErrors) {
411 HandleApplicationError(err);
412 }
413 }
414 }
415
416
417 //_____________________________________________________________________________________________
418
_Matrix(_Matrix const & m)419 _Matrix::_Matrix (_Matrix const& m) {
420 DuplicateMatrix (this, &m);
421 }
422
423 //_____________________________________________________________________________________________
424
_Matrix(_SimpleList const & sl,long colArg)425 _Matrix::_Matrix (_SimpleList const& sl, long colArg) {
426 if (sl.lLength) {
427 if (colArg > 0 && colArg < sl.lLength) {
428 CreateMatrix (this, sl.lLength/colArg + colArg*(sl.lLength%colArg > 0), colArg, false, true, false);
429 } else {
430 CreateMatrix (this, 1, sl.lLength, false, true, false);
431 }
432 for (long k=0; k<sl.lLength; k++) {
433 theData[k] = sl.list_data[k];
434 }
435 } else {
436 Initialize();
437 }
438 }
439
440 //_____________________________________________________________________________________________
441
_Matrix(hyFloat const * inList,unsigned long rows,unsigned long columns)442 _Matrix::_Matrix (hyFloat const* inList, unsigned long rows, unsigned long columns) {
443 CreateMatrix (this, rows, columns, false, true, false);
444 for (unsigned long k = 0; k < rows*columns; k++) {
445 theData[k] = inList[k];
446 }
447 }
448
449 //_____________________________________________________________________________________________
450
_Matrix(hyFloat constant,unsigned long rows,unsigned long columns)451 _Matrix::_Matrix (hyFloat constant, unsigned long rows, unsigned long columns) {
452 CreateMatrix (this, rows, columns, false, true, false);
453 for (unsigned long k = 0; k < rows*columns; k++) {
454 theData[k] = constant;
455 }
456 }
457
458
459 //_____________________________________________________________________________________________
460
_Matrix(_List const & sl,bool parse_escapes)461 _Matrix::_Matrix (_List const& sl, bool parse_escapes)
462 // list of strings
463 {
464 if (sl.nonempty()) {
465 CreateMatrix (this, 1, sl.lLength, false, false, true);
466 this->storageType = _FORMULA_TYPE;
467
468 if (parse_escapes) {
469 for (unsigned long k=0UL; k<sl.lLength; k++) {
470 StoreFormula (0L,k,*new _Formula (new _FString (*(_String*) sl.GetItem(k))), false, false);
471 }
472 } else {
473 for (unsigned long k=0UL; k<sl.lLength; k++) {
474 _String* entry_k = (_String*) sl.GetItem(k);
475 entry_k -> AddAReference();
476 StoreFormula (0L,k,*new _Formula (new _FString (entry_k)), false, false);
477 }
478
479 }
480 } else {
481 Initialize();
482 }
483 }
484
485 //_____________________________________________________________________________________________
CreateMatrix(_Matrix * populate_me,long rows,long columns,bool sparse,bool allocateStorage,bool expression_matrix)486 void _Matrix::CreateMatrix (_Matrix* populate_me, long rows, long columns, bool sparse, bool allocateStorage, bool expression_matrix) {
487
488 populate_me->theValue = nil;
489 populate_me->storageType = allocateStorage ? _NUMERICAL_TYPE : _POLYNOMIAL_TYPE;
490
491 if (rows && columns) {
492 if (sparse) { // store matrix as sparse
493 populate_me->lDim = rows*columns*populate_me->storageIncrement/100+1; // size of storage in elements
494 if (populate_me->lDim-1L<rows) {
495 // either the matrix or the allocation block are too small
496 // to sensibly store the matrix as sparse.
497 CreateMatrix (populate_me, rows, columns, false, allocateStorage, expression_matrix);
498 return;
499 }
500 populate_me->theIndex = (long*)MatrixMemAllocate(sizeof(long)*populate_me->lDim);
501 InitializeArray(populate_me->theIndex, populate_me->lDim, -1L);
502
503 } else {
504 populate_me->lDim = rows*columns;
505 populate_me->theIndex = nil; // no index storage needed
506 }
507
508 if (!allocateStorage) {
509 // matrix will store pointers to elements
510 populate_me->theData =(hyFloat*)MatrixMemAllocate(populate_me->lDim*sizeof(void*));
511 if (expression_matrix) {
512 InitializeArray ((_Formula**)populate_me->theData, populate_me->lDim, (_Formula*)ZEROPOINTER);
513 } else {
514 InitializeArray ((_MathObject**)populate_me->theData, populate_me->lDim, (HBLObjectRef)ZEROPOINTER);
515 }
516
517 } else {
518 populate_me->theData =(hyFloat*)MatrixMemAllocate (sizeof(hyFloat)*populate_me->lDim);
519 memset (populate_me->theData, 0, populate_me->lDim*sizeof(hyFloat));
520 }
521 populate_me->hDim = rows;
522 populate_me->vDim = columns;
523 populate_me->SetupSparseMatrixAllocations ();
524 populate_me->compressedIndex = nil;
525 } else {
526 populate_me->lDim = 0L;
527 populate_me->theIndex = nil;
528 populate_me->theData = nil;
529 populate_me->compressedIndex = nil;
530 populate_me->hDim = 0UL;
531 populate_me->vDim = 0UL;
532 }
533
534 }
535
536
537 //_____________________________________________________________________________________________
DuplicateMatrix(_Matrix * targetMatrix,_Matrix const * sourceMatrix)538 void DuplicateMatrix (_Matrix* targetMatrix, _Matrix const* sourceMatrix) {
539 if (targetMatrix==sourceMatrix) {
540 return;
541 }
542 targetMatrix->lDim = sourceMatrix->lDim;
543 targetMatrix->hDim = sourceMatrix->hDim;
544 targetMatrix->vDim = sourceMatrix->vDim;
545 targetMatrix->storageType = sourceMatrix->storageType;
546 targetMatrix->bufferPerRow =sourceMatrix->bufferPerRow;
547 targetMatrix->overflowBuffer = sourceMatrix->overflowBuffer;
548 targetMatrix->allocationBlock = sourceMatrix->allocationBlock;
549 targetMatrix->theValue = nil;
550
551 targetMatrix->compressedIndex = nil;
552
553 if (! sourceMatrix->is_dense()) {
554 if (!(targetMatrix->theIndex = (long*)MatrixMemAllocate(sizeof(long) *sourceMatrix->lDim))) { // allocate element index storage
555 HandleApplicationError ( kErrorStringMemoryFail );
556 } else {
557 memcpy ((void*)targetMatrix->theIndex,(void*)sourceMatrix->theIndex,sourceMatrix->lDim*sizeof(long));
558 }
559 if (sourceMatrix->compressedIndex) {
560 targetMatrix->compressedIndex = (long*)MatrixMemAllocate(sizeof(long) *(sourceMatrix->lDim+sourceMatrix->hDim));
561 memcpy ((void*)targetMatrix->compressedIndex,(void*)sourceMatrix->compressedIndex,sizeof(long) *(sourceMatrix->lDim+sourceMatrix->hDim));
562 }
563 } else {
564 targetMatrix->theIndex = nil;
565 }
566
567
568 targetMatrix->theData = nil;
569
570 if (sourceMatrix->lDim) {
571 if (sourceMatrix->is_polynomial())
572 // matrix will store pointers to elements
573 {
574 if (targetMatrix->lDim) {
575 if (!(targetMatrix->theData = (hyFloat*)MatrixMemAllocate(sizeof( char)*sourceMatrix->lDim*sizeof(void*)))) { // allocate element index storage
576 HandleApplicationError ( kErrorStringMemoryFail );
577 } else {
578 memcpy ((void*)targetMatrix->theData,(void*)sourceMatrix->theData,sourceMatrix->lDim*sizeof(void*));
579 if (sourceMatrix->is_dense()) { // non-sparse matrix
580 for (long i=0; i<sourceMatrix->lDim; i++)
581 if (sourceMatrix->GetMatrixObject(i)) {
582 (sourceMatrix->GetMatrixObject(i))->AddAReference();
583 }
584 } else
585 for (long i=0; i<sourceMatrix->lDim; i++) {
586 _MathObject* theO = (sourceMatrix->GetMatrixObject(i));
587 if (theO!=ZEROPOINTER) {
588 theO->AddAReference();
589 }
590 }
591
592 }
593 }
594 } else if (sourceMatrix->is_expression_based()) {
595 if (targetMatrix->lDim) {
596 targetMatrix->theData = (hyFloat*)MatrixMemAllocate(sourceMatrix->lDim*sizeof(void*));
597 _Formula ** theFormulas = (_Formula**)(sourceMatrix->theData), **newFormulas =
598 (_Formula**)(targetMatrix->theData);
599 if (sourceMatrix->is_dense() == false) {
600 for (long i = 0; i<sourceMatrix->lDim; i++)
601 if (sourceMatrix->IsNonEmpty(i)) {
602 newFormulas[i] = (_Formula*)theFormulas[i]->makeDynamic();
603 }
604 } else
605 for (long i = 0; i<sourceMatrix->lDim; i++)
606 if(theFormulas[i]!=(_Formula*)ZEROPOINTER) {
607 newFormulas[i] = (_Formula*)theFormulas[i]->makeDynamic();
608 } else {
609 newFormulas[i]=ZEROPOINTER;
610 }
611 }
612 } else {
613 if (targetMatrix->lDim) {
614 if (!(targetMatrix->theData =(hyFloat*)MatrixMemAllocate(sizeof( hyFloat)*targetMatrix->lDim))) { // allocate element index storage
615 HandleApplicationError ( kErrorStringMemoryFail );
616 } else {
617 memcpy ((hyPointer)targetMatrix->theData,(hyPointer)sourceMatrix->theData,sizeof(hyFloat)*sourceMatrix->lDim);
618 }
619 }
620 }
621 } else {
622 targetMatrix->theData = nil;
623 targetMatrix->lDim = 0;
624 }
625
626 }
627 //_____________________________________________________________________________________________
makeDynamic(void) const628 BaseRef _Matrix::makeDynamic (void) const {
629 _Matrix * result = new _Matrix;
630 DuplicateMatrix (result, this);
631
632 return result;
633 }
634
635 //_____________________________________________________________________________________________
Duplicate(BaseRefConst obj)636 void _Matrix::Duplicate (BaseRefConst obj) {
637 Clear();
638 DuplicateMatrix (this,(_Matrix const*)obj);
639 }
640
641
642 //_____________________________________________________________________________________________
643
_Matrix(long theHDim,long theVDim,bool sparse,bool allocateStorage)644 _Matrix::_Matrix (long theHDim, long theVDim, bool sparse, bool allocateStorage) // create an kEmptyString matrix of given dimensions;
645 // the flag specifies whether it is sparse or not
646
647 {
648 CreateMatrix (this, theHDim, theVDim, sparse, allocateStorage);
649 }
650
651
652
653 //_____________________________________________________________________________________________
654
IsNonEmpty(long logicalIndex) const655 inline bool _Matrix::IsNonEmpty (long logicalIndex) const {
656 if (is_dense() == false) {
657 return theIndex [logicalIndex] != -1;
658 }
659 if (is_numeric()) {
660 return true;
661 }
662 return GetMatrixObject(logicalIndex)!=ZEROPOINTER;
663 }
664
665 //__________________________________________________________________________________
666
HasChanged(bool)667 bool _Matrix::HasChanged(bool) {
668
669 switch (storageType) {
670 case _POLYNOMIAL_TYPE: {
671 return Any ([&] (_MathObject * f, unsigned long) -> bool {if (f) return f->HasChanged(); return false;},
672 [&] (unsigned long i) -> _MathObject * {return ((_MathObject**)theData)[i];});
673 }
674 break;
675
676 case _FORMULA_TYPE: {
677 return Any ([&] (_Formula * f, unsigned long) -> bool {if (f) return f->HasChanged(); return false;},
678 [&] (unsigned long i) -> _Formula * {return ((_Formula**)theData)[i];});
679 }
680 break;
681
682 case _SIMPLE_FORMULA_TYPE: {
683 if (cmd->has_volatile_entries) return true;
684 return cmd->varIndex.Any ([&] (long value, unsigned long) -> bool {
685 return LocateVar (value)->HasChanged();
686 });
687 }
688 break;
689 }
690
691 return false;
692 }
693 //__________________________________________________________________________________
694
695
ROTATE(hyFloat * a,long i,long j,long k,long l,hyFloat & g,hyFloat & h,hyFloat s,hyFloat tau,long hDim)696 inline static void ROTATE(hyFloat * a, long i, long j, long k, long l, hyFloat & g, hyFloat & h, hyFloat s, hyFloat tau, long hDim) {
697 // this is from NR
698 g = a[i*hDim + j];
699 h = a[k*hDim + l];
700 a[i*hDim + j] = g - s*(h + g*tau);
701 a[k*hDim + l] = h + s*(g - h*tau);
702 }
703
704 //__________________________________________________________________________________
705
is_square_numeric(bool dense) const706 bool _Matrix::is_square_numeric(bool dense) const {
707 if (storageType!=_NUMERICAL_TYPE || hDim != vDim || hDim==0L){
708 HandleApplicationError ("Square numerical matrix required");
709 }
710 if (dense && !is_dense()) {
711 HandleApplicationError ("Square dense numerical matrix required");
712 }
713 return true;
714 }
715
716 //__________________________________________________________________________________
Balance(void)717 void _Matrix::Balance (void) {
718 if (!is_square_numeric (true)) {
719 return;
720 }
721
722 hyFloat Squared_Radix = 2.0 * 2.0;
723 bool done = false;
724
725 while (!done) {
726 done = true;
727
728 for (long i = 0L; i < hDim; i++) {
729 hyFloat r = 0.0,
730 c = 0.0;
731
732 for (long j = 0L; j < vDim; j++)
733 if (i!=j) {
734 r += fabs (theData[i*vDim+j]);
735 c += fabs (theData[j*vDim+i]);
736 }
737
738 if (r > 0.0 && c > 0.0) {
739 hyFloat g = r / Squared_Radix,
740 f = 1.,
741 s = c+r;
742
743 while (c<g) {
744 f *= 2.0;
745 c *= Squared_Radix;
746 }
747
748 g = r * 2.0;
749
750 while (c>g) {
751 f /= 2.0;
752 c /= Squared_Radix;
753 }
754
755 if ((c+r)/f < 0.95*s) {
756 done = false;
757 g = 1. / f;
758 for (long j = 0; j < vDim; j++) {
759 theData[i*vDim+j] *= g;
760 theData[j*vDim+i] *= f;
761 }
762 }
763
764 }
765 }
766 }
767 }
768
769 //__________________________________________________________________________________
Schur(void)770 void _Matrix::Schur (void) {
771 if (!is_square_numeric (true)) {
772 return;
773 }
774
775 for (long m = 1L; m < hDim-1; m++) {
776 hyFloat x = 0.0;
777 long i = m;
778
779 for (long j = m; j < hDim; j++)
780 if (fabs (theData[j*vDim + m-1]) > x) {
781 x = theData[j*vDim + m-1];
782 i = j;
783 }
784
785 if (i!=m) {
786 for (long j=m-1; j<hDim; j++) {
787 hyFloat t = theData[i*vDim + j];
788 theData[i*vDim + j] = theData[m*vDim + j];
789 theData[m*vDim + j] = t;
790 }
791 {
792 for (long j=0; j<hDim; j++) {
793 hyFloat t = theData[j*vDim + i];
794 theData[j*vDim + i] = theData[j*vDim + m];
795 theData[j*vDim + m] = t;
796 }
797 }
798 }
799
800 if (x)
801 for (long i = m+1; i < hDim; i++) {
802 hyFloat y = theData[i*vDim + m -1];
803 if (y != 0.0) {
804 y /= x;
805 theData[i*vDim + m -1] = y;
806 for (long j = m; j < hDim; j++) {
807 theData[i*vDim+j] -= y*theData[m*vDim+j];
808 }
809 {
810 for (long j = 0; j < hDim; j++) {
811 theData[j*vDim+m] += y*theData[j*vDim+i];
812 }
813 }
814 }
815 }
816 }
817
818 for (long r = 2L; r < hDim; r++)
819 InitializeArray(theData + r*hDim, r-1, 0.0);
820 }
821
822
823 //__________________________________________________________________________________
EigenDecomp(_Matrix & real,_Matrix & imag) const824 void _Matrix::EigenDecomp (_Matrix& real, _Matrix & imag) const {
825 if (!is_square_numeric()) {
826 return;
827 }
828
829 hyFloat anorm = 0.0;
830
831 for (long k = 0L; k < hDim; k++) {
832 for (long k2 = k?k-1:0; k2 < hDim; k2++) {
833 anorm += fabs (MX_ACCESS(k,k2));
834 }
835 }
836
837 long nn = hDim - 1L;
838 hyFloat t = 0;
839
840 CreateMatrix (&real, hDim, 1, false, true, false);
841 CreateMatrix (&imag, hDim, 1, false, true, false);
842
843 while (nn >= 0) {
844 long its = 0,
845 l = 0;
846 do {
847 for (l = nn; l>=1; l--) {
848 hyFloat s = fabs (MX_ACCESS(l-1,l-1)) + fabs (MX_ACCESS(l,l));
849 if (s == 0.0) {
850 s = anorm;
851 }
852
853 if (fabs (MX_ACCESS(l,l-1)) + s == s) {
854 break;
855 }
856 }
857
858 hyFloat x = MX_ACCESS(nn,nn);
859 if (l == nn) { // one root
860 real.theData[nn] = x + t;
861 imag.theData[nn--] = 0.0;
862 } else {
863 hyFloat y = MX_ACCESS(nn-1,nn-1),
864 w = MX_ACCESS(nn,nn-1)*MX_ACCESS(nn-1,nn);
865
866 if ( l == nn - 1) { // two roots
867 hyFloat p = 0.5 * (y-x),
868 q = p*p + w,
869 z = sqrt (fabs(q));
870
871 x += t;
872
873 if (q >= 0.0) { // real pair
874 z = p + (p>0.0?z:-z);
875 real.theData[nn] = real.theData[nn-1] = x+z;
876 if (z) {
877 real.theData[nn] = x-w/z;
878 }
879 imag.theData[nn] = imag.theData[nn-1] = 0.0;
880 } else { // complex pair
881 real.theData[nn] = real.theData [nn-1] = x+p;
882 imag.theData[nn-1] = -(imag.theData[nn] = z);
883 }
884 nn -= 2;
885 } else { // no roots; continue iteration
886
887 hyFloat p,q,r,z,s;
888
889 if (its == 30) {
890 HandleApplicationError ("Too many QR iterations in EigenDecomp");
891 return;
892 }
893
894 if (its == 10 || its == 20) {
895 t += x;
896 for (long i=0; i<hDim; i++) {
897 MX_ACCESS(i,i) -= x;
898 }
899 hyFloat s = fabs(MX_ACCESS(nn,nn-1)) + fabs (MX_ACCESS(nn-1,nn-2));
900 y = x = 0.75 * s;
901 w = -0.4375*s*s;
902 }
903 its++;
904
905 long m = nn-2;
906
907 for (; m>=l; m--) {
908 z = MX_ACCESS(m,m);
909 r = x-z;
910 s = y-z;
911 p = (r*s - w)/MX_ACCESS(m+1,m) + MX_ACCESS(m,m+1);
912 q = MX_ACCESS(m+1,m+1)-z-r-s;
913 r = MX_ACCESS(m+2,m+1);
914 s = fabs (p) + fabs (q) + fabs (r);
915
916 p/=s;
917 q/=s;
918 r/=s;
919
920 if (m == l) {
921 break;
922 }
923
924 hyFloat u = fabs (MX_ACCESS(m,m-1)) * (fabs (q) + fabs (r)),
925 v = fabs (p) * (fabs (MX_ACCESS(m-1,m-1)) + fabs(z) + fabs(MX_ACCESS(m+1,m+1)));
926
927 if (u+v == v) {
928 break;
929 }
930
931 }
932
933 for (long i = m+2; i<hDim; i++) {
934 MX_ACCESS(i,i-2) = 0.0;
935 if (i!=m+2) {
936 MX_ACCESS(i,i-3) = 0.0;
937 }
938 }
939
940 for (long k = m; k <= nn-1; k++) {
941 if (k!=m) {
942 p = MX_ACCESS(k,k-1),
943 q = MX_ACCESS(k+1,k-1),
944 r = 0.0;
945 if (k != nn-1) {
946 r = MX_ACCESS(k+2,k-1);
947 }
948
949 if ((x = fabs(p) + fabs(q) + fabs(r)) != 0.0) {
950 p /= x;
951 q /= x;
952 r /= x;
953 }
954 }
955
956 s = sqrt (p*p+q*q+r*r);
957
958 if (s != 0.0) {
959 if (p < 0.0) {
960 s = -s;
961 }
962
963 if (k == m) {
964 if (l!=m) {
965 MX_ACCESS(k,k-1) = -MX_ACCESS(k,k-1);
966 }
967 } else {
968 MX_ACCESS(k,k-1) = -s*x;
969 }
970
971 p += s;
972 x = p/s;
973 y = q/s;
974 z=r/s;
975 q/=p;
976 r/=p;
977 for (long j=k; j<=nn; j++) {
978 p = MX_ACCESS(k,j)+q*MX_ACCESS(k+1,j);
979 if (k!=nn-1) {
980 p += r*MX_ACCESS(k+2,j);
981 MX_ACCESS(k+2,j) -= p*z;
982 }
983 MX_ACCESS(k+1,j) -= p*y;
984 MX_ACCESS(k,j) -= p*x;
985 }
986
987 long mmin = nn < k+3 ? nn: k+3;
988 for (long i = 0; i<=mmin; i++) {
989 p = x*MX_ACCESS(i,k) + y*MX_ACCESS(i,k+1);
990 if (k!=nn-1) {
991 p += z*MX_ACCESS(i,k+2);
992 MX_ACCESS(i,k+2) -= p*r;
993 }
994 MX_ACCESS(i,k+1) -= p*q;
995 MX_ACCESS(i,k) -= p;
996 }
997 }
998 }
999 }
1000 }
1001
1002 } while (l < nn - 1);
1003 }
1004
1005 }
1006
1007 //_____________________________________________________________________________________________
ValidateFormulaEntries(bool callback (long,long,_Formula *))1008 bool _Matrix::ValidateFormulaEntries (bool callback (long, long, _Formula*)) {
1009 if (storageType == _FORMULA_TYPE) {
1010 _Formula ** formula_entires = (_Formula**)theData;
1011
1012 long direct_index = 0L;
1013 for (unsigned long row = 0UL; row < hDim ; row++) {
1014 for (unsigned long col = 0UL; col < vDim ; col++) {
1015 _Formula * this_cell;
1016 if (is_dense()) {
1017 this_cell = formula_entires[direct_index++];
1018 } else {
1019 direct_index = Hash (row,col);
1020 if (direct_index >= 0) {
1021 this_cell = formula_entires[direct_index];
1022 } else {
1023 this_cell = nil;
1024 }
1025 }
1026 if (! callback (row, col, this_cell)) {
1027 return false;
1028 }
1029 }
1030 }
1031 return true;
1032 }
1033 return false;
1034 }
1035
1036
1037 //__________________________________________________________________________________
Eigensystem(HBLObjectRef cache) const1038 HBLObjectRef _Matrix::Eigensystem (HBLObjectRef cache) const {
1039 // find the eigenvectors of a symmetric matrix using Jacobi rotations
1040 // The original matrix is preserved.
1041 // returns an associative list with a sorted vector of eigenvalues and
1042 // a square matrix where columns are the corresponding eigenvalues
1043 if (!is_square_numeric()) {
1044 return new _AssociativeList();
1045 }
1046
1047 // check for symmetry
1048
1049 for (long k=0; k<hDim; k++) {
1050 for (long v=k+1; v<hDim; v++) {
1051 if (!CheckEqual((*this)(k,v), (*this)(v,k))) {
1052 //_String errorMsg ("Eigensystem presently only works on symmetric matrices");
1053 //WarnError (errorMsg);
1054 //return new _AssociativeList();
1055
1056 //_String nonSym = _String("Failed symmetry check:" ) & k & ":" & v;
1057 //WarnError (nonSym);
1058
1059
1060 _Matrix cpy (*this),
1061 *rl = new _Matrix,
1062 *im = new _Matrix;
1063
1064 cpy.CheckIfSparseEnough(true);
1065 cpy.Balance ();
1066 cpy.Schur ();
1067 cpy.EigenDecomp (*rl,*im);
1068
1069 return & ((*new _AssociativeList) < _associative_list_key_value {"0", rl}
1070 < _associative_list_key_value {"1", im});
1071 }
1072 }
1073 }
1074 _Matrix a (*this);
1075 a.CheckIfSparseEnough (true);
1076
1077 hyFloat* b = new hyFloat[hDim],
1078 * z = new hyFloat[hDim];
1079
1080 _Matrix * d = new _Matrix(hDim,1,false, true),
1081 * v = new _Matrix(hDim,hDim,false,true);
1082
1083 for (long cnt = 0, diagIndex=0; cnt < hDim; cnt ++, diagIndex+=hDim+1) {
1084 v->theData[diagIndex] = 1.;
1085 b[cnt] = (d->theData[cnt] = a.theData [diagIndex]);
1086 z[cnt] = 0.0;
1087 }
1088
1089 for (int pass = 0; pass < 50; pass ++) {
1090 hyFloat sm = 0.,
1091 tresh = 0.;
1092
1093 for (long ec = 0; ec < hDim-1; ec ++)
1094 for (long ec2 = ec+1; ec2 < hDim; ec2++) {
1095 sm += fabs(a.theData[ec*hDim+ec2]);
1096 }
1097
1098 if (sm == 0.0) {
1099 break;
1100 }
1101
1102 if (pass < 3) {
1103 tresh = 0.2 * sm / (hDim*hDim);
1104 }
1105
1106 for (long ec=0; ec < hDim-1; ec++) {
1107 for (long ec2=ec+1; ec2 < hDim; ec2++) {
1108 long midx = ec*hDim+ec2;
1109
1110 hyFloat mel = a.theData[midx],
1111 g = 100. * fabs (mel),
1112 t = fabs (d->theData[ec]),
1113 c = fabs (d->theData[ec2]);
1114
1115 if (pass>3 && t+g == t && c+g == c) {
1116 a.theData[midx] = 0.;
1117 } else if (fabs(mel) > tresh) {
1118 hyFloat h = d->theData[ec2]-d->theData[ec];
1119 if (fabs (h) + g == fabs (h)) {
1120 t = mel/h;
1121 } else {
1122 hyFloat theta = 0.5*h/mel;
1123 t = 1./(fabs(theta)+sqrt(1.+theta*theta));
1124 if (theta<0.0) {
1125 t = -t;
1126 }
1127 }
1128
1129 c = 1.0/sqrt(1.0+t*t);
1130
1131 hyFloat s = t*c;
1132 hyFloat tau = s/(1.0+c);
1133
1134 h = t*mel;
1135
1136 z[ec] -= h;
1137 z[ec2] += h;
1138 d->theData[ec] -= h;
1139 d->theData[ec2] += h;
1140
1141
1142 a.theData[midx] = 0.;
1143
1144 for (long j=0; j<ec; j++) {
1145 ROTATE (a.theData, j, ec, j, ec2, g, h, s, tau, hDim);
1146 }
1147
1148 for (long j=ec+1; j<ec2; j++) {
1149 ROTATE (a.theData, ec, j, j, ec2, g, h, s, tau, hDim);
1150
1151 }
1152 for (long j=ec2+1; j<hDim; j++) {
1153 ROTATE (a.theData, ec, j, ec2, j, g, h, s, tau, hDim);
1154 }
1155
1156 for (long j=0; j<hDim; j++) {
1157 ROTATE (v->theData, j, ec, j, ec2, g, h, s, tau, hDim);
1158 }
1159 }
1160 }
1161 }
1162 for (long ec=0; ec<hDim; ec++) {
1163 b[ec] += z[ec];
1164 d->theData[ec] = b[ec];
1165 z[ec] = 0.;
1166 }
1167 }
1168
1169
1170
1171 _Matrix ds (hDim,2,false, true),
1172 * vs = new _Matrix(hDim,hDim,false,true),
1173 * dss;
1174
1175 for (long r=0; r<hDim; r++) {
1176 ds.theData[2*r] = -d->theData[r];
1177 ds.theData[2*r+1] = r;
1178 }
1179
1180 _Constant sc (0.0);
1181 dss = (_Matrix*)ds.SortMatrixOnColumn (&sc, nil);
1182
1183 for (long r=0; r<hDim; r++) {
1184 d->theData[r] = -dss->theData[2*r];
1185 for (long c1 = r, c2 = dss->theData[2*r+1]; c1<hDim*hDim; c1+=hDim, c2+=hDim) {
1186 vs->theData[c1] = v->theData[c2];
1187 }
1188 }
1189
1190 DeleteObject (v);
1191 DeleteObject (dss);
1192
1193 delete [] b;
1194 delete [] z;
1195
1196 return & ((*new _AssociativeList) << _associative_list_key_value {"0", d}
1197 << _associative_list_key_value {"1", vs});
1198
1199 }
1200
1201 //__________________________________________________________________________________
LUDecompose(void) const1202 HBLObjectRef _Matrix::LUDecompose (void) const {
1203 // perform the LU decomposition using Crout's algorithm with partial pivoting
1204 // The original matrix is preserved.
1205 // after performing this decomposition, the routine LUSolve can be called with an arbitrary vector
1206 // the return object is an nx(n+1) matrix which contains the LU decomposition followed
1207 // by a vector of row interchanges
1208 if (!is_square_numeric(false)) { // only works for numerical matrices at this stage
1209 return new _Matrix();
1210 }
1211
1212 hyFloat * scalings = new hyFloat[hDim];
1213
1214 long per_row = vDim+1;
1215 _Matrix * result = new _Matrix (hDim,per_row,false,true);
1216 // result is a dense matrix
1217
1218 // duplicate the original matrix into result
1219
1220
1221 if (is_dense()) {//matrix is sparse
1222 for (long i=0L; i<hDim; i++) {
1223 long row_start = i*vDim;
1224 for (long j=0; j<vDim; j++) {
1225 result->theData[row_start+i+j]=theData[row_start+j];
1226 }
1227 }
1228 }
1229 else {
1230 for (long i=0; i<lDim; i++) {
1231 if (theIndex[i] != -1) {
1232 long cell_coord = theIndex[i];
1233 long r = cell_coord / hDim;
1234 result->Store(r,cell_coord-r*vDim,theData[i]);
1235 }
1236 }
1237 }
1238
1239 // produce the scaling vector used in interchanging the rows
1240 for (long i=0L; i<vDim; i++) {
1241
1242 hyFloat row_max = 0.0;
1243
1244 for (long j=i*per_row; j<(i+1)*per_row-1; j++) {
1245 StoreIfGreater(row_max, fabs(result->theData[j]));
1246 }
1247
1248 if (row_max==0.0) {
1249 HandleApplicationError (_String("LUDecompose doesn't work on singular matrices (row ") & i & ')');
1250 delete [] scalings;
1251 return new _MathObject;
1252 }
1253 scalings[i]=1.0/row_max;
1254 }
1255 // main loop for finding L and U
1256
1257 for (long j=0L; j<vDim; j++) {
1258 for (long i=0L; i<j; i++) {
1259 // fill in superdiagonal elements (U) in column j
1260 hyFloat sum = result->get(i,j);
1261 for (long k=0L; k<i; k++) {
1262 sum -= result->get(i,k) * result->get (k,j);
1263 }
1264 result->set (i,j) = sum;
1265 }
1266 long max_row_index = 0;
1267 hyFloat max_row_value = 0.0;
1268
1269 for (long i=j; i<hDim; i++) {
1270 // calculate the unscaled version of elements of L and the diagonal
1271 hyFloat sum = result->get(i,j);
1272
1273 for (long k=0L; k<j; k++) {
1274 sum -= result->get(i,k) * result->get (k,j);
1275 }
1276 result->set (i,j) = sum;
1277
1278 if (StoreIfGreater(max_row_value, scalings[i]*fabs(sum))) { // find max under the diagonal in column j
1279 max_row_index = i;
1280 }
1281 }
1282
1283 if (j!=max_row_index) { // interchange rows
1284 for (long k=0L; k<hDim; k++) {
1285 Exchange(result->set(max_row_index,k), result->set(j,k));
1286 }
1287 scalings[max_row_index]=scalings[j];
1288 }
1289 // store the index permutation
1290 result->theData[j*per_row+vDim] = max_row_index;
1291
1292 if (result->get(j,j) == 0.0) {
1293 result->set(j,j) = 1.0e-25;
1294 }
1295
1296 // divide by the pivoting element
1297
1298 if (j!=hDim-1) {
1299 hyFloat scaler = 1.0/result->get(j,j);
1300 for (long i=j+1L; i<hDim; i++) {
1301 result->set(i,j) *= scaler;
1302 }
1303 }
1304 }
1305 delete [] scalings;
1306 return result;
1307 }
1308 //__________________________________________________________________________________
LUSolve(HBLObjectRef p) const1309 HBLObjectRef _Matrix::LUSolve (HBLObjectRef p) const {
1310 // takes a matrix in LU decomposed state and a vector of row permutation returned by LU
1311 // returns a vector of solutions
1312
1313 if (storageType != _NUMERICAL_TYPE || hDim+1!=vDim || vDim<=0 ) { // only works for numerical matrices at this stage
1314 HandleApplicationError ("LUSolve only works with numerical non-empty matrices of dimension nx(n+1) returned by LUDecompose.");
1315 return nil;
1316 }
1317 if (p->ObjectClass()==MATRIX) {
1318 _Matrix *b=(_Matrix*)p;
1319 if (!((b->hDim!=hDim)||(b->vDim!=1)||(b->storageType!=1))) {
1320 hyFloat sum;
1321 _Matrix * result = new _Matrix (*b);
1322 result->CheckIfSparseEnough(true);
1323 long i = 0L,
1324 first_index = -1L;
1325
1326 for (; i<hDim; i++) {
1327 long row_index = get (i, vDim - 1L);
1328 if (row_index<0 || row_index>=hDim) {
1329 break;
1330 }
1331 hyFloat sum = result->theData[row_index];
1332 result->theData[row_index]=result->theData[i];
1333 if (first_index>=0)
1334 for (long j=first_index; j<i; j++) {
1335 sum -= get (i,j) *result->theData[j];
1336 }
1337 else if (sum != 0.0) {
1338 first_index = i;
1339 }
1340 result->theData[i]=sum;
1341 }
1342 if (i==hDim) {
1343 for (i=hDim-1; i>-1; i--) {
1344 hyFloat sum = result->theData[i];
1345 for (long j=i+1L; j<hDim; j++) {
1346 sum -= get (i,j) *result->theData[j];
1347 }
1348 result->theData[i]=sum/get(i,i);
1349 }
1350 return result;
1351 }
1352 }
1353 }
1354 HandleApplicationError ("LUSolve expects the 2nd parameter to be a column vector defining the right hand side of LUx=b");
1355 return new _Matrix(1,1,false,true);
1356 }
1357
1358
1359
1360 //__________________________________________________________________________________
CholeskyDecompose(void) const1361 HBLObjectRef _Matrix::CholeskyDecompose (void) const
1362 {
1363 /* ---------------------------------------------------
1364 CholeskyDecompose()
1365 Constrcts lower triangular matrix L such that
1366 its own transpose can serve as upper part in
1367 LU decomposition.
1368 Requires that matrix is symmetric and positive
1369 definite.
1370 * algorithm based on Numerical Recipes
1371 --------------------------------------------------- */
1372
1373 if (!is_square_numeric()) { // only works for numerical square matrices at this stage
1374 return new _Matrix();
1375 }
1376
1377 long n = GetHDim();
1378 hyFloat sum;
1379 _Matrix * lower_triangular = new _Matrix (*this); // duplication constructor
1380
1381 for (long i = 0; i < n; i++) {
1382 for (long j = i; j < n; j++) {
1383 sum = lower_triangular->get (i,j);
1384
1385 for (long k = i-1L; k >= 0L; k--) {
1386 sum -= lower_triangular->get (i,k) * lower_triangular->get (j,k);
1387 }
1388
1389 if (i==j) {
1390 if (sum <= 0.0) { // matrix is not positive-definite
1391 HandleApplicationError (_String("In CholeskyDecompose(): matrix not positive definite, (row ") & i & ')');
1392 return new _MathObject;
1393 }
1394
1395 lower_triangular->set (i, i) = sqrt(sum);
1396 }
1397 else {
1398 lower_triangular->set (j, i) = sum / lower_triangular->get(i,i);
1399 }
1400 }
1401 }
1402
1403 /* zero upper triagonal entries */
1404 for (long i = 0L; i < n; i++) {
1405 for (long j = i+1L; j < n; j++) {
1406 lower_triangular->set (i, j) = 0.;
1407 }
1408 }
1409
1410 return lower_triangular;
1411 }
1412
1413
1414
1415 //__________________________________________________________________________________
ApplyScalarOperation(CALLBACK && functor,HBLObjectRef cache) const1416 template <typename CALLBACK> HBLObjectRef _Matrix::ApplyScalarOperation (CALLBACK && functor, HBLObjectRef cache) const {
1417 if (storageType==_NUMERICAL_TYPE) {
1418 _Matrix* res;
1419
1420 if (cache && cache->ObjectClass() == MATRIX) {
1421 res = (_Matrix*)cache;
1422 *res = *this;
1423 res->AddAReference();
1424 } else {
1425 res = new _Matrix (*this);
1426 }
1427
1428 res->ForEach ([&] (hyFloat&& value, unsigned long index, long hashed) -> void {res->theData[hashed] = functor(value);},
1429 [&] (unsigned long index) -> hyFloat {return theData[index];});
1430
1431 return res;
1432 }
1433 HandleApplicationError ("Can't apply scalar opetarations to non-numeric matrices.");
1434 return new _Matrix(1,1,false,true);
1435 }
1436
1437 //__________________________________________________________________________________
Inverse(HBLObjectRef cache) const1438 HBLObjectRef _Matrix::Inverse (HBLObjectRef cache) const {
1439 if (!is_square_numeric(false)) {
1440 return new _MathObject;
1441 }
1442
1443 _Matrix * LUdec = (_Matrix*)LUDecompose();
1444 if (LUdec) {
1445 _Matrix b (hDim,1,false,true),
1446 * result = (_Matrix*)_returnMatrixOrUseCache(hDim,vDim,_NUMERICAL_TYPE,false, cache);
1447 b.theData[0]=1.0;
1448 for (long i=0L; i<hDim; i++) {
1449 if (i) {
1450 b.theData[i]=1.0;
1451 b.theData[i-1L]=0.0;
1452 }
1453 _Matrix* invVector = (_Matrix*)LUdec->LUSolve(&b);
1454 _Matrix corrTerm (GetHDim(),1, false, true);
1455 Multiply(corrTerm, *invVector);
1456 corrTerm -= b;
1457 //_Matrix* corrTerm = (_Matrix*)(*this*(*invVector)-b).makeDynamic();
1458 _Matrix* corrX = (_Matrix*)LUdec->LUSolve(&corrTerm);
1459 *invVector-=*corrX;
1460 DeleteObject (corrX);
1461 for (long j=0; j<hDim; j++) {
1462 result->set (j,i) = invVector->theData[j];
1463 }
1464 DeleteObject (invVector);
1465 }
1466 DeleteObject (LUdec);
1467 return result;
1468 }
1469 return new _Matrix (1,1,false,true);
1470
1471 }
1472
1473 //__________________________________________________________________________________
MultByFreqs(long freqID,bool reuse_value_object)1474 HBLObjectRef _Matrix::MultByFreqs (long freqID, bool reuse_value_object) {
1475 // multiply this transition probs matrix by frequencies
1476 HBLObjectRef value = ComputeNumeric(true);//!reuse_value_object);
1477
1478 //printf ("\n%s\n", _String ((_String*)toStr()).get_str());
1479
1480 if (freqID>=0) {
1481 _Matrix* freq_matrix = nil;
1482 freqID = modelFrequenciesIndices.list_data[freqID];
1483 if (freqID>=0) {
1484 freq_matrix = (_Matrix*)LocateVar(freqID)->GetValue();
1485 if (freq_matrix->storageType != _NUMERICAL_TYPE) {
1486 if (freq_matrix->theValue) {
1487 freq_matrix = (_Matrix*)freq_matrix->theValue;
1488 } else {
1489 freq_matrix = (_Matrix*)freq_matrix->ComputeNumeric();
1490 }
1491 }
1492 }
1493
1494 if (theIndex) {
1495 _Matrix* vm = (_Matrix*) value;
1496 hyFloat * __restrict dp = vm ->theData;
1497
1498 if (vm->compressedIndex) {
1499 //vm->_validateCompressedStorage();
1500 long from = 0L;
1501 if (freq_matrix) {
1502 for (long r = 0; r < hDim; r++) {
1503 long diagEntry = -1;
1504 hyFloat diagAccumulator = 0.;
1505 for (long c = from; c < vm->compressedIndex[r]; c++) {
1506 long col_index = vm->compressedIndex[c+hDim];
1507 if (col_index != r) {
1508 dp[c] *= freq_matrix->theData[col_index];
1509 diagAccumulator -= dp[c];
1510 } else {
1511 diagEntry = c;
1512 }
1513 }
1514 from = vm->compressedIndex[r];
1515 dp[diagEntry] = diagAccumulator;
1516 }
1517 } else {
1518 for (long r = 0; r < hDim; r++) {
1519 long diagEntry = -1;
1520 hyFloat diagAccumulator = 0.;
1521 for (long c = from; c < vm->compressedIndex[r]; c++) {
1522 //printf ("%ld\n", vm->theIndex[c]);
1523 if (vm->compressedIndex[c+hDim] != r) {
1524 diagAccumulator -= dp[c];
1525 } else {
1526 diagEntry = c;
1527 }
1528 }
1529 //printf ("%ld %ld %g\n", r, diagEntry, diagAccumulator);
1530 from = vm->compressedIndex[r];
1531 dp[diagEntry] = diagAccumulator;
1532 }
1533 }
1534 //vm->_validateCompressedStorage();
1535 } else {
1536 hyFloat *tempDiags = (hyFloat*) alloca (sizeof(hyFloat) * hDim);
1537 InitializeArray(tempDiags, hDim, 0.0);
1538
1539 if (freq_matrix) {
1540 for (long i=0; i<lDim; i++) {
1541 long p = theIndex[i];
1542 if (p != -1) {
1543 long h = p / vDim;
1544 p = p - h*vDim;
1545 if (h!=p) {
1546 tempDiags[h] += (dp[i] *= freq_matrix->theData[p]);
1547 }
1548 }
1549 }
1550 }
1551 else {
1552 for (long i=0; i<lDim; i++) {
1553 long p = theIndex[i];
1554 if (p != -1) {
1555 long h = p / vDim;
1556 p = p - h*vDim;
1557 if (h!=p) {
1558 tempDiags[h] += dp[i];
1559 }
1560 }
1561 }
1562 }
1563
1564 for (long j=0L; j<hDim; j++) {
1565 vm->Store (j,j,-tempDiags[j]);
1566 }
1567 }
1568
1569 } else {
1570 hyFloat * theMatrix = ((_Matrix*)value)->theData;
1571
1572 if (freq_matrix) {
1573 if (freq_matrix->theIndex) {
1574 HandleApplicationError(_String("Sparse frequency matrices are not supported"));
1575 } else {
1576 for (unsigned long column=0UL; column<vDim; column++) {
1577 const hyFloat freq_i = freq_matrix->theData[column];
1578 unsigned long entry = column;
1579 for (;entry < lDim - vDim; entry += vDim) {
1580 theMatrix[entry] *= freq_i;
1581 theMatrix[entry+=vDim] *= freq_i;
1582 }
1583 if (entry < lDim) {
1584 theMatrix[entry] *= freq_i;
1585 }
1586 }
1587 }
1588 }
1589
1590 for (unsigned long row_start = 0UL, row = 0UL; row_start < lDim; row_start+=vDim, row++) {
1591 unsigned long diag = row_start + row;
1592 theMatrix [diag] = 0.;
1593 for (unsigned long col = 0UL; col < row; col++) {
1594 theMatrix[diag] -= theMatrix[row_start + col];
1595 }
1596 for (unsigned long col = row+1; col < vDim; col++) {
1597 theMatrix[diag] -= theMatrix[row_start + col];
1598 }
1599 }
1600 }
1601
1602 }
1603 return value;
1604 }
1605
1606
1607 //__________________________________________________________________________________
Compute(void)1608 HBLObjectRef _Matrix::Compute (void) {
1609 //if ((storageType != 1)&&(storageType != 2))
1610 if (storageType != _NUMERICAL_TYPE) {
1611 if (storageType == _POLYNOMIAL_TYPE) {
1612 if (ANALYTIC_COMPUTATION_FLAG) {
1613 return this;
1614 }
1615 }
1616
1617 if (IsAStringMatrix()) {
1618 return this;
1619 }
1620
1621 if (theValue) {
1622 DeleteObject (theValue);
1623 }
1624
1625 if (storageType != _SIMPLE_FORMULA_TYPE) {
1626 theValue = Evaluate(false);
1627 } else {
1628 theValue = EvaluateSimple ();
1629 }
1630 return theValue;
1631 }
1632 return this;
1633 }
1634
1635 //__________________________________________________________________________________
ComputeNumeric(bool copy)1636 HBLObjectRef _Matrix::ComputeNumeric (bool copy) {
1637 if (storageType != _NUMERICAL_TYPE) {
1638 if (storageType == 0 && ANALYTIC_COMPUTATION_FLAG) {
1639 return this;
1640 }
1641 if (storageType != _SIMPLE_FORMULA_TYPE) {
1642 if (theValue) {
1643 DeleteObject (theValue);
1644 }
1645 theValue = Evaluate(false);
1646 } else {
1647 if (copy) {
1648 if (theValue) {
1649 DeleteObject (theValue);
1650 }
1651 theValue = EvaluateSimple();
1652 } else {
1653 theValue = EvaluateSimple((_Matrix*)theValue);
1654 }
1655 }
1656 return theValue;
1657 }
1658 if (copy) {
1659 if (theValue) {
1660 DeleteObject (theValue);
1661 }
1662 return (theValue = (_Matrix*)makeDynamic());
1663 }
1664 return this;
1665 }
1666
1667 //__________________________________________________________________________________
RetrieveNumeric(void)1668 HBLObjectRef _Matrix::RetrieveNumeric (void) {
1669 if (storageType != _NUMERICAL_TYPE) {
1670 if (theValue) {
1671 return theValue;
1672 }
1673
1674 return ComputeNumeric();
1675 }
1676 return this;
1677 }
1678
1679 //__________________________________________________________________________________
Sum(HBLObjectRef cache)1680 HBLObjectRef _Matrix::Sum (HBLObjectRef cache) {
1681 return _returnConstantOrUseCache(MaxElement (1), cache);
1682 }
1683
1684 //__________________________________________________________________________________
1685
1686
ExecuteSingleOp(long opCode,_List * arguments,_hyExecutionContext * context,HBLObjectRef cache)1687 HBLObjectRef _Matrix::ExecuteSingleOp (long opCode, _List* arguments, _hyExecutionContext* context, HBLObjectRef cache) {
1688
1689
1690 switch (opCode) { // first check operations without arguments
1691 case HY_OP_CODE_ABS: // Abs
1692 return Abs(cache);
1693 case HY_OP_CODE_COLUMNS: //Columns
1694 return _returnConstantOrUseCache(vDim, cache);
1695 case HY_OP_CODE_INVERSE: //Inverse
1696 return Inverse(cache);
1697 case HY_OP_CODE_EIGENSYSTEM: //Eigensystem
1698 return Eigensystem(cache);
1699 case HY_OP_CODE_EVAL: //Eval
1700 return (HBLObjectRef)ComputeNumeric()->makeDynamic();
1701 case HY_OP_CODE_EXP: //Exp
1702 return Exponentiate();
1703 case HY_OP_CODE_LUDECOMPOSE: // LUDecompose
1704 return LUDecompose();
1705 case HY_OP_CODE_LOG: // Log
1706 return ApplyScalarOperation ([] (hyFloat h) -> hyFloat {return log (h);}, cache);
1707 case HY_OP_CODE_ROWS: // Rows
1708 return _returnConstantOrUseCache (hDim, cache);
1709 case HY_OP_CODE_SIMPLEX: // Simplex
1710 return SimplexSolve();
1711 case HY_OP_CODE_TRANSPOSE: { // Transpose
1712 _Matrix* result = new _Matrix (*this);
1713 result->Transpose();
1714 return result;
1715 }
1716 case HY_OP_CODE_TYPE: // Type
1717 return Type(cache);
1718 }
1719
1720 _MathObject * arg0 = _extract_argument (arguments, 0UL, false);
1721
1722 switch (opCode) { // next check operations without arguments or with one argument
1723 case HY_OP_CODE_ADD: // +
1724 if (arg0) {
1725 return AddObj (arg0, cache);
1726 } else {
1727 return Sum (cache);
1728 }
1729 break;
1730 case HY_OP_CODE_SUB: // -
1731 if (arg0) {
1732 return SubObj(arg0, cache);
1733 } else {
1734 return ApplyScalarOperation ([] (hyFloat h) -> hyFloat {return -h;}, cache);
1735
1736 //return (HBLObjectRef)((*this)*(-1.0)).makeDynamic();
1737 }
1738 break;
1739 }
1740
1741 if (arg0) {
1742 switch (opCode) { // operations that require exactly one argument
1743 case HY_OP_CODE_IDIV: // $
1744 case HY_OP_CODE_DIV: // /
1745 return MultElements(arg0,opCode == HY_OP_CODE_DIV, cache);
1746 case HY_OP_CODE_MOD: // %
1747 return SortMatrixOnColumn (arg0, cache);
1748 case HY_OP_CODE_AND: // &&
1749 return pFDR (arg0, cache);
1750 case HY_OP_CODE_MUL: // *
1751 return MultObj(arg0, cache);
1752 case HY_OP_CODE_LESS: // <
1753 return PathLogLikelihood(arg0, cache);
1754 case HY_OP_CODE_LEQ: // <=
1755 return K_Means(arg0, cache);
1756 case HY_OP_CODE_EQ: // ==
1757 return _returnConstantOrUseCache(Equal (arg0), cache);
1758 //return ProfileMeanFit(arg0);
1759 case HY_OP_CODE_GREATER: // >
1760 return NeighborJoin (!CheckEqual(arg0->Value(),0.0), cache);
1761 case HY_OP_CODE_GEQ: // >=
1762 return MakeTreeFromParent (arg0->Value(), cache);
1763 case HY_OP_CODE_CCHI2: //CChi2
1764 if (arg0->ObjectClass()==NUMBER && arg0->Value()>0.999 ) {
1765 return _returnConstantOrUseCache (FisherExact(5.,80.,1.), cache);
1766 } else {
1767 return _returnConstantOrUseCache (FisherExact(0.,0.,0.), cache);
1768 }
1769 case HY_OP_CODE_LUSOLVE: // LUSolve
1770 return LUSolve (arg0);
1771 case HY_OP_CODE_RANDOM: // Random
1772 return Random (arg0, cache);
1773 case HY_OP_CODE_POWER: // ^ (Poisson log-likelihood)
1774 return PoissonLL (arg0, cache);
1775 case HY_OP_CODE_MAX: // Max
1776 case HY_OP_CODE_MIN: // Max
1777 if (arg0->ObjectClass()==NUMBER) {
1778 if (CheckEqual (arg0->Value(), 1)) {
1779 long index = 0L;
1780 hyFloat v[2] = {opCode == HY_OP_CODE_MAX?MaxElement (0,&index):MinElement(0,&index),0.0};
1781 v[1] = index;
1782 return new _Matrix (v,1,2);
1783 }
1784 }
1785 return _returnConstantOrUseCache (opCode == HY_OP_CODE_MAX?MaxElement (0):MinElement (0), cache);
1786 }
1787 _MathObject * arg1 = _extract_argument (arguments, 1UL, false);
1788
1789 switch (opCode) {
1790
1791 case HY_OP_CODE_MACCESS: // MAccess
1792 return MAccess (arg0,arg1, cache);
1793
1794 case HY_OP_CODE_MCOORD: // MCoord
1795 return MCoord (arg0, arg1, cache);
1796 }
1797
1798 }
1799
1800 switch (opCode) {
1801 case HY_OP_CODE_ADD: // +
1802 case HY_OP_CODE_SUB: // -
1803 case HY_OP_CODE_IDIV: // $
1804 case HY_OP_CODE_DIV: // /
1805 case HY_OP_CODE_MOD: // %
1806 case HY_OP_CODE_AND: // &&
1807 case HY_OP_CODE_MUL: // *
1808 case HY_OP_CODE_LESS: // <
1809 case HY_OP_CODE_LEQ: // <=
1810 case HY_OP_CODE_EQ: // ==
1811 case HY_OP_CODE_GREATER: // >
1812 case HY_OP_CODE_GEQ: // >=
1813 case HY_OP_CODE_CCHI2: //CChi2
1814 case HY_OP_CODE_LUSOLVE: // LUSolve
1815 case HY_OP_CODE_RANDOM: // Random
1816 case HY_OP_CODE_POWER: // ^ (Poisson log-likelihood)
1817 case HY_OP_CODE_MAX: // Max
1818 case HY_OP_CODE_MIN: // Max
1819 case HY_OP_CODE_MACCESS: // MAccess
1820 case HY_OP_CODE_MCOORD: // MCoord
1821 WarnWrongNumberOfArguments (this, opCode,context, arguments);
1822 break;
1823 default:
1824 WarnNotDefined (this, opCode,context);
1825 }
1826
1827 return new _MathObject;
1828 }
1829
1830 //_____________________________________________________________________________________________
AmISparse(void)1831 bool _Matrix::AmISparse(void) {
1832 if (theIndex) {
1833 return true; // duh!
1834 }
1835
1836 if (storageType == _FORMULA_TYPE || storageType == _SIMPLE_FORMULA_TYPE) {
1837 return false;
1838 }
1839
1840 long const threshhold = lDim * _Matrix::switchThreshold / 100 + 1;
1841 long k = 0L;
1842
1843 if (storageType==_NUMERICAL_TYPE) {
1844 for (long i=0; i<lDim; i++) {
1845 if (theData[i]!=ZEROOBJECT) {
1846 k++;
1847 if (k == threshhold) break;
1848 }
1849 }
1850 } else {
1851 for (long i=0; i<lDim; i++) {
1852 if (IsNonEmpty(i) && !GetMatrixObject(i)->IsObjectEmpty()) {
1853 k++;
1854 if (k == threshhold) break;
1855 }
1856 }
1857 }
1858
1859
1860
1861 if (k < threshhold) {
1862 // we indeed are sparse enough
1863 _Matrix sparseMe (hDim,vDim,true,storageType==_NUMERICAL_TYPE);
1864 if (storageType==_NUMERICAL_TYPE) {
1865 for (long i=0; i<lDim; i++) {
1866 if (theData[i]!=ZEROOBJECT) {
1867 sparseMe[i]=theData[i];
1868 }
1869 }
1870 } else if (storageType==0) {
1871 for (long i=0; i<lDim; i++) {
1872 if ((GetMatrixObject(i)!=ZEROPOINTER)&&(!GetMatrixObject(i)->IsObjectEmpty())) {
1873 sparseMe.StoreObject(i,GetMatrixObject(i));
1874 }
1875 GetMatrixObject(i)->AddAReference();
1876 }
1877 }
1878
1879 Clear();
1880 DuplicateMatrix (this, &sparseMe);
1881 return true;
1882 }
1883 return false;
1884 }
1885
1886 //_____________________________________________________________________________________________
AmISparseFast(_Matrix & whereTo)1887 bool _Matrix::AmISparseFast (_Matrix& whereTo) {
1888 if (theIndex) {
1889 return true; // duh!
1890 }
1891
1892 long k = 0L,
1893 threshold = lDim*_Matrix::switchThreshold/100;
1894
1895 //speculatively allocate memory to store non-zero indices
1896
1897 long * non_zero_index = (long*)alloca (threshold*sizeof(long));
1898
1899 #if defined _SLKP_USE_AVX_INTRINSICS
1900 __m256d zeros = _mm256_setzero_pd();
1901 long lDimMOD4 = lDim >> 2 << 2;
1902 for (long i = 0; i < lDimMOD4; i+=4) {
1903 int res = _mm256_movemask_pd(_mm256_cmp_pd (_mm256_loadu_pd (theData+i), zeros, _CMP_NEQ_OQ));
1904 if (res) { // something is different
1905 if (res & 1) { non_zero_index[k++] = i; if (k == threshold) break; };
1906 if (res & 2) { non_zero_index[k++] = i+1; if (k == threshold) break; };
1907 if (res & 4) { non_zero_index[k++] = i+2; if (k == threshold) break; };
1908 if (res & 8) { non_zero_index[k++] = i+3; if (k == threshold) break; };
1909 }
1910 }
1911
1912 if (k < threshold)
1913 for (long i = lDimMOD4; i < lDim; i++) {
1914 if (theData[i] != 0.0) {
1915 non_zero_index[k++] = i;
1916 if (k == threshold) {
1917 return false;
1918 }
1919 }
1920 }
1921 #elif _SLKP_USE_ARM_NEON
1922 float64x2_t zeros = vdupq_n_f64 (0.);
1923 long lDimMOD2 = lDim >> 1 << 1;
1924 for (long i = 0; i < lDimMOD2; i+=2) {
1925
1926 uint64x2_t res = vceqq_f64 (vld1q_f64 (theData+i), zeros);
1927 if (vaddvq_u64 (res)) { // something is different
1928 if (vgetq_lane_u64 (res,0)) { non_zero_index[k++] = i; if (k == threshold) break; };
1929 if (vgetq_lane_u64 (res,1)) { non_zero_index[k++] = i+1; if (k == threshold) break; };
1930 }
1931 }
1932
1933 if (k < threshold)
1934 for (long i = lDimMOD2; i < lDim; i++) {
1935 if (theData[i] != 0.0) {
1936 non_zero_index[k++] = i;
1937 if (k == threshold) {
1938 return false;
1939 }
1940 }
1941 }
1942 #else
1943 for (long i = 0; i < lDim; i++) {
1944 if (theData[i] != 0.0) {
1945 non_zero_index[k++] = i;
1946 if (k == threshold) {
1947 return false;
1948 }
1949 }
1950 }
1951 #endif
1952
1953
1954
1955 if (k < threshold) {
1956 // we indeed are sparse enough
1957
1958 bool canReuse = whereTo.lDim > k;
1959
1960 if (k == 0L) { // empty matrix
1961 //printf ("\nZERO SIZE\n");
1962 whereTo.lDim = 1L;
1963 } else {
1964 whereTo.lDim = k;
1965 }
1966
1967 if (whereTo.theIndex) {
1968 if (canReuse) {
1969 whereTo.theIndex = (long*)MemReallocate (whereTo.theIndex , whereTo.lDim*sizeof(long));
1970 } else {
1971 free (whereTo.theIndex);
1972 whereTo.theIndex = (long*)MatrixMemAllocate (whereTo.lDim*sizeof(long));
1973 }
1974 } else {
1975 whereTo.theIndex = (long*)MatrixMemAllocate (whereTo.lDim*sizeof(long));
1976 }
1977
1978 hyFloat * _hprestrict_ newData = canReuse ? whereTo.theData : (hyFloat*)MatrixMemAllocate (whereTo.lDim*sizeof(hyFloat));
1979
1980 if (whereTo.compressedIndex) {
1981 MatrixMemFree(whereTo.compressedIndex);
1982 }
1983 whereTo.compressedIndex = (long*) MatrixMemAllocate((whereTo.lDim + hDim) * sizeof (long));
1984
1985 long currentRow = 0L;
1986 long * __restrict wci = (long*)whereTo.compressedIndex;
1987
1988 for (long i=0L; i<k; i++) {
1989 //printf ("%ld %ld", i, theIndex[i]);
1990 long entryIndex = non_zero_index[i];
1991 whereTo.theIndex[i] = entryIndex;
1992
1993 long indexRow = entryIndex / vDim,
1994 indexColumn = entryIndex - indexRow * vDim;
1995
1996 wci[i + hDim] = indexColumn;
1997 if (indexRow > currentRow) {
1998 for (long l = currentRow; l < indexRow; l++) {
1999 wci[l] = i;
2000 }
2001 currentRow = indexRow;
2002 }
2003 newData[i] = theData [entryIndex];
2004 }
2005
2006 for (long l = currentRow; l < hDim; l++)
2007 whereTo.compressedIndex[l] = whereTo.lDim;
2008
2009 if (canReuse) {
2010 whereTo.theData = (hyFloat*) MemReallocate(newData, whereTo.lDim*sizeof(hyFloat));
2011 } else {
2012 free (whereTo.theData);
2013 whereTo.theData = newData;
2014 }
2015
2016 return true;
2017 }
2018
2019 return false;
2020 }
2021
2022 //_____________________________________________________________________________________________
2023
IsValidTransitionMatrix() const2024 bool _Matrix::IsValidTransitionMatrix() const {
2025 if (is_square() && is_numeric()) {
2026 long d = GetHDim();
2027 hyFloat * sums = (hyFloat*)alloca (sizeof (hyFloat)*d);
2028 long idx = 0L;
2029 const hyFloat tolerance = kMachineEpsilon * 10.;
2030 for (long r = 0L; r < d; r++) {
2031 sums [r] = 0.;
2032 for (long c = 0L; c < d; c++, idx++) {
2033 hyFloat term = theData[idx];
2034 if (term < 0.0 || term > 1.0) {
2035 if (CheckEqual(0.0, term, tolerance)) {
2036 theData[idx] = 0.;
2037 continue;
2038 }
2039 if (CheckEqual(1.0, term, tolerance)) {
2040 theData[idx] = 1.;
2041 sums[r] += 1.;
2042 continue;
2043 }
2044 char buffer [255];
2045 snprintf (buffer, 255, "FAILED IsValidTransitionMatrix at (%ld, %ld) = %20.15g\n", r, c, term);
2046 ReportWarning(buffer);
2047 return false;
2048 }
2049 sums[r] += term;
2050 }
2051 }
2052 for (long r = 0L; r < d; r++) {
2053 if (!CheckEqual(1.0, sums[r])) {
2054 char buffer [255];
2055 snprintf (buffer, 255, "FAILED ROW SUM at (%ld) = %20.15g\n", r, sums[r]);
2056 ReportWarning(buffer);
2057 return false;
2058 }
2059 }
2060 return true;
2061 }
2062 return false;
2063 }
2064
2065
2066 //_____________________________________________________________________________________________
2067
IsReversible(_Matrix * freqs)2068 bool _Matrix::IsReversible(_Matrix* freqs) {
2069
2070 try {
2071
2072 if (!is_square()) {
2073 throw _String ("Not a square matrix in _Matrix::IsReversible");
2074 }
2075
2076 if (freqs && freqs->GetHDim () * freqs->GetVDim () != GetHDim()) {
2077 throw _String ("Incompatible frequency and rate matrix dimensions in _Matrix::IsReversible");
2078 }
2079
2080 if (!is_numeric() && !is_expression_based()) {
2081 throw _String ("Unsupported rate matrix type in _Matrix::IsReversible");
2082 }
2083
2084 if (freqs && !freqs->is_numeric() && !freqs->is_expression_based()) {
2085 throw _String ("Unsupported frequency matrix type in _Matrix::IsReversible");
2086 }
2087
2088
2089 bool needAnalytics = is_expression_based() || (freqs && freqs->is_expression_based());
2090 if (needAnalytics) {
2091 if (freqs) {
2092 for (long r = 0; r < hDim; r++)
2093 for (long c = r+1; c < hDim; c++) {
2094 bool compResult = true;
2095 if (is_expression_based()) {
2096 _Formula* rc = GetFormula(r,c),
2097 * cr = GetFormula(c,r);
2098
2099 if (rc && cr) {
2100 _Polynomial *rcp = (_Polynomial *)rc->ConstructPolynomial(),
2101 *crp = (_Polynomial *)cr->ConstructPolynomial();
2102
2103 if (rcp && crp) {
2104 HBLObjectRef tr = nil,
2105 tc = nil;
2106
2107 if (freqs->is_expression_based()) {
2108 if (freqs->GetFormula(r,0)) {
2109 tr = freqs->GetFormula(r,0)->ConstructPolynomial();
2110 if (tr) {
2111 tr->AddAReference();
2112 } else {
2113 throw _String ("Could not convert matrix cell (") & r & ',' & c & ") to a polynomial";
2114 }
2115 }
2116 if (freqs->GetFormula(c,0)) {
2117 tc = freqs->GetFormula(c,0)->ConstructPolynomial();
2118 if (tc) {
2119 tc->AddAReference();
2120 } else {
2121 DeleteObject (tr);
2122 throw _String ("Could not convert frequency cell (") & c & ") to a polynomial";
2123 }
2124 }
2125 } else {
2126 tr = new _Constant ((*freqs)[r]);
2127 tc = new _Constant ((*freqs)[c]);
2128 }
2129 if (tr && tc) {
2130 _Polynomial * rcpF = (_Polynomial*)rcp->Mult(tr, nil),
2131 * crpF = (_Polynomial*)crp->Mult(tc, nil);
2132
2133 compResult = rcpF->Equal(crpF);
2134 DeleteObject (rcpF);
2135 DeleteObject (crpF);
2136
2137 } else {
2138 compResult = !(tr||tc);
2139 }
2140
2141 DeleteObject (tr);
2142 DeleteObject (tc);
2143
2144
2145 } else {
2146 throw _String ("Could not convert a matrix cell at (") & r & ',' & c & ") to a polynomial: " & _StringBuffer ((_StringBuffer*)rc->toStr(kFormulaStringConversionNormal));
2147 }
2148 } else {
2149 compResult = !(rc || cr);
2150 }
2151 if (!compResult) {
2152 throw _String ("Unequal cells at (") & r & ',' & c & ")";
2153 }
2154 }
2155
2156 }
2157 } else {
2158 for (long r = 0; r < hDim; r++)
2159 for (long c = r+1; c < hDim; c++) {
2160 bool compResult = true;
2161 _Formula* rc = GetFormula(r,c),
2162 * cr = GetFormula(c,r);
2163
2164 if (rc && cr) {
2165 _Polynomial *rcp = (_Polynomial *)rc->ConstructPolynomial(),
2166 *crp = (_Polynomial *)cr->ConstructPolynomial();
2167
2168 if (rcp && crp) {
2169 compResult = rcp->Equal(crp);
2170 } else {
2171 compResult = rc->EqualFormula(cr);
2172 }
2173 } else {
2174 compResult = !(rc || cr);
2175 }
2176
2177 if (!compResult) {
2178 return false;
2179 }
2180 }
2181 }
2182 return true;
2183 } else {
2184 if (freqs) {
2185 for (long r = 0; r < hDim; r++)
2186 for (long c = r+1; c < hDim; c++)
2187 if (! CheckEqual ((*this)(r,c)*(*freqs)[r], (*this)(c,r)*(*freqs)[c])) {
2188 return false;
2189 }
2190 } else {
2191 for (long r = 0; r < hDim; r++)
2192 for (long c = r+1; c < hDim; c++)
2193 if (! CheckEqual ((*this)(r,c), (*this)(c,r))) {
2194 return false;
2195 }
2196 }
2197 return true;
2198 }
2199 } catch (const _String& reason) {
2200 ReportWarning (_String ("Reversibility checks failed: ") & reason);
2201 return false;
2202 }
2203 return false;
2204 }
2205
2206 //_____________________________________________________________________________________________
2207
CheckIfSparseEnough(bool force,bool copy)2208 bool _Matrix::CheckIfSparseEnough(bool force, bool copy) {
2209
2210 // check if matrix is sparse enough to justify compressed storage
2211
2212 if (theIndex && (force || lDim>hDim*vDim*::_Matrix::switchThreshold/100)) {
2213 // switch to normal matrix storage - more than half elements are non-zero
2214 // -= allocationBlock;
2215
2216 long square_dimension = vDim*hDim;
2217
2218 if (!is_numeric()) {
2219 // pointers
2220 hyPointer* tempData = (hyPointer*) MemAllocate (square_dimension*sizeof(hyPointer));
2221 InitializeArray(tempData, square_dimension, (hyPointer)nil);
2222
2223 if (copy) {
2224 for (unsigned long i = 0UL; i<lDim; i++) {
2225 if (IsNonEmpty(i)) {
2226 tempData[theIndex[i]]=((hyPointer*)theData)[i];
2227 }
2228 }
2229 }
2230 MatrixMemFree( theData);
2231 theData = (hyFloat*)tempData;
2232 } else {
2233 //objects
2234 hyFloat* tempData = (hyFloat*) MemAllocate (square_dimension*sizeof(hyFloat), true, 64);
2235 // InitializeArray(tempData, square_dimension, 0.0);
2236
2237 if (copy) {
2238 for (unsigned long i = 0UL; i<lDim; i++) {
2239 long k = theIndex[i];
2240 if (k >= 0) {
2241 tempData [k] = ((hyFloat*)theData) [i];
2242 }
2243 }
2244 }
2245 MatrixMemFree( theData);
2246 theData = (hyFloat*)tempData;
2247
2248 }
2249 lDim = square_dimension;
2250 MatrixMemFree( theIndex);
2251 theIndex = nil;
2252 return true;
2253 }
2254 return false;
2255 }
2256
2257 //_____________________________________________________________________________________________
IncreaseStorage(void)2258 bool _Matrix::IncreaseStorage (void) {
2259 if (compressedIndex) {
2260 HandleApplicationError("Internal error. Called _Matrix::IncreaseStorage on compressed index matrix");
2261 return false;
2262 }
2263 lDim += allocationBlock;
2264 theIndex = (long*)MemReallocate(theIndex, lDim*sizeof(long));
2265 for (long i = lDim-allocationBlock; i < lDim; i++) {
2266 theIndex [i] = -1;
2267 }
2268
2269 if (!is_numeric()) {
2270 // pointers or formulas
2271 theData = (hyFloat*) MemReallocate(theData, lDim*sizeof(void*));
2272 for (long i = lDim-allocationBlock; i < lDim; i++) {
2273 ((_Formula**)theData) [i] = ZEROPOINTER;
2274 }
2275
2276 } else {
2277 //objects
2278 theData = (hyFloat*) MemReallocate(theData, lDim*sizeof(hyFloat));
2279 for (long i = lDim-allocationBlock; i < lDim; i++) {
2280 theData [i] = ZEROOBJECT;
2281 }
2282 }
2283 return TRUE;
2284
2285 }
2286
2287
2288 //_____________________________________________________________________________________________
2289
Convert2Formulas(void)2290 void _Matrix::Convert2Formulas (void)
2291 {
2292 if (is_numeric()) {
2293 storageType = _FORMULA_TYPE;
2294 _Formula** tempData = (_Formula**)MatrixMemAllocate (sizeof(void*)*lDim);
2295 if (is_dense()) {
2296 for (long i = 0; i<lDim; i++) {
2297 tempData[i] = new _Formula (new _Constant (((hyFloat*)theData)[i]));
2298 }
2299 } else
2300 for (long i = 0; i<lDim; i++) {
2301 if (IsNonEmpty(i)) {
2302 //_Constant c (((hyFloat*)theData)[i]);
2303 //_Formula f((_PMathObj)c.makeDynamic());
2304 //tempData[i] = (_Formula*)f.makeDynamic();
2305 tempData[i] = new _Formula (new _Constant (((hyFloat*)theData)[i]));
2306 } else {
2307 tempData[i]=nil;
2308 }
2309 }
2310
2311 MatrixMemFree (theData);
2312 theData = (hyFloat*)tempData;
2313 }
2314 }
2315
2316
2317
2318
2319 //_____________________________________________________________________________________________
2320
ScanForVariables(_AVLList & theReceptacle,bool inclG,_AVLListX * tagger,long weights) const2321 void _Matrix:: ScanForVariables(_AVLList& theReceptacle, bool inclG, _AVLListX* tagger, long weights) const {
2322 ScanForVariables2 (theReceptacle, inclG, -1, true, tagger, weights);
2323 }
2324 //_____________________________________________________________________________________________
2325
ScanForVariables2(_AVLList & theReceptacle,bool inclG,long modelID,bool inclCat,_AVLListX * tagger,long weights) const2326 void _Matrix:: ScanForVariables2(_AVLList& theReceptacle, bool inclG, long modelID, bool inclCat, _AVLListX* tagger, long weights) const {
2327 if (is_expression_based()) { // a formula based matrix, there is stuff to do
2328 if (modelID >= 0) {
2329 _AssociativeList* definedCache = nil;
2330 _Variable* cachedDeps = FetchVar(LocateVarByName (CACHE_FORMULA_DEPENDANCY));
2331
2332 if (cachedDeps && cachedDeps->ObjectClass () == ASSOCIATIVE_LIST)
2333 // 20100316 SLKP: I am pretty sure this is broken...
2334 {
2335 definedCache = (_AssociativeList*)cachedDeps->GetValue();
2336 _String matrixKey (modelID);
2337 _Matrix* cachedValues = (_Matrix*)definedCache->GetByKey (matrixKey,MATRIX);
2338
2339 if (cachedValues == nil) {
2340 _Formula ** theFormulas = (_Formula**)theData;
2341
2342 _SimpleList sl1,
2343 sl2;
2344 _AVLList a1 (&sl1),
2345 a2 (&sl2);
2346
2347 if (theIndex) {
2348 for (long i = 0; i<lDim; i++)
2349 if (IsNonEmpty(i)) {
2350 theFormulas[i]->ScanFForVariables(a1,false);
2351 theFormulas[i]->ScanFForVariables(a2,true);
2352 }
2353 } else
2354 for (long i = 0; i<lDim; i++)
2355 if (theFormulas[i]!=(_Formula*)ZEROPOINTER) {
2356 theFormulas[i]->ScanFForVariables(a1,false);
2357 theFormulas[i]->ScanFForVariables(a2,true);
2358 }
2359
2360 a1.ReorderList();
2361 a2.ReorderList();
2362
2363 cachedValues = new _Matrix (2,sl2.lLength,false,true);
2364
2365 for (unsigned long k=0; k<sl1.lLength; k++) {
2366 cachedValues->theData[k] = sl1.list_data[k];
2367 }
2368 {
2369 for (unsigned long k=sl1.lLength; k<sl2.lLength; k++) {
2370 cachedValues->theData[k] = -1.;
2371 }
2372 }
2373 {
2374 for (unsigned long k=0; k<sl2.lLength; k++) {
2375 cachedValues->theData[k+sl2.lLength] = sl2.list_data[k];
2376 }
2377 }
2378
2379 _FString aKey (matrixKey,false);
2380
2381 definedCache->MStore (&aKey, cachedValues, false);
2382
2383 }
2384
2385 long colCount = cachedValues->GetVDim(),
2386 rowIndex = inclG?colCount:0;
2387
2388 for (long k=0; k<colCount; k++,rowIndex++) {
2389 long vI = cachedValues->theData[rowIndex];
2390 if (vI >= 0) {
2391 theReceptacle.Insert ((BaseRef)vI);
2392 if (tagger) {
2393 tagger->UpdateValue((BaseRef)vI, weights, 0);
2394 }
2395 } else {
2396 break;
2397 }
2398 }
2399
2400 return;
2401 }
2402
2403 }
2404
2405 _Formula ** theFormulas = (_Formula**)theData;
2406
2407 if (theIndex) {
2408 for (long i = 0; i<lDim; i++)
2409 if (IsNonEmpty(i)) {
2410 theFormulas[i]->ScanFForVariables(theReceptacle,inclG,false,inclCat, false, tagger, weights);
2411 }
2412 } else
2413 for (long i = 0; i<lDim; i++) {
2414 if (theFormulas[i]!=(_Formula*)ZEROPOINTER) {
2415 theFormulas[i]->ScanFForVariables (theReceptacle,inclG,false,inclCat, false, tagger, weights);
2416 }
2417 }
2418 } else if (storageType == 0) { // a polynomial based matrix, there is stuff to do
2419 _MathObject ** thePoly = (_MathObject**)theData;
2420 if (theIndex)
2421 for (long i = 0; i<lDim; i++) {
2422 if (IsNonEmpty(i)) {
2423 thePoly[i]->ScanForVariables(theReceptacle,inclG,tagger, weights);
2424 }
2425 }
2426 else
2427 for (long i = 0; i<lDim; i++) {
2428 if (thePoly[i]!=ZEROPOINTER) {
2429 thePoly[i]->ScanForVariables (theReceptacle,inclG,tagger, weights);
2430 }
2431 }
2432 }
2433
2434 }
2435
2436 //_____________________________________________________________________________________________
2437
IsConstant(void)2438 bool _Matrix::IsConstant(void)
2439 {
2440 if (storageType == 1) {
2441 return true;
2442 }
2443
2444 if (storageType == 2) { // a formula based matrix, there is stuff to do
2445 _Formula ** theFormulas = (_Formula**)theData;
2446 if (theIndex) {
2447 for (long i = 0; i<lDim; i++)
2448 if (IsNonEmpty(i) && !theFormulas[i]->IsConstant()) {
2449 return false;
2450 }
2451 } else
2452 for (long i = 0; i<lDim; i++)
2453 if (theFormulas[i]!=(_Formula*)ZEROPOINTER && !theFormulas[i]->IsConstant()) {
2454 return false;
2455 }
2456
2457 return true;
2458
2459 }
2460 return false;
2461 }
2462
2463 //_____________________________________________________________________________________________
2464
ProcessFormulas(long & stackLength,_AVLList & varList,_SimpleList & newFormulas,_SimpleList & references,_AVLListX & flaStrings,bool runAll,_Matrix * stencil)2465 bool _Matrix::ProcessFormulas (long& stackLength, _AVLList& varList, _SimpleList& newFormulas,
2466 _SimpleList& references, _AVLListX& flaStrings,
2467 bool runAll, _Matrix * stencil) {
2468 _Formula * thisFormula = nil;
2469 _Formula ** theFormulas = (_Formula**)theData;
2470
2471 bool isGood = true;
2472
2473 if (theIndex) {
2474 for (long i = 0L; i<lDim; i++) {
2475 long cellIndex = theIndex [i];
2476 if (cellIndex>-1) {
2477 if (stencil && CheckEqual(stencil->theData[cellIndex],0.0)) {
2478 references << -1;
2479 continue;
2480 }
2481 thisFormula = theFormulas[i];
2482
2483 if (runAll || thisFormula->AmISimple(stackLength,varList)) {
2484 _String * flaString = (_String*)thisFormula->toStr(kFormulaStringConversionNormal, nil,true);
2485 long fref = flaStrings.Insert(flaString,newFormulas.lLength);
2486 if (fref < 0) {
2487 references << flaStrings.GetXtra (-fref-1);
2488 DeleteObject (flaString);
2489 } else {
2490 newFormulas << (long)thisFormula;
2491 references << fref;
2492 }
2493
2494 } else {
2495 isGood = false;
2496 break;
2497 }
2498 } else {
2499 references << -1;
2500 }
2501 }
2502 } else {
2503 for (long i = 0L; i<lDim; i++) {
2504 if ((theFormulas[i]!=(_Formula*)ZEROPOINTER)&&(!theFormulas[i]->IsEmpty())) {
2505 thisFormula = theFormulas[i];
2506
2507 if (stencil && CheckEqual(stencil->theData[i],0.0)) {
2508 references << -1;
2509 continue;
2510 }
2511
2512 if (runAll || thisFormula->AmISimple(stackLength,varList)) {
2513 _String * flaString = (_String*)thisFormula->toStr(kFormulaStringConversionNormal, nil,true);
2514 long fref = flaStrings.Insert(flaString,newFormulas.lLength);
2515 if (fref < 0) {
2516 references << flaStrings.GetXtra (-fref-1);
2517 DeleteObject (flaString);
2518 } else {
2519 newFormulas << (long)thisFormula;
2520 references << fref;
2521 }
2522 } else {
2523 isGood = false;
2524 break;
2525 }
2526 } else {
2527 references << -1;
2528 }
2529 }
2530 }
2531 return isGood;
2532 }
2533
2534 //_____________________________________________________________________________________________
BranchLengthStencil(void) const2535 _Matrix* _Matrix::BranchLengthStencil (void) const {
2536
2537 _Matrix * stencil = (_Matrix*)hy_env::EnvVariableGet(hy_env::branch_length_stencil, MATRIX);
2538 if (stencil) {
2539 if (stencil->storageType == _NUMERICAL_TYPE && stencil->hDim==stencil->vDim && stencil->hDim == hDim) {
2540 stencil->CheckIfSparseEnough (true);
2541 } else {
2542 stencil = nil;
2543 }
2544 }
2545
2546 return stencil;
2547 }
2548
2549 //_____________________________________________________________________________________________
BranchLengthExpression(_Matrix * baseFreqs,bool mbf)2550 _String* _Matrix::BranchLengthExpression (_Matrix* baseFreqs, bool mbf) {
2551 if (storageType == _FORMULA_TYPE) {
2552
2553 long stack_length = 0L;
2554
2555 _SimpleList new_formulas,
2556 references;
2557
2558 _List converted_expressions;
2559 _AVLListX converted_expressions_avl(&converted_expressions);
2560 _Matrix* stencil = BranchLengthStencil();
2561
2562 print_digit_specification = hy_env::EnvVariableGetDefaultNumber(hy_env::print_float_digits);
2563
2564 _SimpleList varList = PopulateAndSort([&] (_AVLList & list) -> void {
2565 ProcessFormulas (stack_length,list,new_formulas,references,converted_expressions_avl,true,stencil);
2566 });
2567
2568 _StringBuffer * sendMeBack = new _StringBuffer(256L);
2569
2570 if (baseFreqs->is_numeric()) {
2571 // numerical base frequencies
2572 _Matrix multipliersByRate (new_formulas.countitems(),1,false,true);
2573
2574 ForEach([this, &multipliersByRate, &references, baseFreqs, mbf] (BaseRef object, unsigned long direct_index, unsigned long index) -> void {
2575 long this_ref = references.get (index);
2576 if (this_ref >= 0) {
2577 multipliersByRate.set(0,this_ref) += (*baseFreqs)(direct_index/vDim,0) *
2578 (mbf?(*baseFreqs)(direct_index%vDim,0):1.0);
2579 }
2580
2581 }, [] (unsigned long) -> BaseRef {return nil;});
2582
2583
2584 for (unsigned long k=0UL; k<new_formulas.countitems(); k++) {
2585 hyFloat this_multiplier = multipliersByRate (k, 0);
2586
2587 if (!CheckEqual(this_multiplier,0.0)) {
2588 if (sendMeBack->nonempty()) {
2589 (*sendMeBack) << '+';
2590 }
2591
2592 (*sendMeBack) << '('
2593 << (_String*)converted_expressions.GetItem(k)
2594 << ")*"
2595 << _String(this_multiplier, print_digit_specification);
2596
2597 }
2598 }
2599 } else if (baseFreqs->is_expression_based()) {
2600 // formula-based equilibrium frequencies
2601
2602 _List freqFla,
2603 multipliersByRate;
2604
2605 for (long k=0L; k<new_formulas.countitems(); k++) {
2606 multipliersByRate.AppendNewInstance(new _StringBuffer (128L));
2607 }
2608
2609 for (long k=0L; k<hDim; k++) {
2610 freqFla.AppendNewInstance ((_String*)baseFreqs->GetFormula(k,0)->toStr(kFormulaStringConversionNormal, nil,true));
2611 }
2612
2613 ForEach([this, &multipliersByRate, &references, baseFreqs, mbf, &freqFla] (BaseRef object, unsigned long direct_index, unsigned long index) -> void {
2614 long this_ref = references.get (index);
2615 if (this_ref >= 0L) {
2616 _StringBuffer * thisAdder = (_StringBuffer*)multipliersByRate(this_ref);
2617 if (thisAdder->nonempty()) {
2618 (*thisAdder) << '+';
2619 }
2620 (*thisAdder) << '(';
2621 if (mbf) {
2622 (*thisAdder) << (_String*)freqFla(direct_index%vDim)
2623 << ")*(";
2624 }
2625 (*thisAdder) << (_String*)freqFla(direct_index/vDim) << ')';
2626 }
2627 }, [] (unsigned long) -> BaseRef {return nil;});
2628
2629 for (long k=0L; k<new_formulas.countitems(); k++) {
2630 ((_StringBuffer*)multipliersByRate(k))->TrimSpace();
2631 if (k) {
2632 (*sendMeBack) << '+';
2633 }
2634
2635 (*sendMeBack) << '('
2636 << (_String*)converted_expressions.GetItem(k)
2637 << ")*("
2638 << (_String*)multipliersByRate(k)
2639 << ')';
2640 }
2641 }
2642 sendMeBack->TrimSpace();
2643 if (sendMeBack->nonempty()) {
2644 _Formula blF (*sendMeBack);
2645 _Polynomial* isPoly = (_Polynomial*)blF.ConstructPolynomial();
2646 if (isPoly) {
2647 DeleteObject (sendMeBack);
2648 sendMeBack = (_StringBuffer*)isPoly->toStr();
2649 }
2650 }
2651 return sendMeBack;
2652 }
2653 return new _String;
2654 }
2655
2656 //_____________________________________________________________________________________________
MakeMeSimple(void)2657 void _Matrix::MakeMeSimple (void) {
2658 if (is_expression_based()) {
2659 long stackLength = 0L;
2660
2661 _SimpleList newFormulas,
2662 references;
2663
2664 _List flaStringsL;
2665 _AVLListX flaStrings(&flaStringsL);
2666
2667
2668 _SimpleList varListAux;
2669 _AVLList varList (&varListAux);
2670
2671
2672 if (!is_dense()) {
2673 CompressSparseMatrix(false, (hyFloat*)alloca (sizeof (hyFloat) * lDim));
2674 }
2675 if (ProcessFormulas (stackLength,varList,newFormulas,references,flaStrings)) {
2676 storageType = _SIMPLE_FORMULA_TYPE;
2677
2678 cmd = new _CompiledMatrixData;
2679 cmd->has_volatile_entries = false;
2680
2681 for (unsigned long k = 0; k < newFormulas.lLength; k++) {
2682 cmd->has_volatile_entries = ((_Formula*)newFormulas.get(k))->ConvertToSimple(varList) || cmd->has_volatile_entries;
2683 }
2684
2685 cmd->varIndex.Duplicate (&varListAux);
2686 cmd->theStack = (_SimpleFormulaDatum*)MatrixMemAllocate (stackLength*sizeof(_SimpleFormulaDatum));
2687 cmd->varValues = (_SimpleFormulaDatum*)MatrixMemAllocate ((cmd->varIndex.countitems()>0?varList.countitems():1)*sizeof(_SimpleFormulaDatum));
2688 long allocation_size = MAX (references.lLength, 1) * sizeof (long);
2689 cmd->formulaRefs = (long*)MemAllocate (allocation_size);
2690 memcpy (cmd->formulaRefs, references.list_data, allocation_size);
2691 cmd->formulaValues = new hyFloat [newFormulas.lLength];
2692 cmd->formulasToEval.Duplicate (&newFormulas);
2693 }
2694
2695 }
2696 }
2697 //_____________________________________________________________________________________________
MakeMeGeneral(void)2698 void _Matrix::MakeMeGeneral (void) {
2699 if (storageType == _SIMPLE_FORMULA_TYPE) {
2700 for (long k = 0L; k < cmd->formulasToEval.lLength; k++) {
2701 ((_Formula*)cmd->formulasToEval.list_data[k])->ConvertFromSimpleList(cmd->varIndex);
2702 }
2703
2704 delete [] cmd->formulaValues;
2705 free (cmd->formulaRefs);
2706
2707 MatrixMemFree (cmd->theStack);
2708 MatrixMemFree (cmd->varValues);
2709 delete (cmd);
2710 cmd = nil;
2711 storageType = _FORMULA_TYPE;
2712 }
2713 }
2714 //_____________________________________________________________________________________________
Evaluate(bool replace)2715 HBLObjectRef _Matrix::Evaluate (bool replace)
2716 // evaluate the matrix overwriting (or not) the old one
2717 {
2718 _Matrix result (hDim, vDim, bool (theIndex), true);
2719
2720 if (storageType == 2) {
2721 HBLObjectRef formValue = nil;
2722 _Formula ** theFormulas = (_Formula**)theData;
2723 if (theIndex) {
2724 for (long i = 0; i<lDim; i++) {
2725 //long k =
2726 if (theIndex[i]!=-1) {
2727 formValue = theFormulas[i]->Compute();
2728 if (formValue) {
2729 result[HashBack(i)] = formValue->Value();
2730 //DeleteObject (formValue);
2731 } else {
2732 result[HashBack(i)] = 0;
2733 }
2734 }
2735 }
2736 // check for probablilty matrices * fillers
2737 if ((hDim==vDim)&&(!replace))
2738 for (long i = 0; i<hDim; i++) {
2739 long k = Hash(i,i);
2740 if ((k>=0)&&theFormulas[k]->IsEmpty()) {
2741 hyFloat *st = &result[k];
2742 *st=0;
2743 for (long j = 0; j<vDim; j++) {
2744 if (j==i) {
2745 continue;
2746 }
2747 *st-=result(i,j);
2748 }
2749 } else if (k<0) {
2750 hyFloat *st = &result[i*vDim+i];
2751 *st=0;
2752 for (long j = 0; j<vDim; j++) {
2753 if (j==i) {
2754 continue;
2755 }
2756 *st-=result(i,j);
2757 }
2758 }
2759 }
2760 } else {
2761 for (long i = 0; i<lDim; i++) {
2762 if (theFormulas[i]!=(_Formula*)ZEROPOINTER) {
2763 formValue = theFormulas[i]->Compute();
2764 if (formValue && formValue->ObjectClass() == NUMBER) {
2765 result.theData[i] = formValue->Value();
2766 //DeleteObject (formValue);
2767 } else {
2768 result.theData[i] = 0;
2769 }
2770 }
2771 }
2772 // check for probablilty matrices * fillers
2773
2774 if ((hDim==vDim)&&(!replace))
2775 for (long i = 0; i<lDim; i+=vDim+1) {
2776 if (theFormulas[i]!=(_Formula*)ZEROPOINTER) {
2777 if (theFormulas[i]->IsEmpty()) {
2778 hyFloat st = 0;
2779 long k = i/vDim,j;
2780 for (j = k*vDim; j<k*vDim+k; j++) {
2781 st-=result.theData[j];
2782 }
2783 for (j = k*vDim+k+1; j<(k+1)*vDim; j++) {
2784 st-=result.theData[j];
2785 }
2786 result.theData[i] = st;
2787 }
2788 }
2789 }
2790 }
2791 }
2792 if (storageType == 0) {
2793 HBLObjectRef polValue = nil;
2794 _MathObject ** thePoly = (_MathObject**)theData;
2795 if (theIndex) {
2796 for (long i = 0; i<lDim; i++) {
2797 if (IsNonEmpty(i)) {
2798 polValue = thePoly[i]->Compute();
2799 if (polValue) {
2800 result[HashBack(i)] = polValue->Value();
2801 DeleteObject (polValue);
2802 } else {
2803 result[i] = 0;
2804 }
2805 }
2806 }
2807
2808 } else {
2809 for (long i = 0; i<lDim; i++) {
2810 if (thePoly[i]!=(_MathObject*)ZEROPOINTER) {
2811 polValue = thePoly[i]->Compute();
2812 if (polValue) {
2813 result[i] = polValue->Value();
2814 DeleteObject (polValue);
2815 } else {
2816 result[i] = 0;
2817 }
2818 }
2819 }
2820 }
2821 }
2822 if (replace) {
2823 *this = result;
2824 } else {
2825 return (HBLObjectRef)result.makeDynamic();
2826 }
2827 return nil;
2828 }
2829
2830 //_____________________________________________________________________________________________
ConvertToSimpleList(_SimpleList & sl)2831 void _Matrix::ConvertToSimpleList (_SimpleList & sl)
2832 {
2833 sl.Clear();
2834 if (storageType == _NUMERICAL_TYPE) {
2835 sl.RequestSpace (hDim*vDim+1);
2836
2837 for (long i=0; i<hDim; i++)
2838 for (long j=0; j<vDim; j++) {
2839 sl << (*this)(i,j);
2840 }
2841 } else {
2842 if (storageType == _FORMULA_TYPE) {
2843 _Matrix * c = ((_Matrix*)Compute());
2844 if (c->storageType == _NUMERICAL_TYPE) {
2845 c -> ConvertToSimpleList (sl);
2846 }
2847 }
2848 }
2849 }
2850
2851 //_____________________________________________________________________________________________
IsAStringMatrix(void) const2852 bool _Matrix::IsAStringMatrix (void) const
2853 // check if a formula matrix contains strings
2854 {
2855 if (is_expression_based()) {
2856 try {
2857 return Any ([&] (_Formula * f, unsigned long) -> bool {
2858 if (f) {
2859 if (f->ObjectClass() == STRING)
2860 return true;
2861 throw (0);
2862 }
2863 return false;
2864 },
2865 [&] (unsigned long i) -> _Formula * {return ((_Formula**)theData)[i];});
2866
2867 } catch (int ) {
2868 return false;
2869 }
2870
2871 }
2872 return false;
2873 }
2874
2875 //_____________________________________________________________________________________________
FillInList(_List & fillMe,bool convert_numbers) const2876 void _Matrix::FillInList (_List& fillMe, bool convert_numbers) const {
2877 // check if a formula matrix contains strings
2878 if (is_expression_based()) {
2879 for (unsigned long r=0UL; r<hDim; r++)
2880 for (unsigned long c=0UL; c<vDim; c++) {
2881 _Formula * entryFla = GetFormula(r,c);
2882 if (entryFla) {
2883 HBLObjectRef computedValue = FetchObjectFromFormulaByType (*entryFla, STRING);
2884 if (computedValue) {
2885 fillMe < new _StringBuffer (((_FString*)computedValue)->get_str());
2886 } else {
2887 fillMe.Clear();
2888 return;
2889 }
2890 }
2891 }
2892 } else {
2893 if (convert_numbers && is_numeric()) {
2894 for (unsigned long r=0UL; r<hDim; r++) {
2895 for (unsigned long c=0UL; c<vDim; c++) {
2896 fillMe.AppendNewInstance (new _String ((*this)(r,c)));
2897 }
2898 }
2899 }
2900 }
2901 }
2902
2903 //_____________________________________________________________________________________________
_validateCompressedStorage(void) const2904 bool _Matrix::_validateCompressedStorage (void) const {
2905 if (theIndex && compressedIndex) {
2906 long from = 0L;
2907 long last_index = -1L;
2908 for (long r = 0; r < hDim; r++) {
2909 if (compressedIndex[r] < from || compressedIndex[r] > lDim ) {
2910 HandleApplicationError(_String ("Inconsistent compressedIndex row element count at " ) & r & " : " & from & " vs " & compressedIndex[r]);
2911 return false;
2912 }
2913 for (long c = from; c < compressedIndex[r]; c++) {
2914 long myIndex = theIndex[c];
2915 if (myIndex <= last_index) {
2916 HandleApplicationError(_String ("Lack of sortedness in theIndex at " ) & c & " : " & myIndex & " vs " & last_index);
2917 return false;
2918 }
2919 if (myIndex < 0 || myIndex >= hDim * vDim) {
2920 HandleApplicationError(_String ("Out of bounds in theIndex at " ) & c & " : " & myIndex & ", lDim = " & lDim);
2921 return false;
2922 }
2923 last_index = myIndex;
2924 if (c > from) {
2925 if (compressedIndex[c+hDim] <= compressedIndex [c-1+hDim]) {
2926 HandleApplicationError(_String ("Lack of sortedness in columns at " ) & c & " : " & compressedIndex[c+hDim] & " vs " & compressedIndex[c+hDim-1]);
2927 return false;
2928 }
2929 }
2930 if (myIndex / vDim != r || myIndex % hDim != compressedIndex[c+vDim]) {
2931 HandleApplicationError(_String ("Stored index does match row/column " ) & myIndex & "(" & c & ") : " & r & " , " & compressedIndex[c+hDim]);
2932 return false;
2933 }
2934 }
2935 from = compressedIndex[r];
2936 }
2937
2938 if (compressedIndex[hDim-1] != lDim) {
2939 HandleApplicationError(_String ("Incompatible compressedIndex[hDim-1] and lDim: " ) & compressedIndex[hDim-1] & " : " & lDim);
2940 return false;
2941 }
2942
2943 return true;
2944 }
2945 return false;
2946 }
2947
2948 //_____________________________________________________________________________________________
EvaluateSimple(_Matrix * existing_storage)2949 HBLObjectRef _Matrix::EvaluateSimple (_Matrix* existing_storage) {
2950 // evaluate the matrix overwriting the old one
2951 _Matrix * result;
2952
2953 if (existing_storage && existing_storage->hDim == hDim && existing_storage->vDim == vDim && existing_storage->is_numeric() && ((bool)existing_storage->theIndex == (bool)theIndex)) {
2954 existing_storage->ZeroNumericMatrix();
2955 result = existing_storage;
2956 } else {
2957 if (existing_storage) {
2958 DeleteObject (existing_storage);
2959 }
2960 result = new _Matrix (hDim, vDim, bool (theIndex), true);
2961 }
2962
2963
2964 if (cmd->varIndex.lLength) {
2965 for (long i=0; i<cmd->varIndex.lLength; i++) {
2966 _Variable* curVar = LocateVar(cmd->varIndex.list_data[i]);
2967 if (curVar->ObjectClass () != MATRIX) {
2968 if (curVar->IsIndependent()) {
2969 cmd->varValues[i].value = LocateVar (cmd->varIndex.list_data[i])->Value();
2970 } else {
2971 cmd->varValues[i].value = LocateVar (cmd->varIndex.list_data[i])->Compute()->Value();
2972 }
2973 } else {
2974 cmd->varValues[i].reference = (hyPointer)((_Matrix*)LocateVar (cmd->varIndex.list_data[i])->Compute())->theData;
2975 }
2976 }
2977 }
2978
2979
2980 for (long f = 0L; f < cmd->formulasToEval.lLength; f++) {
2981 cmd->formulaValues [f] = ((_Formula*)cmd->formulasToEval.list_data[f])->ComputeSimple(cmd->theStack, cmd->varValues);
2982 }
2983
2984 long * fidx = cmd->formulaRefs;
2985
2986 if (theIndex) {
2987
2988 result->bufferPerRow = bufferPerRow;
2989 result->overflowBuffer = overflowBuffer;
2990 result->allocationBlock = allocationBlock;
2991
2992 long* diagIndices = nil;
2993
2994 if (compressedIndex) {
2995 if (result->lDim < lDim + hDim) {
2996 result->theIndex = (long*)MemReallocate((hyPointer)result->theIndex,sizeof(long)*(lDim + hDim));
2997 result->theData = (hyFloat*)MemReallocate ((hyPointer)result->theData,sizeof(hyFloat)*(lDim+hDim));
2998
2999 }
3000
3001 result->lDim = lDim;
3002
3003 if (result->compressedIndex) {
3004 result->compressedIndex = (long*)MemReallocate ((hyPointer)result->compressedIndex,sizeof(long)*(lDim+hDim+hDim));
3005 } else {
3006 result->compressedIndex = (long*)MemAllocate (sizeof(long)*(lDim+hDim+hDim));
3007 }
3008
3009 if (hDim == vDim) {
3010
3011 long elements_added = 0L;
3012 long current_element_index_old_matrix = 0L;
3013 long current_element_index_new_matrix = 0L;
3014 long from = 0L;
3015 auto copy_indices = [&] () -> void {
3016 result->theIndex[current_element_index_new_matrix] = theIndex[current_element_index_old_matrix];
3017 result->compressedIndex[current_element_index_new_matrix+hDim] = compressedIndex[hDim+current_element_index_old_matrix];
3018 };
3019 diagIndices = (long*)alloca (sizeof (long) * hDim);
3020 auto inject_diagonal = [&] (long r) -> void {
3021 elements_added++;
3022 result->lDim ++;
3023 diagIndices [r] = current_element_index_new_matrix;
3024 result->theIndex[current_element_index_new_matrix] = r*vDim + r;
3025 result->theData[current_element_index_new_matrix] = 0.;
3026 result->compressedIndex[current_element_index_new_matrix+hDim] = r;
3027 current_element_index_new_matrix++;
3028 };
3029 /*if (!_validateCompressedStorage()) {
3030 HandleApplicationError("Error in compressed storage [before]");
3031 }*/
3032 for (long r = 0; r < hDim; r++) {
3033 diagIndices[r] = -1L;
3034 for (long c = from; c < compressedIndex[r]; c++, current_element_index_old_matrix++, current_element_index_new_matrix++) {
3035 if (compressedIndex[c + hDim] < r) { // column before diagonal; copy data
3036 result->theData[current_element_index_new_matrix] = cmd->formulaValues[fidx[current_element_index_old_matrix]];
3037 copy_indices();
3038 } else if (compressedIndex[c + hDim] > r) {
3039 if (diagIndices[r] == -1) { // no diagonal entry
3040 inject_diagonal(r);
3041 }
3042 result->theData[current_element_index_new_matrix] = cmd->formulaValues[fidx[current_element_index_old_matrix]];
3043 copy_indices();
3044 } else { // diagnoal entry
3045 copy_indices();
3046 diagIndices[r] = current_element_index_new_matrix;
3047 }
3048 }
3049 if (diagIndices[r] == -1) { // no diagonal entry
3050 inject_diagonal(r);
3051 }
3052 from = compressedIndex[r];
3053 result->compressedIndex[r] = from+elements_added;
3054
3055 }
3056 /*if (!result->_validateCompressedStorage()) {
3057 HandleApplicationError("Error in compressed storage");
3058 }*/
3059 } else {
3060 for (long i = 0; i<lDim; i++) {
3061 long idx = theIndex[i];
3062 result->theData[i] = cmd->formulaValues[fidx[i]];
3063 result->theIndex[i] = idx;
3064 result->compressedIndex[i] = compressedIndex[i];
3065 }
3066 for (long i = lDim; i<lDim+hDim; i++) {
3067 result->compressedIndex[i] = compressedIndex[i];
3068 }
3069 }
3070
3071 } else {
3072
3073 if (result->lDim != lDim) {
3074 result->lDim = lDim;
3075 result->theIndex = (long*)MemReallocate((hyPointer)result->theIndex,sizeof(long)*lDim);
3076 result->theData = (hyFloat*)MemReallocate ((hyPointer)result->theData,sizeof(hyFloat)*lDim);
3077 }
3078
3079 for (long i = 0; i<lDim; i++) {
3080 long idx = theIndex[i];
3081
3082 if (idx != -1) {
3083 result->theData[i] = cmd->formulaValues[fidx[i]];
3084 }
3085
3086 result->theIndex[i] = idx;
3087 }
3088 }
3089
3090
3091 if (hDim==vDim) {
3092
3093 if (result->compressedIndex) {
3094 long from = 0L;
3095 for (long r = 0; r < hDim; r++) {
3096 //printf ("%ld\n", diagIndices[r]);
3097 long di = diagIndices[r];
3098 for (long c = from; c < result->compressedIndex[r]; c++) {
3099 //printf ("%ld %g\n", c, result->theData[c]);
3100 if (c != di) {
3101 result->theData[di] -= result->theData[c];
3102 }
3103 }
3104 from = result->compressedIndex[r];
3105 }
3106 /*for (long r = 0; r < hDim; r++) {
3107 printf ("%ld %g\n", diagIndices[r], result->theData[diagIndices[r]]);
3108 }
3109 exit (0);*/
3110 }
3111 else {
3112 hyFloat* diagStorage = (hyFloat*)alloca (sizeof(hyFloat) * hDim);
3113 memset (diagStorage, 0, sizeof(hyFloat) * hDim);
3114 for (long i = 0; i<lDim; i++) {
3115 long k = result->theIndex[i];
3116 if (k!=-1) {
3117 diagStorage[k/hDim] += result->theData[i];
3118 }
3119 }
3120 for (long i = 0; i<hDim; i++) {
3121 (*result)[i*hDim+i] = -diagStorage[i];
3122 }
3123 }
3124
3125
3126 }
3127 } else {
3128
3129 for (long i = 0; i<lDim; i++) {
3130 if (fidx[i]>= 0) {
3131 result->theData[i] = cmd->formulaValues[fidx[i]];
3132 }
3133 }
3134
3135 if (hDim==vDim)
3136 for (long i = 0L, r = 0L; i<lDim; i+=vDim+1L, r++) {
3137 if (fidx[i] < 0) { // mod Aug 2 2005
3138 //if (theFormulas[i]->IsEmpty())
3139 //{
3140
3141 hyFloat st = 0.;
3142 long j;
3143
3144 for (j = r*vDim; j<r*vDim+r; j++) {
3145 st += result->theData[j];
3146 }
3147
3148 for (j = r*vDim+r+1; j<(r+1)*vDim; j++) {
3149 st += result->theData[j];
3150 }
3151
3152 result->theData[i] = -st;
3153 //}
3154 }
3155 }
3156 }
3157 //return (_PMathObj)result.makeDynamic();
3158 return result;
3159 }
3160 //_____________________________________________________________________________________________
ClearFormulae(void)3161 void _Matrix::ClearFormulae (void)
3162 {
3163 _Formula ** theFormulas = (_Formula**)theData;
3164 if (theIndex) {
3165 for (long i = 0; i<lDim; i++) {
3166 if (IsNonEmpty(i)) {
3167 delete (theFormulas[i]);
3168 }
3169 }
3170 } else
3171 for (long i = 0; i<lDim; i++) {
3172 if (theFormulas[i]!=(_Formula*)ZEROPOINTER) {
3173 delete (theFormulas[i]);
3174 }
3175 }
3176 }
3177
3178 //_____________________________________________________________________________________________
ClearObjects(void)3179 void _Matrix::ClearObjects (void)
3180 {
3181 _MathObject ** thePolys = (_MathObject**)theData;
3182 if (theIndex) {
3183 for (long i = 0; i<lDim; i++) {
3184 if (IsNonEmpty(i)) {
3185 DeleteObject (thePolys[i]);
3186 }
3187 }
3188 } else
3189 for (long i = 0; i<lDim; i++) {
3190 if (thePolys[i]!=(_MathObject*)ZEROPOINTER) {
3191 DeleteObject (thePolys[i]);
3192 }
3193 }
3194 }
3195
3196 //_____________________________________________________________________________________________
3197
Clear(bool complete)3198 void _Matrix::Clear (bool complete) {
3199 DeleteObject (theValue);
3200 if (is_expression_based()) { // has formulas in it - must delete
3201 ClearFormulae();
3202 }
3203 if (is_polynomial()) { // has objects in it - must delete
3204 ClearObjects();
3205 }
3206
3207 if (theIndex) {
3208 if (complete) {
3209 MatrixMemFree (theIndex);
3210 theIndex = nil;
3211 } else {
3212 InitializeArray(theIndex, lDim, -1L);
3213 }
3214 }
3215 if (theData) {
3216 if (complete) {
3217 MatrixMemFree (theData);
3218 hDim = vDim = 0;
3219 theData = nil;
3220 } else {
3221 memset (theData, 0, lDim * (is_numeric() ? sizeof (hyFloat): sizeof (void*)));
3222 }
3223 }
3224 if (compressedIndex) {
3225 MatrixMemFree (compressedIndex);
3226 compressedIndex = nil;
3227 }
3228
3229 }
3230
3231 //_____________________________________________________________________________________________
3232
ZeroNumericMatrix(void)3233 void _Matrix::ZeroNumericMatrix (void) {
3234 if (is_numeric()) {
3235 memset (theData, 0, sizeof (hyFloat) * lDim);
3236 if (!is_dense()) {
3237 InitializeArray (theIndex, lDim, -1L);
3238 }
3239 }
3240 }
3241
3242 //_____________________________________________________________________________________________
3243
Resize(long newH)3244 void _Matrix::Resize (long newH) {
3245 if (newH >= 0 && newH != hDim && is_numeric() && is_dense()) {
3246 hDim = newH;
3247 lDim = newH*vDim;
3248
3249 if (theData) {
3250 theData = (hyFloat*) MemReallocate ((hyPointer)theData,sizeof (hyFloat)*lDim);
3251 } else {
3252 theData = (hyFloat*) MemAllocate (sizeof (hyFloat)*lDim);
3253 }
3254 }
3255 }
3256
3257 //_____________________________________________________________________________________________
3258
~_Matrix(void)3259 _Matrix::~_Matrix (void) {
3260 _Matrix::Clear();
3261 }
3262
3263 //_____________________________________________________________________________________________
3264
operator =(_Matrix const & m)3265 _Matrix const& _Matrix::operator = (_Matrix const& m) {
3266 // SLKP 20180917 : reuse memory if copying dense numeric matrices of the same dimension
3267 if (m.is_numeric() && is_numeric() && CanFreeMe() && m.theIndex == nil && theIndex == nil && m.GetHDim () == GetHDim () && GetVDim () == m.GetVDim()) {
3268 unsigned long i = 0UL;
3269 for (unsigned long r = 0UL; r < hDim; r++) {
3270 for (unsigned long c = 0UL; c < vDim; c++, i++) {
3271 theData[i] = m.theData[i];
3272 }
3273 }
3274 } else {
3275 Clear();
3276 DuplicateMatrix (this, &m);
3277 }
3278 return *this;
3279 }
3280
3281 //_____________________________________________________________________________________________
3282
operator =(_Matrix const * m)3283 _Matrix const& _Matrix::operator = (_Matrix const* m) {
3284 //Clear();
3285 //DuplicateMatrix (this, m);
3286 *this = *m;
3287 return *this;
3288 }
3289
3290
3291 //_____________________________________________________________________________________________
AbsValue(void) const3292 hyFloat _Matrix::AbsValue (void) const{
3293 if (is_numeric() && (is_row() || is_column())) {
3294 hyFloat norm = 0.;
3295
3296 this->ForEach ([&] (hyFloat&& value, unsigned long, long) -> void {norm += value * value;},
3297 [&] (unsigned long index) -> hyFloat {return theData[index];});
3298
3299 return sqrt(norm);
3300 }
3301
3302 return 0.;
3303 }
3304
3305 //_____________________________________________________________________________________________
Abs(HBLObjectRef cache)3306 HBLObjectRef _Matrix::Abs (HBLObjectRef cache)
3307 {
3308 if (storageType == 1 && (hDim==1 || vDim == 1)) {
3309 return _returnConstantOrUseCache(AbsValue(), cache);
3310 }
3311 return _returnConstantOrUseCache(MaxElement(), cache);
3312
3313 }
3314
3315 //_____________________________________________________________________________________________
3316
AddMatrix(_Matrix & storage,_Matrix & secondArg,bool subtract)3317 void _Matrix::AddMatrix (_Matrix& storage, _Matrix& secondArg, bool subtract)
3318 // addition operation on matrices
3319 // internal function
3320
3321 {
3322
3323 // check matrix dimensions to ensure that they are addable
3324 if (!((hDim==secondArg.hDim)&&(storage.hDim==secondArg.hDim)&&(vDim==secondArg.vDim)&&(storage.vDim==secondArg.vDim))) {
3325 HandleApplicationError (_String ("Incompatible dimensions when trying to add or subtract matrices: first argument was a ") & _String (hDim) & 'x'
3326 & _String (vDim) & " matrix and the second was a "& _String (secondArg.hDim) & 'x' & _String (secondArg.vDim) & " matrix.");
3327 return;
3328 }
3329
3330 if (is_numeric()) {
3331 if (&storage != this) { // not an add&store operation
3332 // copy *this to storage
3333 if (theIndex) { //sparse matrix
3334 for (long i = 0; i<lDim; i++) {
3335 long k = theIndex[i];
3336 if (k!=-1) {
3337 storage[k] = theData[i];
3338 }
3339 }
3340 } else { // dense matrix
3341 memcpy (storage.theData, theData, sizeof (hyFloat)*lDim);
3342 }
3343 }
3344
3345 if (secondArg.theIndex) { //sparse matrix
3346 if (storage.theIndex) {
3347 if (subtract) {
3348 for (long i = 0; i<secondArg.lDim; i++) {
3349 long k = secondArg.theIndex[i];
3350 if (k!=-1) {
3351 storage[k]-=secondArg.theData[i];
3352 }
3353 }
3354 } else {
3355 for (long i = 0; i<secondArg.lDim; i++) {
3356 long k = secondArg.theIndex[i];
3357 if (k!=-1) {
3358 storage[k]+=secondArg.theData[i];
3359 }
3360 }
3361 }
3362 } else {
3363 if (subtract) {
3364 for (long i = 0; i<secondArg.lDim; i++) {
3365 long k = secondArg.theIndex[i];
3366 if (k!=-1) {
3367 storage.theData[k]-=secondArg.theData[i];
3368 }
3369 }
3370 } else {
3371 for (long i = 0; i<secondArg.lDim; i++) {
3372 long k = secondArg.theIndex[i];
3373 if (k!=-1) {
3374 storage.theData[k]+=secondArg.theData[i];
3375 }
3376 }
3377 }
3378 }
3379
3380 } else {
3381 hyFloat * _hprestrict_ argData = secondArg.theData;
3382 hyFloat * _hprestrict_ stData = storage.theData;
3383
3384 long upto = secondArg.lDim >> 4 << 4;
3385
3386 if (subtract) {
3387 #ifdef _SLKP_USE_AVX_INTRINSICS
3388 #define CELL_OP1(x,y) __m256d y = _mm256_sub_pd (_mm256_loadu_pd (stData+x), _mm256_loadu_pd (argData+x))
3389 #define CELL_OP2(x,y) _mm256_storeu_pd (stData+x,y)
3390
3391 #pragma GCC unroll 4
3392 #pragma clang loop vectorize(enable)
3393 #pragma clang loop interleave(enable)
3394 #pragma clang loop unroll(enable)
3395 #pragma GCC ivdep
3396 #pragma ivdep
3397 for (long idx = 0; idx < upto; idx+=16) {
3398 CELL_OP1 (idx,r1);
3399 CELL_OP1 (idx+4,r2);
3400 CELL_OP1 (idx+8,r3);
3401 CELL_OP1 (idx+12,r4);
3402 CELL_OP2 (idx,r1);
3403 CELL_OP2 (idx+4,r2);
3404 CELL_OP2 (idx+8,r3);
3405 CELL_OP2 (idx+12,r4);
3406 }
3407 #elif defined _SLKP_USE_ARM_NEON
3408 #define CELL_OP1(x,y) float64x2_t y = vsubq_f64 (vld1q_f64 (stData+x), vld1q_f64 (argData+x))
3409 #define CELL_OP2(x,y) vst1q_f64 (stData+x,y)
3410 #pragma GCC unroll 4
3411 #pragma clang loop vectorize(enable)
3412 #pragma clang loop interleave(enable)
3413 #pragma clang loop unroll(enable)
3414 #pragma GCC ivdep
3415 #pragma ivdep
3416 for (long idx = 0; idx < upto; idx+=8) {
3417 CELL_OP1 (idx,r1);
3418 CELL_OP1 (idx+2,r2);
3419 CELL_OP1 (idx+4,r3);
3420 CELL_OP1 (idx+6,r4);
3421 CELL_OP2 (idx,r1);
3422 CELL_OP2 (idx+2,r2);
3423 CELL_OP2 (idx+4,r3);
3424 CELL_OP2 (idx+6,r4);
3425 }
3426
3427 #else
3428 for (long idx = 0; idx < upto; idx+=4) {
3429 stData[idx]-=argData[idx];
3430 stData[idx+1]-=argData[idx+1];
3431 stData[idx+2]-=argData[idx+2];
3432 stData[idx+3]-=argData[idx+3];
3433 }
3434 #endif
3435 } else {
3436 #ifdef _SLKP_USE_AVX_INTRINSICS
3437 #define CELL_OP(x) _mm256_storeu_pd (stData+x, _mm256_add_pd (_mm256_loadu_pd (stData+x), _mm256_loadu_pd (argData+x)))
3438
3439
3440 #pragma GCC unroll 4
3441 #pragma clang loop vectorize(enable)
3442 #pragma clang loop interleave(enable)
3443 #pragma clang loop unroll(enable)
3444 for (long idx = 0; idx < upto; idx+=16) {
3445 CELL_OP (idx);
3446 CELL_OP (idx+4);
3447 CELL_OP (idx+8);
3448 CELL_OP (idx+12);
3449 }
3450
3451 #elif defined _SLKP_USE_ARM_NEON
3452 #define CELL_OP1(x,y) float64x2_t y = vaddq_f64 (vld1q_f64 (stData+x), vld1q_f64 (argData+x))
3453 #define CELL_OP2(x,y) vst1q_f64 (stData+x,y)
3454 #pragma GCC unroll 4
3455 #pragma clang loop vectorize(enable)
3456 #pragma clang loop interleave(enable)
3457 #pragma clang loop unroll(enable)
3458 #pragma GCC ivdep
3459 #pragma ivdep
3460 for (long idx = 0; idx < upto; idx+=8) {
3461 CELL_OP1 (idx,r1);
3462 CELL_OP1 (idx+2,r2);
3463 CELL_OP1 (idx+4,r3);
3464 CELL_OP1 (idx+6,r4);
3465 CELL_OP2 (idx,r1);
3466 CELL_OP2 (idx+2,r2);
3467 CELL_OP2 (idx+4,r3);
3468 CELL_OP2 (idx+6,r4);
3469 }
3470
3471 #else
3472 for (long idx = 0; idx < upto; idx+=4) {
3473 stData[idx]+=argData[idx];
3474 stData[idx+1]+=argData[idx+1];
3475 stData[idx+2]+=argData[idx+2];
3476 stData[idx+3]+=argData[idx+3];
3477 }
3478 #endif
3479 }
3480 if (subtract)
3481 for (long idx = upto; idx < secondArg.lDim; idx++) {
3482 stData[idx]-=argData[idx];
3483 }
3484 else
3485 for (long idx = upto; idx < secondArg.lDim; idx++) {
3486 stData[idx]+=argData[idx];
3487 }
3488
3489 }
3490 } else
3491
3492 if (storageType == 0) {
3493 long i;
3494 if (&storage != this) { // not an add&store operation
3495 /* if (theIndex) //sparse matrix
3496 {
3497 for (i = 0; i<lDim; i++)
3498 if (IsNonEmpty(i))
3499 storage.StoreObject(theIndex[i],GetMatrixObject(i),true);
3500 }
3501 else // normal matrix
3502 {
3503 for (i = 0; i<lDim; i++)
3504 storage.StoreObject(i,GetMatrixObject(i),true);
3505 }*/
3506 }
3507
3508 if (secondArg.theIndex) { //sparse matrix
3509 if (theIndex) { // both matrices are sparse
3510 if (subtract) {
3511 for (i = 0; i<secondArg.lDim; i++)
3512 if (secondArg.IsNonEmpty(i)) {
3513 long hb =secondArg.HashBack (i), h = Hash (hb/vDim, hb%vDim);
3514 if (h<0) { // kEmptyString slot in matrix 1
3515 storage.StoreObject (hb,secondArg.GetMatrixObject(i)->Minus());
3516 } else {
3517 storage.StoreObject (hb, GetMatrixObject(h)->Sub (secondArg.GetMatrixObject(i)));
3518 }
3519 }
3520 } else {
3521 for (i = 0; i<secondArg.lDim; i++)
3522 if (secondArg.IsNonEmpty(i)) {
3523 long hb =secondArg.HashBack (i), h = Hash (hb/vDim, hb%vDim);
3524 if (h<0) { // kEmptyString slot in matrix 1
3525 storage.StoreObject (hb,secondArg.GetMatrixObject(i),true);
3526 } else {
3527 storage.StoreObject (hb,GetMatrixObject(h)->Add (secondArg.GetMatrixObject(i)));
3528 }
3529 }
3530 }
3531 } else { // *this is not sparse
3532 DuplicateMatrix(&storage,this);
3533 if (subtract) {
3534 for (i = 0; i<secondArg.lDim; i++)
3535 if (secondArg.IsNonEmpty(i)) {
3536 long p = secondArg.HashBack (i);
3537 if (CheckObject(p)) {
3538 storage.StoreObject (p,GetMatrixObject(p)->Sub(secondArg.GetMatrixObject(i)));
3539 } else {
3540 storage.StoreObject (p,secondArg.GetMatrixObject(i)->Minus());
3541 }
3542 }
3543 } else {
3544 for (i = 0; i<secondArg.lDim; i++)
3545 if (secondArg.IsNonEmpty(i)) {
3546 long p = secondArg.HashBack (i);
3547 if (CheckObject(p)) {
3548 storage.StoreObject (p,GetMatrixObject(p)->Add(secondArg.GetMatrixObject(i)));
3549 } else {
3550 storage.StoreObject (p,secondArg.GetMatrixObject(i),true);
3551 }
3552 }
3553 }
3554 }
3555 } else { // secondarg isn't sparse - storage must also be non-sparse
3556 if (storage.theIndex) { // storage is sparse - oops
3557 storage.CheckIfSparseEnough(true); // force to non-sparse storage
3558 }
3559 if (!theIndex) { // * this is not sparse
3560 HBLObjectRef tempP;
3561 DuplicateMatrix(&storage,this);
3562 if (subtract) {
3563 for (i = 0; i<secondArg.lDim; i++) {
3564 tempP = secondArg.GetMatrixObject(i);
3565 if (tempP) {
3566 if (CheckObject(i)) {
3567 storage.StoreObject(i,GetMatrixObject(i)->Sub(tempP));
3568 } else {
3569 storage.StoreObject(i,tempP->Minus());
3570 }
3571 }
3572 }
3573 } else {
3574 for (i = 0; i<secondArg.lDim; i++) {
3575 tempP = secondArg.GetMatrixObject(i);
3576 if (tempP) {
3577 if (CheckObject(i)) {
3578 storage.StoreObject(i,GetMatrixObject(i)->Add(tempP));
3579 } else {
3580 storage.StoreObject(i,tempP,true);
3581 }
3582 }
3583 }
3584 }
3585 } else { // *this is sparse
3586 HBLObjectRef tempP;
3587 long h;
3588 if (subtract) {
3589 for (i = 0; i<secondArg.lDim; i++) {
3590 tempP = secondArg.GetMatrixObject(i);
3591 if (tempP) {
3592 h = Hash (i/hDim,i%hDim);
3593 if (h>=0) {
3594 storage.StoreObject(i,GetMatrixObject(h)->Sub(tempP));
3595 } else {
3596 storage.StoreObject(i,tempP->Minus());
3597 }
3598 }
3599 }
3600 } else {
3601 for (i = 0; i<secondArg.lDim; i++) {
3602 tempP = secondArg.GetMatrixObject(i);
3603 if (tempP) {
3604 h = Hash (i/hDim,i%hDim);
3605 if (h>=0) {
3606 storage.StoreObject(i,GetMatrixObject(h)->Add(tempP));
3607 } else {
3608 storage.StoreObject(i,tempP,true);
3609 }
3610 }
3611 }
3612 }
3613 }
3614 }
3615 }
3616 if (storage.theIndex) {
3617 storage.CheckIfSparseEnough();
3618 }
3619 }
3620
3621 //_____________________________________________________________________________________________
3622
AddWithThreshold(_Matrix & secondArg,hyFloat prec)3623 bool _Matrix::AddWithThreshold (_Matrix& secondArg, hyFloat prec)
3624 {
3625 bool res = true;
3626 if (secondArg.theIndex) { //sparse matrix
3627 long i,k;
3628 for (i = 0; res&&(i<secondArg.lDim); i++) {
3629 k = secondArg.theIndex[i];
3630 if (k!=-1) {
3631 if (secondArg.theData[i]/theData[k] > prec) {
3632 res = false;
3633 }
3634 theData[k]+=secondArg.theData[i];
3635 }
3636 }
3637 for (; i<secondArg.lDim; i++) {
3638 k = secondArg.theIndex[i];
3639 if (k!=-1) {
3640 theData[k]+=secondArg.theData[i];
3641 }
3642 }
3643 } else {
3644 hyFloat* argData = secondArg.theData, *stData = theData,
3645 *bound = theData+lDim;
3646 for (; res&&(stData!=bound); argData++, stData++) {
3647 if (*argData/ *stData> prec) {
3648 res = false;
3649 }
3650 *stData+=*argData;
3651 }
3652 for (; stData!=bound; argData++, stData++) {
3653 *stData+=*argData;
3654 }
3655 }
3656 return !res;
3657 }
3658
3659 //_____________________________________________________________________________________________
3660
Subtract(_Matrix & storage,_Matrix & secondArg)3661 void _Matrix::Subtract (_Matrix& storage, _Matrix& secondArg)
3662 // subtraction operation on matrices
3663 // internal function
3664
3665 {
3666 AddMatrix (storage,secondArg,true);
3667 }
3668
3669 //_____________________________________________________________________________________________
3670
Multiply(_Matrix & storage,hyFloat c)3671 void _Matrix::Multiply (_Matrix& storage, hyFloat c)
3672 // multiply a matrix by a scalar
3673 // internal function
3674
3675 {
3676 if (is_numeric()) { // numbers
3677 hyFloat * _hprestrict_ destination = storage.theData;
3678 hyFloat const * source = theData;
3679
3680 if (theIndex) {
3681 for (long k = 0L; k < lDim; k++)
3682 if (storage.theIndex[k] != -1) {
3683 destination[k] = source[k]*c;
3684 }
3685 } else {
3686 #ifdef _SLKP_USE_AVX_INTRINSICS
3687 #define CELL_OP(k) _mm256_storeu_pd (destination + k, _mm256_mul_pd(value_op, _mm256_loadu_pd (source+k)))
3688 long lDimM4 = lDim >> 4 << 4,
3689 k = 0;
3690
3691 __m256d value_op = _mm256_set1_pd (c);
3692 for (k = 0L; k < lDimM4; k+=16) {
3693 CELL_OP (k);
3694 CELL_OP (k+4);
3695 CELL_OP (k+8);
3696 CELL_OP (k+12);
3697 }
3698 for (; k < lDim; k++) {
3699 destination[k] = source[k]*c;
3700 }
3701 #elif defined _SLKP_USE_ARM_NEON
3702 #define CELL_OP(k) vst1q_f64 (destination + k, vmulq_f64(value_op, vld1q_f64 (source+k)))
3703 long lDimM8 = lDim >> 3 << 3,
3704 k = 0;
3705
3706 float64x2_t value_op = vdupq_n_f64 (c);
3707 for (k = 0L; k < lDimM8; k+=8) {
3708 CELL_OP (k);
3709 CELL_OP (k+2);
3710 CELL_OP (k+4);
3711 CELL_OP (k+6);
3712 }
3713 for (; k < lDim; k++) {
3714 destination[k] = source[k]*c;
3715 }
3716 #else
3717 for (long k = 0L; k < lDim; k++) {
3718 destination[k] = source[k]*c;
3719 }
3720 #endif
3721 }
3722
3723 } else {
3724 _Constant * cc = new _Constant (c);
3725
3726 if (storageType == 2) {
3727 _String const star ('*');
3728
3729 for (long i=0; i<lDim; i++)
3730 if (IsNonEmpty (i)) {
3731 long h = HashBack (i);
3732 _Formula * f = GetFormula (h/vDim,h%vDim);
3733 f->GetList().AppendNewInstance (new _Operation (cc));
3734 f->GetList().AppendNewInstance (new _Operation (star,2));
3735 }
3736 } else {
3737 if (storageType != 3) {
3738 if (theIndex)
3739 //sparse matrix
3740 {
3741 for (long i=0; i<lDim; i++)
3742 if (IsNonEmpty (i)) {
3743 storage.StoreObject (HashBack(i),GetMatrixObject(i)->Mult (cc));
3744 }
3745 } else {
3746 for (long i=0; i<lDim; i++)
3747 if (IsNonEmpty (i)) {
3748 storage.StoreObject (i,GetMatrixObject(i)->Mult (cc));
3749 }
3750 }
3751 }
3752 DeleteObject (cc);
3753 }
3754
3755 }
3756 }
3757
3758
3759 //_____________________________________________________________________________________________
3760
Multiply(_Matrix & storage,_Matrix const & secondArg) const3761 void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
3762 // multiplication operation on matrices
3763 // internal function
3764 // storage is assumed to NOT be *this
3765
3766 {
3767 HBLObjectRef tempP, tempP2;
3768
3769 if ( !theIndex && !secondArg.theIndex)
3770 // simplest case of two non-sparse matrices - multiply in a straightforward way
3771 {
3772 if ( storageType == 0 && secondArg.storageType ==0) { // both matrices are polynomial in nature
3773 for (long i=0; i<hDim; i++)
3774 for (long j=i*secondArg.vDim; j<(i+1)*secondArg.vDim; j++) {
3775 _MathObject* secTerm = secondArg.GetMatrixObject(j%secondArg.vDim), *firstTerm = GetMatrixObject (i*vDim);
3776 if (firstTerm&&secTerm) {
3777 storage.StoreObject (j,firstTerm->Mult (secTerm));
3778 } else {
3779 storage.StoreObject (j,new _Polynomial(0.0));
3780 }
3781 for (long k=i*vDim+1, l=j%secondArg.vDim+secondArg.vDim; k<(i+1)*vDim; k++, l+=secondArg.vDim) {
3782 tempP = GetMatrixObject (k), tempP2 = secondArg.GetMatrixObject(l);
3783 if (tempP&&tempP2) {
3784 _MathObject* temp = tempP->Mult (tempP2);
3785 storage.StoreObject (j,temp->Add(storage.GetMatrixObject(j)));
3786 DeleteObject (temp);
3787 }
3788 }
3789 }
3790 } else {
3791 if ( hDim == vDim && secondArg.hDim == secondArg.vDim)
3792 /* two square dense matrices */
3793 {
3794 unsigned long cumulativeIndex = 0UL;
3795 const unsigned long dimm4 = (vDim >> 2) << 2;
3796
3797 //const hyFloat * row = theData;
3798 hyFloat * dest = storage.theData;
3799
3800 #if defined _SLKP_USE_AVX_INTRINSICS
3801 #define DO_GROUP_OP0(X,Y,k) Y = _mm256_loadu_pd(secondArg.theData + col_offset + k); X = _mm256_mul_pd (A4,Y);
3802 #ifdef _SLKP_USE_FMA3_INTRINSICS
3803 #define DO_GROUP_OP(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k); Y = _mm256_loadu_pd(secondArg.theData + col_offset + k); _mm256_storeu_pd (dest + row_offset + k, _mm256_fmadd_pd (A4,Y,X));
3804 #define DO_GROUP_OP1(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k); Y = _mm256_loadu_pd(secondArg.theData + col_offset + k); X = _mm256_fmadd_pd (A4,Y,X);
3805 #define DO_GROUP_OP2(X,k) _mm256_storeu_pd (dest + row_offset + k,X);
3806 #else
3807 #define DO_GROUP_OP(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k); Y = _mm256_loadu_pd(secondArg.theData + col_offset + k); _mm256_storeu_pd (dest + row_offset + k, _mm256_add_pd (X, _mm256_mul_pd(A4, Y)));
3808
3809 #define DO_GROUP_OP1(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k); Y = _mm256_loadu_pd(secondArg.theData + col_offset + k); X = _mm256_add_pd (X, _mm256_mul_pd(A4, Y));
3810 #define DO_GROUP_OP2(X,k) _mm256_storeu_pd (dest + row_offset + k,X);
3811 #endif
3812
3813 #elif _SLKP_USE_SSE_INTRINSICS
3814 #define DO_GROUP_OP1(X,Y,k) X = _mm_loadu_pd(dest + row_offset + k); Y = _mm_loadu_pd(secondArg.theData + col_offset + k); X = _mm_add_pd (X, _mm_mul_pd(A4, Y));
3815 #define DO_GROUP_OP2(X,k) _mm_storeu_pd (dest + row_offset + k,X);
3816
3817 #elif _SLKP_USE_ARM_NEON
3818 #define DO_GROUP_OP1(X,Y,k) X = vld1q_f64 (dest + row_offset + k); Y = vld1q_f64 (secondArg.theData + col_offset + k); X = vfmaq_f64 (X, A4, Y);
3819 #define DO_GROUP_OP2(X,k) vst1q_f64 (dest + row_offset + k,X);
3820 #endif
3821
3822
3823 #ifndef _SLKP_SSE_VECTORIZATION_
3824
3825 if (dimm4 == vDim) {
3826 #if defined _SLKP_USE_AVX_INTRINSICS
3827 long ti = 0L,
3828 row_offset = 0L;
3829
3830 if (vDim == 20UL) {
3831 for (long r = 0; r < 20; r++, row_offset += 20) {
3832 long col_offset = 0L;
3833 __m256d A4 = _mm256_set1_pd(theData[ti]);
3834 __m256d D4, B4, D4_1, B4_1, D4_2, B4_2, D4_3, B4_3, D4_4, B4_4;
3835 DO_GROUP_OP0 (D4, B4, 0);
3836 DO_GROUP_OP0 (D4_1, B4_1, 4);
3837 DO_GROUP_OP0 (D4_2, B4_2, 8);
3838 DO_GROUP_OP0 (D4_3, B4_3, 12);
3839 DO_GROUP_OP0 (D4_4, B4_4, 16);
3840 DO_GROUP_OP2 (D4, 0);
3841 DO_GROUP_OP2 (D4_1, 4);
3842 DO_GROUP_OP2 (D4_2, 8);
3843 DO_GROUP_OP2 (D4_3, 12);
3844 DO_GROUP_OP2 (D4_4, 16);
3845 ti++;
3846 col_offset = 20L;
3847 for (long c = 1; c < 20L; c++, ti++, col_offset += 20L) {
3848 A4 = _mm256_set1_pd(theData[ti]);
3849 //for (long k = 0; k < 20L; k+=4) {
3850 DO_GROUP_OP1 (D4, B4, 0);
3851 DO_GROUP_OP1 (D4_1, B4_1, 4);
3852 DO_GROUP_OP1 (D4_2, B4_2, 8);
3853 DO_GROUP_OP1 (D4_3, B4_3, 12);
3854 DO_GROUP_OP1 (D4_4, B4_4, 16);
3855 DO_GROUP_OP2 (D4, 0);
3856 DO_GROUP_OP2 (D4_1, 4);
3857 DO_GROUP_OP2 (D4_2, 8);
3858 DO_GROUP_OP2 (D4_3, 12);
3859 DO_GROUP_OP2 (D4_4, 16);
3860 //}
3861 }
3862 }
3863 } else if (vDim == 60UL) {
3864 for (long r = 0; r < 60; r++, row_offset += 60) {
3865 long col_offset = 0L;
3866 __m256d A4 = _mm256_set1_pd(theData[ti]);
3867 for (long k = 0; k < 60L; k+=4) {
3868 __m256d D4, B4;
3869 DO_GROUP_OP0 (D4, B4, k);
3870 DO_GROUP_OP2 (D4, k);
3871 }
3872 col_offset = 60L;
3873 ti++;
3874 for (long c = 1; c < 60L; c++, ti++, col_offset += 60L) {
3875 A4 = _mm256_set1_pd(theData[ti]);
3876 for (long k = 0; k < 60L; k+=4) {
3877 __m256d D4, B4;
3878 DO_GROUP_OP (D4, B4, k);
3879 }
3880 }
3881 }
3882 } else
3883 for (long r = 0; r < vDim; r++, row_offset += vDim) {
3884 long col_offset = 0L;
3885 __m256d A4 = _mm256_set1_pd(theData[ti]);
3886 for (long k = 0; k < vDim; k+=4) {
3887 __m256d D4, B4;
3888 DO_GROUP_OP0 (D4, B4, k);
3889 DO_GROUP_OP2 (D4, k);
3890 }
3891 col_offset = vDim;
3892 ti++;
3893 for (long c = 1; c < vDim; c++, ti++, col_offset += vDim) {
3894 A4 = _mm256_set1_pd(theData[ti]);
3895 #pragma GCC unroll 4
3896 #pragma clang loop vectorize(enable)
3897 #pragma clang loop interleave(enable)
3898 #pragma clang loop unroll(enable)
3899 for (long k = 0; k < vDim; k+=4) {
3900 __m256d D4, B4;
3901 DO_GROUP_OP (D4, B4, k);
3902 }
3903 }
3904 }
3905 return;
3906 #elif _SLKP_USE_SSE_INTRINSICS
3907 memset (dest, 0, lDim * sizeof (hyFloat));
3908 long ti = 0L,
3909 row_offset = 0L;
3910
3911 if (vDim == 20UL) {
3912 for (long r = 0; r < 20; r++, row_offset += 20) {
3913 long col_offset = 0L;
3914 for (long c = 0; c < 20L; c++, ti++, col_offset += 20L) {
3915 __m128d A4 = _mm_set1_pd(theData[ti]);
3916 //for (long k = 0; k < 20L; k+=4) {
3917 __m128d D4, B4, D4_1, B4_1, D4_2, B4_2, D4_3, B4_3, D4_4, B4_4;
3918 DO_GROUP_OP1 (D4, B4, 0);
3919 DO_GROUP_OP1 (D4_1, B4_1, 2);
3920 DO_GROUP_OP1 (D4_2, B4_2, 4);
3921 DO_GROUP_OP1 (D4_3, B4_3, 6);
3922 DO_GROUP_OP1 (D4_4, B4_4, 8);
3923 DO_GROUP_OP2 (D4, 0);
3924 DO_GROUP_OP2 (D4_1, 2);
3925 DO_GROUP_OP2 (D4_2, 4);
3926 DO_GROUP_OP2 (D4_3, 6);
3927 DO_GROUP_OP2 (D4_4, 8);
3928 DO_GROUP_OP1 (D4, B4, 10);
3929 DO_GROUP_OP1 (D4_1, B4_1, 12);
3930 DO_GROUP_OP1 (D4_2, B4_2, 14);
3931 DO_GROUP_OP1 (D4_3, B4_3, 16);
3932 DO_GROUP_OP1 (D4_4, B4_4, 18);
3933 DO_GROUP_OP2 (D4, 10);
3934 DO_GROUP_OP2 (D4_1, 12);
3935 DO_GROUP_OP2 (D4_2, 14);
3936 DO_GROUP_OP2 (D4_3, 16);
3937 DO_GROUP_OP2 (D4_4, 18);
3938 //}
3939 }
3940 }
3941 } else
3942 for (long r = 0; r < vDim; r++, row_offset += vDim) {
3943 long col_offset = 0L;
3944 for (long c = 0; c < vDim; c++, ti++, col_offset += vDim) {
3945 __m128d A4 = _mm_set1_pd(theData[ti]);
3946 #pragma GCC unroll 4
3947 #pragma clang loop vectorize(enable)
3948 #pragma clang loop interleave(enable)
3949 #pragma clang loop unroll(enable)
3950 for (long k = 0; k < vDim; k+=2) {
3951 __m128d D4, B4;
3952 DO_GROUP_OP1 (D4, B4, k);
3953 DO_GROUP_OP2 (D4, k);
3954 }
3955 }
3956 }
3957 return;
3958 #elif _SLKP_USE_ARM_NEON
3959 memset (dest, 0, lDim * sizeof (hyFloat));
3960 long ti = 0L,
3961 row_offset = 0L;
3962
3963 if (vDim == 20UL) {
3964 for (long r = 0; r < 20; r++, row_offset += 20) {
3965 long col_offset = 0L;
3966 for (long c = 0; c < 20L; c++, ti++, col_offset += 20L) {
3967 float64x2_t A4 = vdupq_n_f64(theData[ti]);
3968 //for (long k = 0; k < 20L; k+=4) {
3969 float64x2_t D4, B4, D4_1, B4_1, D4_2, B4_2, D4_3, B4_3, D4_4, B4_4;
3970 DO_GROUP_OP1 (D4, B4, 0);
3971 DO_GROUP_OP1 (D4_1, B4_1, 2);
3972 DO_GROUP_OP1 (D4_2, B4_2, 4);
3973 DO_GROUP_OP1 (D4_3, B4_3, 6);
3974 DO_GROUP_OP1 (D4_4, B4_4, 8);
3975 DO_GROUP_OP2 (D4, 0);
3976 DO_GROUP_OP2 (D4_1, 2);
3977 DO_GROUP_OP2 (D4_2, 4);
3978 DO_GROUP_OP2 (D4_3, 6);
3979 DO_GROUP_OP2 (D4_4, 8);
3980 DO_GROUP_OP1 (D4, B4, 10);
3981 DO_GROUP_OP1 (D4_1, B4_1, 12);
3982 DO_GROUP_OP1 (D4_2, B4_2, 14);
3983 DO_GROUP_OP1 (D4_3, B4_3, 16);
3984 DO_GROUP_OP1 (D4_4, B4_4, 18);
3985 DO_GROUP_OP2 (D4, 10);
3986 DO_GROUP_OP2 (D4_1, 12);
3987 DO_GROUP_OP2 (D4_2, 14);
3988 DO_GROUP_OP2 (D4_3, 16);
3989 DO_GROUP_OP2 (D4_4, 18);
3990 //}
3991 }
3992 }
3993 } else
3994 for (long r = 0; r < vDim; r++, row_offset += vDim) {
3995 long col_offset = 0L;
3996 for (long c = 0; c < vDim; c++, ti++, col_offset += vDim) {
3997 float64x2_t A4 = vdupq_n_f64(theData[ti]);
3998 #pragma GCC unroll 4
3999 #pragma clang loop vectorize(enable)
4000 #pragma clang loop interleave(enable)
4001 #pragma clang loop unroll(enable)
4002 for (long k = 0; k < vDim; k+=2) {
4003 float64x2_t D4, B4;
4004 DO_GROUP_OP1 (D4, B4, k);
4005 DO_GROUP_OP2 (D4, k);
4006 }
4007 }
4008 }
4009 return;
4010 #endif
4011 memset (dest, 0, lDim * sizeof (hyFloat));
4012
4013 for (unsigned long c = 0UL; c < secondArg.vDim; c ++) {
4014
4015 /*
4016 load a series of 4 consecutive elements from a column in the second matrix,
4017 say c [] = [i,i+1,i+2,i+3: c]
4018
4019 next, iterate over all rows in the first matrix, looking for matched consecutive
4020 elements, e.g.
4021
4022 r [] = [r: i,i+1,i+2,i+3]
4023
4024 compute sum_{t=0..3} c[t] * r[t]
4025
4026 add to the element (r,c) in the destination matrix
4027
4028 */
4029
4030 const unsigned long
4031 column_shift2 = secondArg.vDim << 1,
4032 column_shift3 = (secondArg.vDim << 1) + secondArg.vDim,
4033 column_shift4 = secondArg.vDim << 2;
4034
4035 for (unsigned long i = 0UL, vector_index = c; i < secondArg.hDim; i += 4UL, vector_index += column_shift4) {
4036 hyFloat c0 = secondArg.theData[vector_index],
4037 c1 = secondArg.theData[vector_index+secondArg.vDim],
4038 c2 = secondArg.theData[vector_index+column_shift2],
4039 c3 = secondArg.theData[vector_index+column_shift3];
4040
4041 for (unsigned long r = 0UL; r < hDim; r ++) {
4042
4043 unsigned long element = r*vDim + i;
4044
4045 hyFloat r0 = theData[element] * c0,
4046 r1 = theData[element+1] * c1,
4047 r2 = theData[element+2] * c2,
4048 r3 = theData[element+3] * c3;
4049
4050 r0 += r1;
4051 r2 += r3;
4052 dest[r*vDim + c] += r0 + r2;
4053
4054 }
4055 }
4056 }
4057 } else {
4058 #if defined _SLKP_USE_AVX_INTRINSICS
4059 /*
4060
4061 dest [i,j] = sum_k A[i,k] * B[k,j]
4062
4063 # data from B[k,j], B[k,j+1], B[k,j+2], B[k,j+3] goes into dest[i,j] ... dest [i,j+3]
4064 # dest [i,x] will take data from dest A[i,x]
4065
4066 9x9 matrix
4067
4068 dest [0,0] = A[0,0] * B[0,0] + A[0,1] * B[1,0] + A[0,2] * B[2,0] + A[0,3] * B[3,0] + ...;
4069 dest [0,1] = A[0,0] * B[0,1] + A[0,1] * B[1,1] + A[0,2] * B[2,1] + A[0,3] * B[3,1] + ...;
4070 dest [0,2] = A[0,0] * B[0,2] + A[0,1] * B[1,2] + A[0,2] * B[2,2] + A[0,3] * B[3,2] + ...;
4071 dest [0,3] = A[0,0] * B[0,3] + A[0,1] * B[1,3] + A[0,2] * B[2,3] + A[0,3] * B[3,3] + ...;
4072 dest [0,4] = A[0,0] * B[0,4] + A[0,1] * B[1,4] + A[0,2] * B[2,4] + A[0,3] * B[3,4] + ...;
4073 dest [0,5] = A[0,0] * B[0,5] + A[0,1] * B[1,5] + A[0,2] * B[2,5] + A[0,3] * B[3,5] + ...;
4074 dest [0,6] = A[0,0] * B[0,6] + A[0,1] * B[1,6] + A[0,2] * B[2,6] + A[0,3] * B[3,6] + ...;
4075 dest [0,7] = A[0,0] * B[0,7] + A[0,1] * B[1,7] + A[0,2] * B[2,7] + A[0,3] * B[3,7] + ...;
4076 dest [0,8] = A[0,0] * B[0,8] + A[0,1] * B[1,8] + A[0,2] * B[2,8] + A[0,3] * B[3,8] + ...;
4077
4078 ...
4079
4080 dest [3,0] = A[3,0] * B[0,0] + A[3,1] * B[1,0] + A[3,2] * B[2,0] + A[3,3] * B[3,0] + ...;
4081 dest [3,1] = A[3,0] * B[0,1] + A[3,1] * B[1,1] + A[3,2] * B[2,1] + A[3,3] * B[3,1] + ...;
4082 dest [3,2] = A[3,0] * B[0,2] + A[3,1] * B[1,2] + A[3,2] * B[2,2] + A[3,3] * B[3,2] + ...;
4083 dest [3,3] = A[3,0] * B[0,3] + A[3,1] * B[1,3] + A[3,2] * B[2,3] + A[3,3] * B[3,3] + ...;
4084 ....
4085 dest [3,8] = A[3,0] * B[0,8] + A[3,1] * B[1,8] + A[3,2] * B[2,8] + A[3,3] * B[3,8] + ...;
4086
4087 ...
4088
4089 iterate by row (R)
4090 iterate by column (C)
4091 A4 = load element A[R,C] into a 4 element vector
4092
4093 iterate by summing index (K), stride 4
4094 D4 = destination [R:K, R:K+4] // linear access
4095 B4 = B [C:K, C:K+4] // linear access
4096 D4 += A4*B4
4097 write D4 back to memory
4098
4099
4100 if something is left over
4101
4102 iterate by summing index (K)
4103 desination [R,K] += A[R,C] * B[C,K],
4104
4105 */
4106 long ti = 0L,
4107 row_offset = 0L;
4108
4109 if (dimm4 == 60UL) { // codons
4110
4111 for (long r = 0; r < hDim; r++, row_offset += vDim) {
4112 long col_offset = 0L;
4113 for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4114 __m256d A4 = _mm256_set1_pd(theData[ti]);
4115
4116 __m256d D4_1, D4_2, D4_3, D4_4;
4117 __m256d B4_1, B4_2, B4_3, B4_4;
4118
4119 DO_GROUP_OP1 (D4_1,B4_1,0);
4120 DO_GROUP_OP1 (D4_2,B4_2,4);
4121 DO_GROUP_OP1 (D4_3,B4_3,8);
4122 DO_GROUP_OP1 (D4_4,B4_4,12);
4123 DO_GROUP_OP2 (D4_1,0);
4124 DO_GROUP_OP2 (D4_2,4);
4125 DO_GROUP_OP2 (D4_3,8);
4126 DO_GROUP_OP2 (D4_4,12);
4127
4128 DO_GROUP_OP1 (D4_1,B4_1,16);
4129 DO_GROUP_OP1 (D4_2,B4_2,20);
4130 DO_GROUP_OP1 (D4_3,B4_3,24);
4131 DO_GROUP_OP1 (D4_4,B4_4,28);
4132 DO_GROUP_OP2 (D4_1,16);
4133 DO_GROUP_OP2 (D4_2,20);
4134 DO_GROUP_OP2 (D4_3,24);
4135 DO_GROUP_OP2 (D4_4,28);
4136
4137 DO_GROUP_OP1 (D4_1,B4_1,32);
4138 DO_GROUP_OP1 (D4_2,B4_2,36);
4139 DO_GROUP_OP1 (D4_3,B4_3,40);
4140 DO_GROUP_OP1 (D4_4,B4_4,44);
4141 DO_GROUP_OP2 (D4_1,32);
4142 DO_GROUP_OP2 (D4_2,36);
4143 DO_GROUP_OP2 (D4_3,40);
4144 DO_GROUP_OP2 (D4_4,44);
4145
4146 DO_GROUP_OP1 (D4_1,B4_1,48);
4147 DO_GROUP_OP1 (D4_2,B4_2,52);
4148 DO_GROUP_OP1 (D4_3,B4_3,56);
4149 DO_GROUP_OP2 (D4_1,48);
4150 DO_GROUP_OP2 (D4_2,52);
4151 DO_GROUP_OP2 (D4_3,56);
4152
4153 for (long k = dimm4; k < vDim; k++) {
4154 dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4155 }
4156 }
4157 }
4158 } else { // something else
4159 for (long r = 0; r < hDim; r++, row_offset += vDim) {
4160 long col_offset = 0L;
4161 for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4162 __m256d A4 = _mm256_set1_pd(theData[ti]);
4163 #pragma GCC unroll 4
4164 #pragma clang loop vectorize(enable)
4165 #pragma clang loop interleave(enable)
4166 #pragma clang loop unroll(enable)
4167 for (long k = 0; k < dimm4; k+=4) {
4168 __m256d D4, B4;
4169 DO_GROUP_OP (D4, B4, k);
4170 }
4171
4172 for (long k = dimm4; k < vDim; k++) {
4173 dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4174 }
4175 }
4176 }
4177 }
4178 #elif _SLKP_USE_SSE_INTRINSICS
4179 long ti = 0L,
4180 row_offset = 0L;
4181
4182 if (dimm4 == 60UL) { // codons
4183
4184
4185 for (long r = 0; r < hDim; r++, row_offset += vDim) {
4186 long col_offset = 0L;
4187 for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4188 __m128d A4 = _mm_set1_pd(theData[ti]);
4189
4190 __m128d D4_1, D4_2, D4_3, D4_4;
4191 __m128d B4_1, B4_2, B4_3, B4_4;
4192
4193 DO_GROUP_OP1 (D4_1,B4_1,0);
4194 DO_GROUP_OP1 (D4_2,B4_2,2);
4195 DO_GROUP_OP1 (D4_3,B4_3,4);
4196 DO_GROUP_OP1 (D4_4,B4_4,6);
4197 DO_GROUP_OP2 (D4_1,0);
4198 DO_GROUP_OP2 (D4_2,2);
4199 DO_GROUP_OP2 (D4_3,4);
4200 DO_GROUP_OP2 (D4_4,6);
4201
4202 DO_GROUP_OP1 (D4_1,B4_1,8);
4203 DO_GROUP_OP1 (D4_2,B4_2,10);
4204 DO_GROUP_OP1 (D4_3,B4_3,12);
4205 DO_GROUP_OP1 (D4_4,B4_4,14);
4206 DO_GROUP_OP2 (D4_1,8);
4207 DO_GROUP_OP2 (D4_2,10);
4208 DO_GROUP_OP2 (D4_3,12);
4209 DO_GROUP_OP2 (D4_4,14);
4210
4211 DO_GROUP_OP1 (D4_1,B4_1,16);
4212 DO_GROUP_OP1 (D4_2,B4_2,18);
4213 DO_GROUP_OP1 (D4_3,B4_3,20);
4214 DO_GROUP_OP1 (D4_4,B4_4,22);
4215 DO_GROUP_OP2 (D4_1,16);
4216 DO_GROUP_OP2 (D4_2,18);
4217 DO_GROUP_OP2 (D4_3,20);
4218 DO_GROUP_OP2 (D4_4,22);
4219
4220 DO_GROUP_OP1 (D4_1,B4_1,24);
4221 DO_GROUP_OP1 (D4_2,B4_2,26);
4222 DO_GROUP_OP1 (D4_3,B4_3,28);
4223 DO_GROUP_OP1 (D4_4,B4_4,30);
4224 DO_GROUP_OP2 (D4_1,24);
4225 DO_GROUP_OP2 (D4_2,26);
4226 DO_GROUP_OP2 (D4_3,28);
4227 DO_GROUP_OP2 (D4_4,30);
4228
4229 DO_GROUP_OP1 (D4_1,B4_1,32);
4230 DO_GROUP_OP1 (D4_2,B4_2,34);
4231 DO_GROUP_OP1 (D4_3,B4_3,36);
4232 DO_GROUP_OP1 (D4_4,B4_4,38);
4233 DO_GROUP_OP2 (D4_1,32);
4234 DO_GROUP_OP2 (D4_2,34);
4235 DO_GROUP_OP2 (D4_3,36);
4236 DO_GROUP_OP2 (D4_4,38);
4237
4238 DO_GROUP_OP1 (D4_1,B4_1,40);
4239 DO_GROUP_OP1 (D4_2,B4_2,42);
4240 DO_GROUP_OP1 (D4_3,B4_3,44);
4241 DO_GROUP_OP1 (D4_4,B4_4,46);
4242 DO_GROUP_OP2 (D4_1,40);
4243 DO_GROUP_OP2 (D4_2,42);
4244 DO_GROUP_OP2 (D4_3,44);
4245 DO_GROUP_OP2 (D4_4,46);
4246
4247 DO_GROUP_OP1 (D4_1,B4_1,48);
4248 DO_GROUP_OP1 (D4_2,B4_2,50);
4249 DO_GROUP_OP1 (D4_3,B4_3,52);
4250 DO_GROUP_OP1 (D4_4,B4_4,54);
4251 DO_GROUP_OP2 (D4_1,48);
4252 DO_GROUP_OP2 (D4_2,50);
4253 DO_GROUP_OP2 (D4_3,52);
4254 DO_GROUP_OP2 (D4_4,54);
4255
4256 DO_GROUP_OP1 (D4_1,B4_1,56);
4257 DO_GROUP_OP1 (D4_2,B4_2,58);
4258 DO_GROUP_OP2 (D4_1,56);
4259 DO_GROUP_OP2 (D4_2,58);
4260
4261 for (long k = dimm4; k < vDim; k++) {
4262 dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4263 }
4264 }
4265 }
4266 } else { // something else
4267 for (long r = 0; r < hDim; r++, row_offset += vDim) {
4268 long col_offset = 0L;
4269 for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4270 __m128d A4 = _mm_set1_pd(theData[ti]);
4271 #pragma GCC unroll 4
4272 #pragma clang loop vectorize(enable)
4273 #pragma clang loop interleave(enable)
4274 #pragma clang loop unroll(enable)
4275 for (long k = 0; k < dimm4; k+=2) {
4276 __m128d D4, B4;
4277 DO_GROUP_OP1 (D4, B4, k);
4278 DO_GROUP_OP2 (D4, k);
4279 }
4280
4281 for (long k = dimm4; k < vDim; k++) {
4282 dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4283 }
4284 }
4285 }
4286 }
4287 #elif _SLKP_USE_ARM_NEON
4288 long ti = 0L,
4289 row_offset = 0L;
4290
4291 if (dimm4 == 60UL) { // codons
4292
4293
4294 for (long r = 0; r < hDim; r++, row_offset += vDim) {
4295 long col_offset = 0L;
4296 for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4297 float64x2_t A4 = vdupq_n_f64(theData[ti]);
4298
4299 float64x2_t D4_1, D4_2, D4_3, D4_4;
4300 float64x2_t B4_1, B4_2, B4_3, B4_4;
4301
4302 DO_GROUP_OP1 (D4_1,B4_1,0);
4303 DO_GROUP_OP1 (D4_2,B4_2,2);
4304 DO_GROUP_OP1 (D4_3,B4_3,4);
4305 DO_GROUP_OP1 (D4_4,B4_4,6);
4306 DO_GROUP_OP2 (D4_1,0);
4307 DO_GROUP_OP2 (D4_2,2);
4308 DO_GROUP_OP2 (D4_3,4);
4309 DO_GROUP_OP2 (D4_4,6);
4310
4311 DO_GROUP_OP1 (D4_1,B4_1,8);
4312 DO_GROUP_OP1 (D4_2,B4_2,10);
4313 DO_GROUP_OP1 (D4_3,B4_3,12);
4314 DO_GROUP_OP1 (D4_4,B4_4,14);
4315 DO_GROUP_OP2 (D4_1,8);
4316 DO_GROUP_OP2 (D4_2,10);
4317 DO_GROUP_OP2 (D4_3,12);
4318 DO_GROUP_OP2 (D4_4,14);
4319
4320 DO_GROUP_OP1 (D4_1,B4_1,16);
4321 DO_GROUP_OP1 (D4_2,B4_2,18);
4322 DO_GROUP_OP1 (D4_3,B4_3,20);
4323 DO_GROUP_OP1 (D4_4,B4_4,22);
4324 DO_GROUP_OP2 (D4_1,16);
4325 DO_GROUP_OP2 (D4_2,18);
4326 DO_GROUP_OP2 (D4_3,20);
4327 DO_GROUP_OP2 (D4_4,22);
4328
4329 DO_GROUP_OP1 (D4_1,B4_1,24);
4330 DO_GROUP_OP1 (D4_2,B4_2,26);
4331 DO_GROUP_OP1 (D4_3,B4_3,28);
4332 DO_GROUP_OP1 (D4_4,B4_4,30);
4333 DO_GROUP_OP2 (D4_1,24);
4334 DO_GROUP_OP2 (D4_2,26);
4335 DO_GROUP_OP2 (D4_3,28);
4336 DO_GROUP_OP2 (D4_4,30);
4337
4338 DO_GROUP_OP1 (D4_1,B4_1,32);
4339 DO_GROUP_OP1 (D4_2,B4_2,34);
4340 DO_GROUP_OP1 (D4_3,B4_3,36);
4341 DO_GROUP_OP1 (D4_4,B4_4,38);
4342 DO_GROUP_OP2 (D4_1,32);
4343 DO_GROUP_OP2 (D4_2,34);
4344 DO_GROUP_OP2 (D4_3,36);
4345 DO_GROUP_OP2 (D4_4,38);
4346
4347 DO_GROUP_OP1 (D4_1,B4_1,40);
4348 DO_GROUP_OP1 (D4_2,B4_2,42);
4349 DO_GROUP_OP1 (D4_3,B4_3,44);
4350 DO_GROUP_OP1 (D4_4,B4_4,46);
4351 DO_GROUP_OP2 (D4_1,40);
4352 DO_GROUP_OP2 (D4_2,42);
4353 DO_GROUP_OP2 (D4_3,44);
4354 DO_GROUP_OP2 (D4_4,46);
4355
4356 DO_GROUP_OP1 (D4_1,B4_1,48);
4357 DO_GROUP_OP1 (D4_2,B4_2,50);
4358 DO_GROUP_OP1 (D4_3,B4_3,52);
4359 DO_GROUP_OP1 (D4_4,B4_4,54);
4360 DO_GROUP_OP2 (D4_1,48);
4361 DO_GROUP_OP2 (D4_2,50);
4362 DO_GROUP_OP2 (D4_3,52);
4363 DO_GROUP_OP2 (D4_4,54);
4364
4365 DO_GROUP_OP1 (D4_1,B4_1,56);
4366 DO_GROUP_OP1 (D4_2,B4_2,58);
4367 DO_GROUP_OP2 (D4_1,56);
4368 DO_GROUP_OP2 (D4_2,58);
4369
4370 for (long k = dimm4; k < vDim; k++) {
4371 dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4372 }
4373 }
4374 }
4375 } else { // something else
4376 for (long r = 0; r < hDim; r++, row_offset += vDim) {
4377 long col_offset = 0L;
4378 for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
4379 float64x2_t A4 = vdupq_n_f64(theData[ti]);
4380 #pragma GCC unroll 4
4381 #pragma clang loop vectorize(enable)
4382 #pragma clang loop interleave(enable)
4383 #pragma clang loop unroll(enable)
4384 for (long k = 0; k < dimm4; k+=2) {
4385 float64x2_t D4, B4;
4386 DO_GROUP_OP1 (D4, B4, k);
4387 DO_GROUP_OP2 (D4, k);
4388 }
4389
4390 for (long k = dimm4; k < vDim; k++) {
4391 dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
4392 }
4393 }
4394 }
4395 }
4396 #else
4397 const unsigned long
4398 column_shift2 = secondArg.vDim << 1,
4399 column_shift3 = (secondArg.vDim << 1) + secondArg.vDim,
4400 column_shift4 = secondArg.vDim << 2;
4401
4402 hyFloat * row = theData;
4403
4404 for (unsigned long i=0UL; i<hDim; i++, row += vDim) {
4405 for (unsigned long j=0UL; j<secondArg.vDim; j++) {
4406 hyFloat resCell = 0.0;
4407
4408 unsigned long k = 0UL,
4409 column = j;
4410
4411
4412 for (; k < dimm4; k+=4, column += column_shift4) {
4413 hyFloat pr1 = row[k] * secondArg.theData [column],
4414 pr2 = row[k+1] * secondArg.theData [column + secondArg.vDim ],
4415 pr3 = row[k+2] * secondArg.theData [column + column_shift2],
4416 pr4 = row[k+3] * secondArg.theData [column + column_shift3];
4417
4418 pr1 += pr2;
4419 pr3 += pr4;
4420
4421 resCell += pr1 + pr3;
4422 }
4423
4424 for (; k < vDim; k++, column += secondArg.vDim) {
4425 resCell += row[k] * secondArg.theData[column];
4426 }
4427
4428 dest[cumulativeIndex++] = resCell;
4429 }
4430 }
4431
4432
4433 #endif
4434 }
4435
4436 #else
4437 secondArg.Transpose();
4438 for (long i=0; i<hDim; i++, row += vDim) {
4439 for (long j=0; j<hDim; j++) {
4440 hyFloat resCell = 0.0;
4441 #pragma GCC unroll 8
4442 #pragma clang loop vectorize(enable)
4443 #pragma clang loop interleave(enable)
4444 #pragma clang loop unroll(enable)
4445 for (long k = 0, column = j*hDim; k < vDim; k++, column ++) {
4446 resCell += row[k] * secondArg.theData [column];
4447 }
4448
4449 storage.theData[cumulativeIndex++] = resCell;
4450 }
4451 }
4452 secondArg.Transpose();
4453 #endif
4454 } else
4455 /* rectangular matrices */
4456 {
4457 #define _HY_MATRIX_CACHE_BLOCK 128
4458 if (vDim >= 256) {
4459 long nt = 1;
4460 #ifdef _OPENMP
4461 #define GCC_VERSION (__GNUC__ * 10000 \
4462 + __GNUC_MINOR__ * 100 \
4463 + __GNUC_PATCHLEVEL__)
4464 #ifdef __HYPHYMPI__
4465 if (hy_mpi_node_rank == 0)
4466
4467 #endif
4468 nt = MIN(omp_get_max_threads(),secondArg.vDim / _HY_MATRIX_CACHE_BLOCK + 1);
4469 #endif
4470 for (long r = 0; r < hDim; r ++) {
4471 #ifdef _OPENMP
4472 #if _OPENMP>=201511
4473 #pragma omp parallel for default(none) shared(r,secondArg,storage) schedule(monotonic:guided) proc_bind(spread) if (nt>1) num_threads (nt)
4474 #else
4475 #if _OPENMP>=200803
4476 #pragma omp parallel for default(none) shared(r,secondArg,storage) schedule(guided) proc_bind(spread) if (nt>1) num_threads (nt)
4477 #endif
4478 #endif
4479 #endif
4480 for (long c = 0; c < secondArg.vDim; c+= _HY_MATRIX_CACHE_BLOCK) {
4481 hyFloat cacheBlockInMatrix2 [_HY_MATRIX_CACHE_BLOCK][_HY_MATRIX_CACHE_BLOCK];
4482 const long upto_p = (secondArg.vDim-c>=_HY_MATRIX_CACHE_BLOCK)?_HY_MATRIX_CACHE_BLOCK:(secondArg.vDim-c);
4483 for (long r2 = 0; r2 < secondArg.hDim; r2+= _HY_MATRIX_CACHE_BLOCK) {
4484 const long upto_p2 = (secondArg.hDim-r2)>=_HY_MATRIX_CACHE_BLOCK?_HY_MATRIX_CACHE_BLOCK:(secondArg.hDim-r2);
4485 for (long p = 0; p < upto_p; p++) {
4486 for (long p2 = 0; p2 < upto_p2; p2++) {
4487 cacheBlockInMatrix2[p][p2] = secondArg.theData [(r2+p2)*secondArg.vDim+c+p];
4488 }
4489 }
4490 if (upto_p2 % 4 == 0) {
4491 for (long p = 0; p < upto_p; p++) {
4492 hyFloat updater = 0.;
4493 for (long p2 = 0; p2 < upto_p2; p2+=4) {
4494 hyFloat pr1 = theData[r*vDim + r2 + p2]*cacheBlockInMatrix2[p][p2],
4495 pr2 = theData[r*vDim + r2 + p2+1]*cacheBlockInMatrix2[p][p2+1],
4496 pr3 = theData[r*vDim + r2 + p2+2]*cacheBlockInMatrix2[p][p2+2],
4497 pr4 = theData[r*vDim + r2 + p2+3]*cacheBlockInMatrix2[p][p2+3];
4498 pr1 += pr2;
4499 pr3 += pr4;
4500 updater += pr1 + pr3;
4501 }
4502 storage.theData[r*secondArg.vDim + c + p] += updater;
4503 }
4504 } else
4505 for (long p = 0; p < upto_p; p++) {
4506 hyFloat updater = 0.;
4507 for (long p2 = 0; p2 < upto_p2; p2++) {
4508 updater += theData[r*vDim + r2 + p2]*cacheBlockInMatrix2[p][p2];
4509 }
4510 storage.theData[r*secondArg.vDim + c + p] += updater;
4511 }
4512 }
4513 }
4514 }
4515
4516 } else {
4517
4518
4519 if (vDim % 4) {
4520 long mod4 = vDim-vDim%4;
4521 for (long i=0; i<hDim; i++) {
4522 for (long j=0; j<secondArg.vDim; j++) {
4523 hyFloat resCell = 0.0;
4524 long k = 0;
4525 for (; k < mod4; k+=4) {
4526 resCell += theData[i*vDim + k] * secondArg.theData[k*secondArg.vDim + j] +
4527 theData[i*vDim + k + 1] * secondArg.theData[(k+1)*secondArg.vDim + j] +
4528 theData[i*vDim + k + 2] * secondArg.theData[(k+2)*secondArg.vDim + j] +
4529 theData[i*vDim + k + 3] * secondArg.theData[(k+3)*secondArg.vDim + j];
4530 }
4531 for (; k < vDim; k++) {
4532 resCell += theData[i*vDim + k] * secondArg.theData[k*secondArg.vDim + j];
4533 }
4534
4535 storage.theData[i*secondArg.vDim + j] = resCell;
4536 }
4537 }
4538 } else {
4539 for (long i=0; i<hDim; i++) {
4540 for (long j=0; j<secondArg.vDim; j++) {
4541 hyFloat resCell = 0.0;
4542 for (long k = 0; k < vDim; k+=4) {
4543 resCell += theData[i*vDim + k] * secondArg.theData[k*secondArg.vDim + j] +
4544 theData[i*vDim + k + 1] * secondArg.theData[(k+1)*secondArg.vDim + j] +
4545 theData[i*vDim + k + 2] * secondArg.theData[(k+2)*secondArg.vDim + j] +
4546 theData[i*vDim + k + 3] * secondArg.theData[(k+3)*secondArg.vDim + j];
4547 }
4548
4549
4550 storage.theData[i*secondArg.vDim + j] = resCell;
4551 }
4552 }
4553 }
4554 }
4555
4556 }
4557 }
4558
4559 } else if (theIndex && !secondArg.theIndex) { // sparse multiplied by non-sparse
4560 if (storageType == 1 && secondArg.storageType ==1) { // both numeric
4561 if ( vDim == hDim && secondArg.vDim==secondArg.hDim) { // both square and same dimension
4562 /*
4563 break out a special case for universal code
4564 */
4565
4566 if (vDim == 61) {
4567
4568 if (compressedIndex) {
4569 long currentXIndex = 0L;
4570 hyFloat * _hprestrict_ res = storage.theData;
4571
4572 for (long i = 0; i < hDim; i++) { // row in source
4573 while (currentXIndex < compressedIndex[i]) {
4574 long currentXColumn = compressedIndex[currentXIndex + hDim];
4575 // go into the second matrix and look up all the non-zero entries in the currentXColumn row
4576
4577 hyFloat value = theData[currentXIndex];
4578 hyFloat * _hprestrict_ secArg = secondArg.theData + currentXColumn*61;
4579 #ifdef _SLKP_USE_AVX_INTRINSICS
4580 __m256d value_op = _mm256_set1_pd (value);
4581 #ifdef _SLKP_USE_FMA3_INTRINSICS
4582 #define CELL_OP(x) _mm256_storeu_pd (res+x, _mm256_fmadd_pd (value_op, _mm256_loadu_pd (secArg+x),_mm256_loadu_pd(res+x)))
4583 #else
4584 #define CELL_OP(x) _mm256_storeu_pd (res+x, _mm256_add_pd (_mm256_loadu_pd(res+x), _mm256_mul_pd(value_op, _mm256_loadu_pd (secArg+x))))
4585 #endif
4586
4587 CELL_OP(0);CELL_OP(4);CELL_OP(8);CELL_OP(12);
4588 CELL_OP(16);CELL_OP(20);CELL_OP(24);CELL_OP(28);
4589 CELL_OP(32);CELL_OP(36);CELL_OP(40);CELL_OP(44);
4590 CELL_OP(48);CELL_OP(52);CELL_OP(56);
4591 #elif _SLKP_USE_ARM_NEON
4592 float64x2_t value_op = vdupq_n_f64 (value);
4593 #define CELL_OP(x) vst1q_f64 (res+x, vfmaq_f64 (vld1q_f64(res+x), value_op, vld1q_f64 (secArg+x)))
4594
4595
4596 CELL_OP(0);CELL_OP(2);CELL_OP(4);CELL_OP(6);
4597 CELL_OP(8);CELL_OP(10);CELL_OP(12);CELL_OP(14);
4598 CELL_OP(16);CELL_OP(18);CELL_OP(20);CELL_OP(22);
4599 CELL_OP(24);CELL_OP(26);CELL_OP(28);
4600 CELL_OP(30);CELL_OP(32);CELL_OP(34);CELL_OP(36);CELL_OP(38);
4601 CELL_OP(40);CELL_OP(42);CELL_OP(44);CELL_OP(46);
4602 CELL_OP(48);CELL_OP(50);CELL_OP(52);CELL_OP(54);
4603 CELL_OP(56);CELL_OP(58);
4604 #else
4605 for (unsigned long i = 0UL; i < 60UL; i+=4UL) {
4606 res[i] += value * secArg[i];
4607 res[i+1] += value * secArg[i+1];
4608 res[i+2] += value * secArg[i+2];
4609 res[i+3] += value * secArg[i+3];
4610 }
4611 #endif
4612 res[60] += value * secArg[60];
4613 currentXIndex ++;
4614 }
4615 res += vDim;
4616
4617 }
4618
4619
4620
4621 } else {
4622
4623 for (unsigned long k=0UL; k<lDim; k++) { // loop over entries in the sparse matrix
4624 long m = theIndex[k];
4625 if (m >= 0L) {
4626 long i = ((unsigned long)m)%61;
4627
4628 hyFloat value = theData[k];
4629 hyFloat * _hprestrict_ res = storage.theData + (m-i);
4630 hyFloat * _hprestrict_ secArg = secondArg.theData + i*61;
4631
4632 #ifdef _SLKP_USE_AVX_INTRINSICS
4633 __m256d value_op = _mm256_set1_pd (value);
4634
4635 #ifdef _SLKP_USE_FMA3_INTRINSICS
4636 #define CELL_OP(x) _mm256_storeu_pd (res+x, _mm256_fmadd_pd (value_op, _mm256_loadu_pd (secArg+x),_mm256_loadu_pd(res+x)))
4637 #else
4638 #define CELL_OP(x) _mm256_storeu_pd (res+x, _mm256_add_pd (_mm256_loadu_pd(res+x), _mm256_mul_pd(value_op, _mm256_loadu_pd (secArg+x))))
4639 #endif
4640 CELL_OP(0);CELL_OP(4);CELL_OP(8);CELL_OP(12);
4641 CELL_OP(16);CELL_OP(20);CELL_OP(24);CELL_OP(28);
4642 CELL_OP(32);CELL_OP(36);CELL_OP(40);CELL_OP(44);
4643 CELL_OP(48);CELL_OP(52);CELL_OP(56);
4644 #elif _SLKP_USE_ARM_NEON
4645 float64x2_t value_op = vdupq_n_f64 (value);
4646 #define CELL_OP(x) vst1q_f64 (res+x, vfmaq_f64 (vld1q_f64(res+x), value_op, vld1q_f64 (secArg+x)))
4647
4648
4649 CELL_OP(0);CELL_OP(2);CELL_OP(4);CELL_OP(6);
4650 CELL_OP(8);CELL_OP(10);CELL_OP(12);CELL_OP(14);
4651 CELL_OP(16);CELL_OP(18);CELL_OP(20);CELL_OP(22);
4652 CELL_OP(24);CELL_OP(26);CELL_OP(28);
4653 CELL_OP(30);CELL_OP(32);CELL_OP(34);CELL_OP(36);CELL_OP(38);
4654 CELL_OP(40);CELL_OP(42);CELL_OP(44);CELL_OP(46);
4655 CELL_OP(48);CELL_OP(50);CELL_OP(52);CELL_OP(54);
4656 CELL_OP(56);CELL_OP(58);
4657 #else
4658 for (unsigned long i = 0UL; i < 60UL; i+=4UL) {
4659 res[i] += value * secArg[i];
4660 res[i+1] += value * secArg[i+1];
4661 res[i+2] += value * secArg[i+2];
4662 res[i+3] += value * secArg[i+3];
4663 }
4664 #endif
4665 res[60] += value * secArg[60];
4666 }
4667 }
4668 }
4669
4670 } else {
4671 long loopBound = (vDim >> 2) << 2;
4672
4673 if (compressedIndex) {
4674 long currentXIndex = 0L;
4675 hyFloat * _hprestrict_ res = storage.theData;
4676
4677 for (long i = 0; i < hDim; i++) { // row in source
4678 while (currentXIndex < compressedIndex[i]) {
4679 long currentXColumn = compressedIndex[currentXIndex + hDim];
4680 // go into the second matrix and look up all the non-zero entries in the currentXColumn row
4681
4682 hyFloat value = theData[currentXIndex];
4683 hyFloat * _hprestrict_ secArg = secondArg.theData + currentXColumn*vDim;
4684 #ifdef _SLKP_USE_AVX_INTRINSICS
4685 __m256d value_op = _mm256_set1_pd (value);
4686 #endif
4687 #ifdef _SLKP_USE_ARM_NEON
4688 float64x2_t value_op = vdupq_n_f64 (value);
4689 #endif
4690 for (unsigned long i = 0UL; i < loopBound; i+=4) {
4691 #ifdef _SLKP_USE_AVX_INTRINSICS
4692 #ifdef _SLKP_USE_FMA3_INTRINSICS
4693 _mm256_storeu_pd (res+i, _mm256_fmadd_pd (value_op, _mm256_loadu_pd (secArg+i),_mm256_loadu_pd(res+i)));
4694 #else
4695 _mm256_storeu_pd (res+i, _mm256_add_pd (_mm256_loadu_pd(res+i), _mm256_mul_pd(value_op, _mm256_loadu_pd (secArg+i))));
4696 #endif
4697 #elif defined _SLKP_USE_ARM_NEON
4698 vst1q_f64 (res+i, vfmaq_f64 (vld1q_f64(res+i), value_op, vld1q_f64 (secArg+i)));
4699 vst1q_f64 (res+i+2, vfmaq_f64 (vld1q_f64(res+i+2), value_op, vld1q_f64 (secArg+i+2)));
4700 #else
4701 res[i] += value * secArg[i];
4702 res[i+1] += value * secArg[i+1];
4703 res[i+2] += value * secArg[i+2];
4704 res[i+3] += value * secArg[i+3];
4705 #endif
4706 }
4707 for (unsigned long i = loopBound; i < vDim; i++) {
4708 res[i] += value * secArg[i];
4709 }
4710 currentXIndex ++;
4711 }
4712 res += vDim;
4713
4714 }
4715 } else {
4716 for (unsigned long k=0UL; k<lDim; k++) { // loop over entries in the sparse matrix
4717 long m = theIndex[k];
4718 if (m != -1L ) { // non-zero
4719 long i = ((unsigned long)m)%vDim;
4720 // this element will contribute to (r, c' = [0..vDim-1]) entries in the result matrix
4721 // in the form of A_rc * B_cc'
4722
4723 hyFloat value = theData[k];
4724 hyFloat *_hprestrict_ res = storage.theData + (m-i);
4725 hyFloat *_hprestrict_ secArg = secondArg.theData + i*vDim;
4726 #ifdef _SLKP_USE_AVX_INTRINSICS
4727 __m256d value_op = _mm256_set1_pd (value);
4728 #endif
4729 #ifdef _SLKP_USE_ARM_NEON
4730 float64x2_t value_op = vdupq_n_f64 (value);
4731 #endif
4732
4733 for (unsigned long i = 0UL; i < loopBound; i+=4) {
4734 #ifdef _SLKP_USE_AVX_INTRINSICS
4735 #ifdef _SLKP_USE_FMA3_INTRINSICS
4736 _mm256_storeu_pd (res+i, _mm256_fmadd_pd (value_op, _mm256_loadu_pd (secArg+i),_mm256_loadu_pd(res+i)));
4737 #else
4738 _mm256_storeu_pd (res+i, _mm256_add_pd (_mm256_loadu_pd(res+i), _mm256_mul_pd(value_op, _mm256_loadu_pd (secArg+i))));
4739 #endif
4740 #elif defined _SLKP_USE_ARM_NEON
4741 vst1q_f64 (res+i, vfmaq_f64 (vld1q_f64(res+i), value_op, vld1q_f64 (secArg+i)));
4742 vst1q_f64 (res+i+2, vfmaq_f64 (vld1q_f64(res+i+2), value_op, vld1q_f64 (secArg+i+2)));
4743 #else
4744 res[i] += value * secArg[i];
4745 res[i+1] += value * secArg[i+1];
4746 res[i+2] += value * secArg[i+2];
4747 res[i+3] += value * secArg[i+3];
4748 #endif
4749
4750 }
4751 for (unsigned long i = loopBound; i < vDim; i++) {
4752 res[i] += value * secArg[i];
4753 }
4754
4755 }
4756 }
4757 }
4758 } // special codon case
4759 } else {
4760 for (long k=0; k<lDim; k++) {
4761 long m = theIndex[k];
4762 if (m!=-1) {
4763 long i = ((unsigned long)m)/vDim;
4764 long j = m - i*vDim;
4765 hyFloat c = theData[k];
4766 hyFloat* stData = storage.theData+i*secondArg.vDim,
4767 * secArgData=secondArg.theData+j*secondArg.vDim,
4768 * stopper = secArgData+secondArg.vDim;
4769 for (; secArgData!=stopper; stData++,secArgData++) {
4770 *stData+=c**secArgData;
4771 }
4772 }
4773 }
4774 }
4775 } else { // polynomial entries
4776 for (long k=0; k<lDim; k++) {
4777 if (IsNonEmpty(k)) {
4778 long i = theIndex[k]/vDim;
4779 long j = theIndex[k]%vDim;
4780 _MathObject* p = GetMatrixObject(k);
4781 for (long l=j*secondArg.vDim, m=i*secondArg.vDim; l<(j+1)*secondArg.vDim; l++,m++) {
4782 tempP = secondArg.GetMatrixObject (l);
4783 if (!tempP) {
4784 continue;
4785 }
4786 _MathObject* temp = p->Mult (secondArg.GetMatrixObject (l));
4787 tempP = storage.GetMatrixObject(m);
4788 if (tempP) {
4789 storage.StoreObject (m, tempP->Add (temp));
4790 } else {
4791 storage.StoreObject (m, temp, true);
4792 }
4793 DeleteObject (temp);
4794 }
4795 }
4796 }
4797 }
4798
4799 } else if ( !theIndex && secondArg.theIndex)
4800 // non-sparse multiplied by sparse
4801 {
4802 if ( storageType == 1 && secondArg.storageType ==1) {
4803 if (vDim == hDim && secondArg.vDim==secondArg.hDim)
4804 // both are square matrices
4805 {
4806 for (long k=0; k<secondArg.lDim; k++) {
4807 long m = secondArg.theIndex[k];
4808 if (m!=-1) { // a non-zero value
4809 // because r_ij = sum_k a_ik * b_kj
4810 // a non-zero b_kj will contribute a_ik * b_kj to the a_ij cell of the result
4811 // loop over i...
4812
4813 hyFloat c = secondArg.theData[k];
4814
4815 for (long cell = m%secondArg.vDim, secondCell = m/secondArg.vDim; cell < lDim; cell += vDim, secondCell += vDim) {
4816 storage.theData[cell] += c * theData[secondCell];
4817 }
4818
4819 }
4820 }
4821 } else {
4822 for (long k=0; k<secondArg.lDim; k++) {
4823 long m = secondArg.theIndex[k];
4824 if (m!=-1) {
4825 long i = m/secondArg.vDim;
4826 long j = m%secondArg.vDim;
4827 hyFloat c = secondArg.theData[k];
4828 hyFloat *stData = storage.theData+j,
4829 *secData = theData+i,
4830 *stopper = theData+lDim;
4831 for (; secData<stopper; secData+=vDim, stData+=secondArg.vDim) {
4832 *stData += c**secData;
4833 }
4834 }
4835 }
4836 }
4837 } else { // polynomial entries
4838 for (long k=0; k<secondArg.lDim; k++) {
4839
4840 if (secondArg.IsNonEmpty(k)) {
4841 long i = secondArg.theIndex[k]/secondArg.vDim;
4842 long j = secondArg.theIndex[k]%secondArg.vDim;
4843 _MathObject* p = secondArg.GetMatrixObject(k);
4844 for (long l=i, m=j; l<lDim; l+=vDim,m+=secondArg.vDim) {
4845 tempP = GetMatrixObject (l);
4846 if (!tempP) {
4847 continue;
4848 }
4849 _MathObject* temp = p->Mult (tempP);
4850 tempP = storage.GetMatrixObject(m);
4851 if (tempP) {
4852 storage.StoreObject (m, tempP->Add (temp));
4853 } else {
4854 storage.StoreObject (m, temp, true);
4855 }
4856 DeleteObject (temp);
4857 }
4858 }
4859 }
4860
4861 }
4862 } else {
4863 //sparse by sparse
4864
4865 /**
4866 X Y where both X and Y are sparse can be multipled more efficiently than O (N^3)
4867 if cell (i,j) is non-zero in X, it will contribute to cells (i,k) in the result matrix,
4868 it will contribute to the cells of the result matrix (i,k) where k is such that there are non-zero entries in thh k-th row of matrix Y
4869 */
4870
4871 if (compressedIndex && secondArg.compressedIndex) {
4872 long currentXIndex = 0,
4873 storageIndex = 0;
4874
4875 for (long i = 0; i < hDim; i++) { // row in source
4876
4877 while (currentXIndex < compressedIndex[i]) {
4878 long currentXColumn = compressedIndex[currentXIndex + hDim];
4879 // go into the second matrix and look up all the non-zero entries in the currentXColumn row
4880 hyFloat c = theData[currentXIndex];
4881 long from = currentXColumn ? secondArg.compressedIndex[currentXColumn-1] : 0,
4882 to = secondArg.compressedIndex[currentXColumn];
4883 for (long secondIndex = from; secondIndex < to; secondIndex ++) {
4884 storage.theData[storageIndex + secondArg.compressedIndex[secondIndex + secondArg.hDim]] += c*secondArg.theData[secondIndex];
4885 }
4886 currentXIndex ++;
4887 }
4888
4889 storageIndex += secondArg.vDim;
4890
4891 }
4892 } else {
4893
4894 long * indexVector = (long*)alloca ( sizeof(long)*secondArg.hDim);
4895 // how many non-zero elements are there in the i-th row of matrix Y
4896 memset (indexVector,0,secondArg.hDim*sizeof(long));
4897
4898 long *indexTable,
4899 *indexTable2,
4900 indexTableColumnWidth = secondArg.vDim,
4901 indexTableDim = secondArg.hDim*indexTableColumnWidth;
4902
4903 indexTable = (long*)MatrixMemAllocate( sizeof(long)*indexTableDim);
4904 // element (i,j) of this matrix is the COLUMN index in which the j-th non-zero entry appears in row i of matrix Y
4905 indexTable2 = (long*)MatrixMemAllocate( sizeof(long)*indexTableDim);
4906 // element (i,j) of this matrix is the index (in theData) for the j-th non-zero entry appears in row i of matrix Y
4907
4908 memset (indexTable,0,indexTableDim*sizeof(long));
4909 memset (indexTable2,0,indexTableDim*sizeof(long));
4910
4911 if (is_numeric()) {
4912 // numeric
4913
4914 for (long i=0; i<secondArg.lDim; i++) {
4915 long elementIndex = secondArg.theIndex[i];
4916 if (__builtin_expect (elementIndex >= 0L,1)) {
4917 long k = elementIndex/secondArg.vDim;
4918 long j = k*secondArg.vDim+(indexVector[k]++);
4919 indexTable [j] = elementIndex % secondArg.vDim;
4920 indexTable2[j] = i;
4921 }
4922 }
4923 for (long k=0; k<lDim; k++) {
4924 long elementIndex = theIndex[k];
4925 if (__builtin_expect (elementIndex >= 0L,1)) {
4926 hyFloat c = theData[k];
4927 long i = elementIndex / vDim,
4928 j = elementIndex - i*vDim,
4929 n = j*indexTableColumnWidth,
4930 m = i*secondArg.vDim,
4931 nonZeroCount = indexVector[j];
4932
4933 for (long l=n; l<n+nonZeroCount; l++) {
4934 storage.theData[m+indexTable[l]] += c*secondArg.theData[indexTable2[l]];
4935 }
4936 }
4937 }
4938 } else { // polynomial entries
4939
4940 for (long i=0; i<secondArg.lDim; i++) {
4941 long elementIndex = secondArg.theIndex[i];
4942 if (IsNonEmpty(i)) {
4943 long k = elementIndex/secondArg.vDim;
4944 long j = k*secondArg.vDim+(indexVector[k]++);
4945 indexTable [j] = elementIndex%secondArg.vDim;
4946 indexTable2[j] = i;
4947 }
4948 }
4949 for (long k=0; k<lDim; k++) {
4950 if (IsNonEmpty(k)) {
4951 long i = theIndex[k]/vDim;
4952 long j = theIndex[k]%vDim;
4953 _MathObject* p = GetMatrixObject(k);
4954 long n = j*secondArg.vDim;
4955 long m = i*secondArg.vDim;
4956 for (long l=n; l<n+indexVector[j]; l++) {
4957 _MathObject* temp = p->Mult (secondArg.GetMatrixObject (indexTable2[l]));
4958 tempP = storage.GetMatrixObject(m+indexTable[l]%secondArg.vDim);
4959 if (tempP) {
4960 storage.StoreObject (m+indexTable[l]%secondArg.vDim, tempP->Add (temp));
4961 } else {
4962 storage.StoreObject (m+indexTable[l]%secondArg.vDim, temp, true);
4963 }
4964
4965 DeleteObject (temp);
4966 }
4967 }
4968 }
4969 }
4970
4971 MatrixMemFree( indexTable);
4972 MatrixMemFree( indexTable2);
4973 }
4974 }
4975
4976
4977 }
4978
4979 //_____________________________________________________________________________________________
4980
HashBack(long logicalIndex) const4981 long _Matrix::HashBack (long logicalIndex) const {
4982 // returns element's matrix index in the form vDim*(i-1)+j, where i, j are the matrix coordinates
4983 // given a buffer index
4984 return theIndex?theIndex [logicalIndex]:logicalIndex;
4985 }
4986
4987 //_____________________________________________________________________________________________
4988
MaxElement(char runMode,long * indexStore) const4989 hyFloat _Matrix::MaxElement (char runMode, long* indexStore) const {
4990 // returns matrix's largest abs value element
4991 if (storageType == _NUMERICAL_TYPE) {
4992 hyFloat max = 0.0,
4993 temp;
4994
4995 bool doAbsValue = runMode != 1 && runMode != 3,
4996 doMaxElement = runMode == 0 || runMode == 3;
4997
4998 if (doMaxElement) {
4999 max = -INFINITY;
5000 }
5001
5002 if (theIndex) {
5003 for (long i = 0; i<lDim; i++) {
5004 long k = theIndex[i];
5005 if (k != -1) {
5006 temp = theData[i];
5007 if (doAbsValue && temp<0.0) {
5008 temp = -temp;
5009 }
5010
5011 if (doMaxElement) {
5012 if (temp>max) {
5013 max = temp;
5014 if (indexStore) {
5015 *indexStore = k;
5016 }
5017 }
5018 } else {
5019 max += temp;
5020 }
5021 }
5022 }
5023 return max;
5024 } else {
5025
5026 if (doAbsValue) {
5027 if (doMaxElement) {
5028 for (long i = 0; i<lDim; i++) {
5029 hyFloat t = fabs(theData[i]);
5030 if (t > max) {
5031 max = t;
5032 if (indexStore) {
5033 *indexStore = i;
5034 }
5035 }
5036 }
5037 } else {
5038 for (long i = 0; i<lDim; i++) {
5039 max += fabs (theData[i]);
5040 }
5041 }
5042 } else {
5043 if (doMaxElement) {
5044 for (long i = 0; i<lDim; i++) {
5045 hyFloat t = theData[i];
5046 if (t > max) {
5047 max = t;
5048 if (indexStore) {
5049 *indexStore = i;
5050 }
5051 }
5052 }
5053 } else {
5054 for (long i = 0; i<lDim; i++) {
5055 max += theData[i];
5056 }
5057 }
5058 }
5059
5060 /*for (long i = 0; i<lDim; i++) {
5061 temp = theData[i];
5062 if (doAbsValue && temp<0.0) {
5063 temp = -temp;
5064 }
5065
5066 if (doMaxElement) {
5067 if (temp>max) {
5068 max = temp;
5069 if (indexStore) {
5070 *indexStore = i;
5071 }
5072 }
5073 } else {
5074 max += temp;
5075 }
5076 }*/
5077 return max;
5078 }
5079 }
5080 if (runMode) {
5081 return 0;
5082 }
5083
5084 return 10.0;
5085 }
5086
5087 //_____________________________________________________________________________________________
5088
RowAndColumnMax(hyFloat & r,hyFloat & c,hyFloat * cache)5089 void _Matrix::RowAndColumnMax (hyFloat& r, hyFloat &c, hyFloat * cache) {
5090
5091 // returns the maximum row sum / column sum
5092 // the cache must be big enough to hold hDim + vDim
5093 // leave as nil to allocate cache run time
5094 r = c = 10.;
5095
5096 if (is_numeric()) { // numeric matrix
5097 hyFloat *maxScratch = cache;
5098 r = c = 0.;
5099
5100 if (maxScratch == nil) {
5101 maxScratch = (hyFloat*)MemAllocate ((hDim+vDim)*sizeof(hyFloat), true);
5102 } else
5103 InitializeArray(maxScratch, hDim + vDim, 0.0);
5104
5105 hyFloat * rowMax = maxScratch,
5106 * colMax = maxScratch + hDim;
5107
5108 if (theIndex) {
5109 if (compressedIndex) {
5110 long from = 0L;
5111 for (long row = 0; row < hDim; row++) {
5112 for (long col = from; col < compressedIndex[row]; col++) {
5113 hyFloat temp = theData[col];
5114 if (temp > 0.0) {
5115 rowMax[row] += temp;
5116 colMax[compressedIndex[col+hDim]] += temp;
5117 } else {
5118 rowMax[row] -= temp;
5119 colMax[compressedIndex[col+hDim]] -= temp;
5120 }
5121 }
5122 from = compressedIndex[row];
5123 }
5124 } else {
5125 for (long i = 0; i<lDim; i++) {
5126 long k = theIndex[i];
5127 if (k!=-1) {
5128 hyFloat temp = theData[i];
5129
5130 if (temp>0.0) {
5131 rowMax[k/vDim] += temp;
5132 colMax[k%vDim] += temp;
5133 } else {
5134 rowMax[k/vDim] -= temp;
5135 colMax[k%vDim] -= temp;
5136 }
5137 }
5138 }
5139 }
5140 } else
5141 // dense matrix
5142 for (long i = 0, k=0; i<hDim; i++) {
5143 for (long j=0; j<vDim; j++, k++) {
5144 hyFloat temp = theData[k];
5145 if (temp<0.0) {
5146 rowMax[i] -= temp;
5147 colMax[j] -= temp;
5148 } else {
5149 rowMax[i] += temp;
5150 colMax[j] += temp;
5151 }
5152 }
5153 }
5154
5155 for (long i=0; i<hDim; i++) if (rowMax[i]>r) {
5156 r = rowMax [i];
5157 }
5158 for (long j=0; j<vDim; j++) if (colMax[j]>c) {
5159 c = colMax [j];
5160 }
5161
5162 if (!cache) {
5163 free(maxScratch);
5164 }
5165 }
5166 }
5167
5168 //_____________________________________________________________________________________________
5169
IsMaxElement(hyFloat bench)5170 bool _Matrix::IsMaxElement (hyFloat bench) {
5171 // returns matrix's largest abs value element
5172 if (is_numeric()) {
5173
5174 hyFloat mBench = -bench;
5175 if (!theIndex || compressedIndex) {
5176 for (long i = 0; i<lDim; i++) {
5177 hyFloat t = theData[i];
5178 if ( t>bench || t<mBench ) {
5179 return true;
5180 }
5181 }
5182 } else {
5183 for (long i = 0; i<lDim; i++) {
5184 if (theIndex [i] >= 0) {
5185 hyFloat t = theData[i];
5186 if ( t>bench || t<mBench ) {
5187 return true;
5188 }
5189 }
5190 }
5191
5192 }
5193 return false;
5194 } else if (storageType == 0) {
5195 _Polynomial ** pData = (_Polynomial **)theData;
5196 for (long i = 0; i<lDim; i++, pData++) {
5197 if ((*pData)->IsMaxElement(bench)) {
5198 return true;
5199 }
5200 }
5201 return false;
5202 }
5203 return true;
5204 }
5205
5206 //_____________________________________________________________________________________________
5207
MaxRelError(_Matrix & compMx)5208 hyFloat _Matrix::MaxRelError (_Matrix& compMx)
5209 // returns matrix's largest abs value element
5210 {
5211 if (storageType == 1) {
5212 hyFloat max = 0, temp;
5213 for (long i = 0; i<lDim; i++) {
5214 temp = theData[i]/compMx.theData[i];
5215 if (temp<0.0) {
5216 temp*=-1.0;
5217 }
5218 if (temp>max) {
5219 max = temp;
5220 }
5221 }
5222 return max;
5223 }
5224 return 10.0;
5225 }
5226
5227
5228 //_____________________________________________________________________________________________
5229
MinElement(char doAbsValue,long * storeIndex)5230 hyFloat _Matrix::MinElement (char doAbsValue, long* storeIndex)
5231 // returns matrix's smalles non-zero abs value element
5232 {
5233 if (storageType == 1) {
5234 hyFloat min = DBL_MAX;
5235
5236 if (theIndex)
5237 for (long i = 0; i<lDim; i++) {
5238 if (theIndex[i] < 0) {
5239 continue;
5240 }
5241
5242 hyFloat temp = theData[i];
5243
5244 if (temp < 0.0 && doAbsValue) {
5245 temp = -temp;
5246 }
5247
5248 if (temp<min) {
5249 if (storeIndex) {
5250 *storeIndex = theIndex[i];
5251 }
5252 min = temp;
5253 }
5254
5255 }
5256 else
5257 for (long i = 0; i<lDim; i++) {
5258 hyFloat temp = theData[i];
5259
5260 if (temp < 0.0 && doAbsValue) {
5261 temp = -temp;
5262 }
5263
5264 if (temp<min) {
5265 if (storeIndex) {
5266 *storeIndex = i;
5267 }
5268 min = temp;
5269 }
5270 }
5271
5272 return min;
5273 } else {
5274 return 1.0;
5275 }
5276 }
5277 //_____________________________________________________________________________________________
Transpose(void)5278 void _Matrix::Transpose (void)
5279 // transpose a matrix
5280 {
5281 if (storageType == 1) {
5282 if (hDim == vDim) { // do an in place swap
5283 if (!theIndex) { // non-sparse
5284 for (long i = 0; i<hDim; i++)
5285 for (long j = i+1; j<vDim; j++) {
5286 hyFloat z = theData[i*vDim+j];
5287 theData[i*vDim+j] = theData[j*vDim+i];
5288 theData[j*vDim+i] = z;
5289 }
5290 } else { // sparse
5291 for (long i = 0; i<lDim; i++) {
5292 long p = theIndex[i];
5293 if (p!=-1) {
5294 long k = p/vDim;
5295 long l = p - k*vDim;
5296
5297 if (l!=k) { // off - diag
5298 p = Hash (l,k);
5299 hyFloat z = theData[i];
5300 if (p>=0) {
5301 theData[i] = theData[p];
5302 theData[p] = z;
5303 } else {
5304 theIndex[i]=-1;
5305 (*this)[l*vDim+k]=z;
5306 }
5307 }
5308 }
5309 }
5310 }
5311 } else {
5312 _Matrix result (vDim, hDim, bool(theIndex), true);
5313 if (!theIndex) { // dense
5314 for (long i = 0; i<hDim; i++)
5315 for (long j = 0; j<vDim; j++) {
5316 result.theData[j*hDim+i]=theData[i*vDim+j];
5317 }
5318 } else {
5319 for (long i = 0; i<lDim; i++)
5320 if (IsNonEmpty(i)) {
5321 long r = theIndex[i]/vDim,
5322 c = theIndex[i]%vDim;
5323
5324 result[c*hDim+r]=theData[i];
5325 }
5326 }
5327 *this = result;
5328 }
5329 } else { // polynomial entries
5330 hyPointer z;
5331 if (hDim == vDim) {
5332 if (!theIndex) { // non-sparse
5333 for (long i = 0; i<hDim; i++)
5334 for (long j = i+1; j<vDim; j++) {
5335 if (storageType==2) {
5336 z = (hyPointer)GetFormula(i,j);
5337 } else {
5338 z = (hyPointer)GetMatrixObject(i*vDim+j);
5339 }
5340
5341 if (storageType==2) {
5342 ((_Formula**)theData)[i*vDim+j] = GetFormula(j,i);
5343 } else {
5344 ((HBLObjectRef*)theData)[i*vDim+j] = GetMatrixObject(j*vDim+i);
5345 }
5346
5347 ((hyPointer*)theData)[j*vDim+i] = z;
5348 }
5349 } else { // sparse
5350 long i,k,l,p;
5351 for (i = 0; i<lDim; i++) {
5352 if (IsNonEmpty(i)) {
5353 k = theIndex[i]/vDim;
5354 l = theIndex[i]%vDim;
5355 if (l!=k) {
5356 p = Hash (l,k);
5357
5358 if (storageType==2) {
5359 z = (hyPointer)GetFormula(k,l);
5360 } else {
5361 z = (hyPointer)GetMatrixObject(i);
5362 }
5363
5364 if (p>=0) {
5365 if (storageType==2) {
5366 ((_Formula**)theData)[i] = GetFormula(l,k);
5367 } else {
5368 ((_MathObject**)theData)[i] = GetMatrixObject(p);
5369 }
5370
5371 ((hyPointer*)theData)[p] = z;
5372 } else {
5373 theIndex[i]=-1;
5374 if (storageType==2) {
5375 StoreFormula(l,k,*(_Formula*)z,false,false);
5376 } else {
5377 StoreObject(l*vDim+k,(HBLObjectRef)z);
5378 }
5379 }
5380 }
5381 }
5382 }
5383 }
5384 } else {
5385
5386 _Matrix result;
5387 CreateMatrix (&result,vDim, hDim, bool(theIndex),false,storageType==2);
5388 result.storageType = storageType;
5389 if (!theIndex) {
5390 for (long i = 0; i<hDim; i++)
5391 for (long j = 0; j<vDim; j++) {
5392 if (storageType == 2) {
5393 result.StoreFormula(j,i,*GetFormula(i,j),true,false);
5394 } else {
5395 z = (hyPointer)GetMatrixObject (i*vDim+j);
5396 result.StoreObject(j*hDim+i,(HBLObjectRef)z);
5397 ((HBLObjectRef)z)->AddAReference();
5398 }
5399 }
5400 } else {
5401 long r,c;
5402 for (long i = 0; i<lDim; i++)
5403 if (IsNonEmpty(i)) {
5404 r = theIndex[i]/vDim;
5405 c = theIndex[i]%vDim;
5406 if (storageType == 2) {
5407 result.StoreFormula(c,r,*GetFormula(r,c),true,false);
5408 } else {
5409 z = (hyPointer)GetMatrixObject (i);
5410 result.StoreObject(c*hDim+r,(HBLObjectRef)z);
5411 ((HBLObjectRef)z)->AddAReference();
5412 }
5413 }
5414 }
5415 Swap (result);
5416 //*this = result;
5417 }
5418 }
5419 }
5420
5421 //_____________________________________________________________________________________________
5422
CompressSparseMatrix(bool transpose,hyFloat * stash)5423 void _Matrix::CompressSparseMatrix (bool transpose, hyFloat * stash) {
5424 /**
5425 The purpose of this function is to "pack" the elements of a sparse matrix into an index array (theIndex) so that
5426 (1) there are no "unused" elements in the middle
5427 (2) the elements in theIndex are in the same order as they are in the matrix (row by row)
5428
5429 Optonally, the packe matrix can be transposed.
5430 */
5431
5432
5433 if (theIndex) {
5434
5435 if (compressedIndex && transpose) {
5436 long ai = 0;
5437 long from = 0L;
5438 memcpy (stash, theData, sizeof (hyFloat)*lDim);
5439 long * by_column_counts = (long*)alloca ((hDim + lDim) * sizeof (long));
5440
5441 memset (by_column_counts, 0, hDim * sizeof (long));
5442
5443 // pass 1 : count records by column
5444 long * stored_by_col = (long*)alloca (hDim * sizeof (long));
5445 memset (stored_by_col, 0, hDim * sizeof (long));
5446
5447 for (long r = 0L; r < hDim; r++) {
5448 for (long c = from; c < compressedIndex[r]; c++) {
5449 by_column_counts[compressedIndex[hDim+c]] ++;
5450 }
5451 from = compressedIndex[r];
5452 }
5453
5454
5455 for (long r = 1L; r < hDim; r++) {
5456 by_column_counts[r] += by_column_counts[r-1];
5457 //printf ("%ld %ld (%ld)\n", r, by_column_counts[r], compressedIndex[r]);
5458 }
5459
5460 // pass 2 : invert indices
5461 from = 0L;
5462
5463
5464 for (long r = 0L; r < hDim; r++) {
5465 for (long c = from; c < compressedIndex[r]; c++) {
5466 long my_col = compressedIndex[c+hDim];
5467 // (i,j) => (j,i)
5468 long new_index = my_col * hDim + r;
5469 long col_offset = my_col ? by_column_counts[my_col-1] : 0;
5470 long new_compressed_index = stored_by_col[my_col] + col_offset;
5471 //printf ("%ld => %ld (%ld, %ld)\n", c, new_compressed_index,r*hDim + my_col, new_index);
5472 theIndex[new_compressed_index] = new_index;
5473 theData[new_compressed_index] = stash[c];
5474 by_column_counts[hDim + new_compressed_index] = r;
5475 stored_by_col[my_col]++;
5476 }
5477 from = compressedIndex[r];
5478 }
5479
5480
5481
5482 memcpy (compressedIndex, by_column_counts, sizeof (long)*(lDim+hDim));
5483 //_validateCompressedStorage();
5484
5485 /*for (long r = 0; r < hDim; r++) {
5486 long trow = r;
5487 for (long c = from; c < compressedIndex[r]; c++) {
5488 long tcol = compressedIndex[hDim+c];
5489 long transposed_index = tcol * vDim + trow;
5490 stash[ai] = theData[c];
5491 //sortedIndex.list_data[ai] = transposed_index;
5492 //sortedIndex3.list_data[ai] = transposed_index;
5493 ai++;
5494 //if (max < transposed_index) max = transposed_index;
5495 }
5496 from = compressedIndex[r];
5497 }*/
5498 return;
5499 }
5500
5501 _SimpleList sortedIndex ((unsigned long)lDim, (long*)alloca (lDim * sizeof (long))),
5502 sortedIndex3 ((unsigned long)lDim, (long*)alloca (lDim * sizeof (long))),
5503 sortedIndex2;
5504
5505
5506 /*const long blockChunk = 32,
5507 blockShift = hDim / blockChunk + 1*/
5508
5509 long max = 0,
5510 secondDim = transpose ? hDim : vDim,
5511 firstDim = transpose ? vDim : hDim;
5512
5513
5514 if (transpose) {
5515 long ai = 0L;
5516
5517 for (long i2=0; i2<lDim; i2++) {
5518 long k = theIndex[i2];
5519 if (__builtin_expect (k!=-1,1)) {
5520 long trow = k/vDim, tcol = k - trow*vDim;
5521 long transposed_index = tcol * vDim + trow;
5522 stash[ai] = theData[i2];
5523 sortedIndex.list_data[ai] = transposed_index;
5524 sortedIndex3.list_data[ai] = transposed_index;
5525 ai++;
5526 if (max < transposed_index) max = transposed_index;
5527 }
5528 }
5529
5530 sortedIndex3.lLength = ai;
5531 sortedIndex.lLength = ai;
5532 } else {
5533 for (long i2=0; i2<lDim; i2++) {
5534 long k = theIndex[i2];
5535 if (__builtin_expect (k!=-1,1)) {
5536 stash[sortedIndex.lLength] = theData[i2];
5537 sortedIndex << k;
5538 sortedIndex3 << k;
5539 if (max < k) max = k;
5540 }
5541 }
5542 }
5543
5544
5545 //printf ("_Matrix::CompressSparseMatrix %d\n%s\n%ld\n", transpose, _String ((_String*)sortedIndex3.toStr()).get_str(),max);
5546
5547 /*if (max > (lDim<<4)) {
5548 sortedIndex2. Populate(sortedIndex.lLength,0,1);
5549 SortLists(&sortedIndex3,&sortedIndex2);
5550 } else {*/
5551 sortedIndex3.CountingSort(max+1, &sortedIndex2,false);
5552 //}
5553
5554 lDim = sortedIndex.lLength;
5555
5556 /*for (long i=0; i<sortedIndex.lLength; i++) {
5557 //printf ("%ld %ld", i, theIndex[i]);
5558 theIndex[i] = sortedIndex.list_data[sortedIndex2.list_data[i]];
5559 theData[i] = stash[sortedIndex2.list_data[i]];
5560 }*/
5561
5562 if (compressedIndex) {
5563 MatrixMemFree(compressedIndex);
5564 }
5565
5566 compressedIndex = (long*) MatrixMemAllocate((lDim + firstDim) * sizeof (long));
5567
5568 long currentRow = 0;
5569 lDim = sortedIndex.lLength;
5570
5571 for (long i=0; i<sortedIndex.lLength; i++) {
5572 //printf ("%ld %ld", i, theIndex[i]);
5573 long entryIndex = sortedIndex.list_data[sortedIndex2.list_data[i]];
5574 theIndex[i] = entryIndex;
5575
5576 long indexRow = entryIndex / secondDim,
5577 indexColumn = entryIndex - indexRow * secondDim;
5578
5579 //printf ("[%ld] %ld %ld %ld\n", i, indexRow, indexColumn, currentRow);
5580
5581 compressedIndex[i + firstDim] = indexColumn;
5582 if (indexRow > currentRow) {
5583 for (long l = currentRow; l < indexRow; l++) {
5584 compressedIndex[l] = i;
5585 }
5586 //printf (">[%ld] %ld\n", currentRow, compressedIndex[currentRow]);
5587 currentRow = indexRow;
5588 } /*else {
5589 if (currentRow > indexRow) {
5590 printf ("\n\n\nBARF\n\n\n");
5591 }
5592 }*/
5593
5594 //printf (" %ld\n", theIndex[i]);
5595 theData[i] = stash[sortedIndex2.list_data[i]];
5596 }
5597
5598 for (long l = currentRow; l < firstDim; l++)
5599 compressedIndex[l] = lDim;
5600 }
5601 }
5602
5603
5604 //_____________________________________________________________________________________________
5605
Exponentiate(hyFloat scale_to,bool check_transition,_Matrix * existing_storage)5606 _Matrix* _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Matrix * existing_storage) {
5607 // find the maximal elements of the matrix
5608
5609
5610 try {
5611 if (!is_square()) {
5612 throw _String ("Exponentiate is not defined for non-square matrices");
5613 }
5614
5615 long i,
5616 power2 = 0L;
5617
5618 #ifndef _OPENMP
5619 matrix_exp_count++;
5620 #endif
5621
5622 hyFloat max = 1.0,
5623 *stash,
5624 *stash2 = 0;
5625 // = new hyFloat[hDim*(1+vDim)];
5626
5627 if (!is_polynomial()) {
5628 stash = (hyFloat*)alloca(sizeof (hyFloat) * hDim*(1+vDim));
5629 if (theIndex) {
5630 // transpose sparse matrix
5631 CompressSparseMatrix (true,stash);
5632 }
5633
5634 hyFloat t;
5635 //bool censor = false;
5636 RowAndColumnMax (max, t, stash);
5637 /*if (t > 10000. || max > 10000.) {
5638 censor = true;
5639 t = MIN (t,10000.);
5640 max = MIN (max, 10000.);
5641 }*/
5642 max *= t;
5643 //max = MaxElement();
5644 //max = max * max;
5645 if (max > .1) {
5646 max = scale_to*sqrt (10.*max);
5647 power2 = (long)((log (max)/_log2))+1L;
5648 max = exp (power2 * _log2);
5649 stash2 = (hyFloat*)alloca(sizeof (hyFloat) * lDim);
5650 memcpy (stash2, theData, sizeof (hyFloat) * lDim);
5651 /*if (censor) {
5652 for (long i = 0; i < lDim; i++) {
5653 if (theData[i] < -10000.) {
5654 theData[i] = -10000.;
5655 } else {
5656 if (theData[i] > 10000.) {
5657 theData[i] = 10000;
5658 }
5659 }
5660 }
5661 }*/
5662 (*this) *= 1.0/max;
5663 } else {
5664 power2 = 0;
5665 }
5666
5667
5668 } else {
5669 max = 1.;
5670 }
5671
5672 _Matrix *result;
5673
5674 if (!is_polynomial() && existing_storage && existing_storage->hDim == hDim && existing_storage->vDim == vDim && existing_storage->is_numeric() && existing_storage->is_dense()) {
5675 result = existing_storage;
5676 InitializeArray(result->theData, result->lDim, 0.0);
5677 } else {
5678 result = new _Matrix(hDim, vDim , is_polynomial(), !is_polynomial());
5679 }
5680
5681
5682 // put ones on the diagonal
5683
5684 if (!is_polynomial()) {
5685 long step = vDim + 1;
5686 for (long diag = 0; diag < result->lDim; diag += step) {
5687 result->theData[diag] = 1.;
5688 }
5689 } else {
5690 for (i=0; i<(*result).hDim*(*result).vDim; i+=vDim+1) {
5691 (*result).StoreObject(i,new _Polynomial (1.),false);
5692 }
5693 }
5694
5695 if (max == 0.0) {
5696 return result;
5697 }
5698
5699 (*result) += (*this);
5700
5701 i = 2;
5702
5703 if (precisionArg || is_polynomial()) {
5704 _Matrix temp (*this);
5705
5706 if (!is_polynomial()) {
5707 for (; i<=precisionArg; i++) {
5708 temp *= (*this);
5709 temp *= 1.0/i;
5710 (*result) += temp;
5711 }
5712 }
5713 else {
5714 while (temp.IsMaxElement (polynomialExpPrecision)) {
5715 if (i>maxPolynomialExpIterates) {
5716 break;
5717 }
5718 temp *= (*this);
5719 temp *= 1.0/i;
5720 (*result) += temp;
5721 i++;
5722 }
5723 if (i>maxPolynomialExpIterates) {
5724 _String wM ("Polynomial Matrix Exponential Failed to achieve accuracy POLYNOMIAL_EXP_PRECISION in under MAX_POLYNOMIAL_EXP_ITERATES. Either decrease the precision, or increase the maximum number of iterates.");
5725 ReportWarning (wM);
5726 }
5727 }
5728 } else {
5729 hyFloat tMax = MAX(MinElement()*sqrt ((hyFloat)hDim),truncPrecision);
5730
5731 i=2;
5732
5733
5734 if (is_dense()) { // avoid matrix allocation
5735 _Matrix temp ;
5736 temp.hDim = hDim;
5737 temp.vDim = vDim;
5738 temp.lDim = lDim;
5739 temp.theData = (hyFloat*)alloca(sizeof (hyFloat) * hDim*vDim);
5740 memcpy (temp.theData, theData, sizeof (hyFloat) * hDim*vDim);
5741
5742 _Matrix tempS;
5743 tempS.hDim = hDim;
5744 tempS.vDim = vDim;
5745 tempS.lDim = lDim;
5746 tempS.theData = stash;
5747 // zero out the stash TODO: 20200929 : is this necessary?
5748 memset (stash, 0, sizeof (hyFloat)*lDim);
5749 do {
5750 temp.MultbyS (*this,false, &tempS, nil);
5751 // after this call, temp and tempS are gonna swap pointers to theData
5752 temp *= 1.0/i;
5753 (*result) += temp;
5754 i ++;
5755 /*#ifndef _OPENMP
5756 taylor_terms_count++;
5757 #else
5758 #pragma omp atomic
5759 taylor_terms_count++;
5760 #endif*/
5761 } while (temp.IsMaxElement(tMax*truncPrecision*i));
5762
5763
5764 if (tempS.theData != stash) { // need to copy data to tempS
5765 Exchange (tempS.theData, temp.theData);
5766 memcpy (temp.theData, tempS.theData, lDim * sizeof (hyFloat));
5767 }
5768 tempS.theData = nil;
5769 temp.theData = nil;
5770
5771 } else {
5772 _Matrix temp (*this);
5773 _Matrix tempS (hDim, vDim, false, temp.storageType);
5774 do {
5775 temp.MultbyS (*this,theIndex!=nil, &tempS, stash);
5776 temp *= 1.0/i;
5777 (*result) += temp;
5778 i ++;
5779 /*#ifndef _OPENMP
5780 taylor_terms_count++;
5781 #else
5782 #pragma omp atomic
5783 taylor_terms_count++;
5784 #endif*/
5785 } while (temp.IsMaxElement(tMax*truncPrecision*i));
5786 }
5787
5788 // use Pade (4,4) here
5789
5790 /*_Matrix temp (*this), top (result) , bottom (result);
5791 temp *= .5;
5792 top+=temp;
5793 bottom-=temp;
5794 temp *= *this;
5795 temp *= 3.0/14.0;
5796 top+=temp;
5797 bottom+=temp;
5798 temp *= *this;
5799 temp *= 1.0/9.0;
5800 top+=temp;
5801 bottom-=temp;
5802 temp *= 1.0/20.0;
5803 top+=temp;
5804 bottom+=temp;
5805 _Matrix* inv = (_Matrix*)bottom.Inverse();
5806 top *= *inv;
5807 DeleteObject (inv);
5808 result = top;*/
5809 }
5810
5811 if (power2) {
5812 //(*this)*=max;
5813 memcpy(theData, stash2, sizeof (hyFloat) * lDim);
5814
5815 }
5816
5817 if (theIndex) {
5818 // transpose back
5819 if (compressedIndex) {
5820 long from = 0L, i = 0;
5821 for (long r = 0; r < hDim; r++) {
5822 #pragma GCC unroll 4
5823 #pragma clang loop vectorize(enable)
5824 #pragma clang loop interleave(enable)
5825 #pragma clang loop unroll(enable)
5826 for (long c = from; c < compressedIndex[r]; c++, i++) {
5827 theIndex[i] = compressedIndex[c+hDim] * vDim + r;
5828 }
5829 from = compressedIndex[r];
5830 }
5831 MatrixMemFree(compressedIndex);
5832 compressedIndex = nil;
5833 } else {
5834 for (i=0; i<lDim; i++) {
5835 long k = theIndex[i];
5836 if (k!=-1) {
5837 long div = k / vDim;
5838 theIndex[i] = (k - div * vDim)*vDim + div;
5839 }
5840 }
5841 }
5842 result->Transpose();
5843 }
5844
5845
5846 //_Matrix stash_mx (*result);
5847
5848 for (long s = 0; s<power2; s++) {
5849 /*#ifndef _OPENMP
5850 squarings_count++;
5851 #else
5852 #pragma omp atomic
5853 squarings_count++;
5854 #endif*/
5855 /* for (i = 0; i < hDim; i++) {
5856 if ((*result)(i,i) > 1.) {
5857 printf ("\n%ld\n", s);
5858 ObjectToConsole(result);
5859 NLToConsole();
5860 ObjectToConsole(&stash_mx);
5861 NLToConsole();
5862 _Matrix cp = *this;
5863 cp.CheckIfSparseEnough(true);
5864 ObjectToConsole(&cp);
5865 abort();
5866 }
5867 }*/
5868
5869 /*if (squarings_count == 3606) {
5870 for (long i = 0, c= 0; i < hDim; i++) {
5871 for (long j = 0; j < vDim; j++,c++) {
5872 fprintf (stderr, "%ld\t%ld\t%16.12g\n", i,j, result->theData[c]);
5873 }
5874 }
5875 }*/
5876 hyFloat maxDiff = result->Sqr(stash);
5877 /*if (squarings_count == 3606) {
5878 fprintf (stderr, "\n\n%16.12g\n\n", maxDiff);
5879 }*/
5880 if (maxDiff < DBL_EPSILON * 1.e3) {
5881 break;
5882 }
5883 }
5884
5885
5886 if (check_transition) {
5887 bool pass = true;
5888 if (result->is_dense()) {
5889 for (unsigned long r = 0L; r < result->lDim; r += result->vDim) {
5890 if (result->theData[r] > 1.) {
5891 pass = false;
5892 break;
5893 }
5894 }
5895 } else {
5896 for (unsigned long r = 0L; r < result->hDim; r ++) {
5897 if ((*result)(r,r) > 1.) {
5898 pass = false;
5899 break;
5900 }
5901 }
5902 }
5903 if (!pass) {
5904 if (scale_to < 1.e100) {
5905 DeleteObject (result);
5906 return this->Exponentiate(scale_to * 100, true);
5907 }
5908
5909 /*printf ("SCALE %lg : \n", scale_to);
5910
5911 for (unsigned long r = 0L; r < hDim; r ++) {
5912 hyFloat sum = 0.;
5913 printf ("%ld %18.16lg %18.16lg\n", r, (*result)(r,r), (*result)(r,r) - 1.);
5914 for (unsigned long c = 0L; c < vDim; c++) {
5915 sum += (*this)(r,c);
5916 }
5917 printf ("%ld %g\n", r, sum);
5918 }
5919
5920
5921 ObjectToConsole(this);
5922 ObjectToConsole(result);*/
5923
5924 throw _String ("Failed to compute a valid transition matrix; this is usually caused by ill-conditioned rate matrices (e.g. very large rate values)");
5925 }
5926 }
5927
5928 return result;
5929 }
5930 catch (const _String& e) {
5931 HandleApplicationError(e);
5932 }
5933
5934 return new _Matrix;
5935
5936 }
5937
5938 //_____________________________________________________________________________________________
5939
SetupSparseMatrixAllocations(void)5940 void _Matrix::SetupSparseMatrixAllocations (void) {
5941 overflowBuffer = hDim*storageIncrement/100;
5942 bufferPerRow = MAX (1, (lDim-overflowBuffer)/hDim);
5943 overflowBuffer = lDim-bufferPerRow*hDim;
5944 allocationBlock = hDim*vDim*storageIncrement/100+1;
5945 }
5946
5947 //_____________________________________________________________________________________________
5948
Hash(long i,long j) const5949 long _Matrix::Hash (long i, long j) const {
5950 // returns element's position in the buffer (-1 if not found)
5951
5952 if (is_dense()) {
5953 return i*vDim+j;
5954 }
5955 // ordinary matrix
5956
5957 if (compressedIndex) {
5958 for (long c = i ? compressedIndex[i-1] : 0; c < compressedIndex[i]; c++) {
5959 if (compressedIndex[hDim+c] == j) return c;
5960 }
5961 return -1;
5962 }
5963
5964 long elementIndex = i*vDim+j,
5965 m = i*bufferPerRow;
5966
5967 for (long allocation_block_index = 0L; allocation_block_index < lDim; allocation_block_index += allocationBlock, m += allocationBlock) {
5968 // look within the row for this allocation block
5969 for (long l = m; l < m + bufferPerRow; l++) {
5970 long try_me = theIndex[l];
5971 if (try_me != elementIndex) {
5972 if (try_me == -1) return -l - 2;
5973 } else return l;
5974 }
5975 // if not found, look in the overflow are for this block
5976 long upper_bound = MIN (lDim, allocation_block_index + allocationBlock);
5977 for (long n = upper_bound - overflowBuffer; n < upper_bound; n++) {
5978 long try_me = theIndex[n];
5979 if (try_me != elementIndex) {
5980 if (try_me == -1) return -n - 2;
5981 } else return n;
5982 }
5983 }
5984 return -1;
5985
5986
5987
5988 /*for (long blockIndex = 0; blockIndex<lDim/allocationBlock; blockIndex++,m+=allocationBlock) {
5989 for (long l=m; l<m+bufferPerRow; l++) {
5990 long p = theIndex[l];
5991 if (p!=elementIndex) {
5992 if (p==-1) {
5993 return -l-2;
5994 }
5995 } else {
5996 return l;
5997 }
5998 }
5999 long n = (blockIndex+1)*allocationBlock-1;
6000 for (long l = n; l>n-overflowBuffer; l--) {
6001 long p = theIndex[l];
6002 if (p!=elementIndex) {
6003 if (p==-1) {
6004 return -l-2;
6005 }
6006 } else {
6007 return l;
6008 }
6009 }
6010 }*/
6011 //return -1;
6012 }
6013
6014 //_____________________________________________________________________________________________
operator ()(long i,long j) const6015 hyFloat _Matrix::operator () (long i, long j) const {
6016 long lIndex = Hash (i,j);
6017 if (lIndex<0) {
6018 return ZEROOBJECT;
6019 } else {
6020 return theData[lIndex];
6021 }
6022 }
6023
6024 //_____________________________________________________________________________________________
ExtractElementsByEnumeration(_SimpleList * h,_SimpleList * v,bool column)6025 _Matrix* _Matrix::ExtractElementsByEnumeration (_SimpleList*h, _SimpleList*v, bool column) // extract by row
6026 {
6027 if (storageType && h->lLength == v->lLength && h->lLength > 0) {
6028 _Matrix * result = new _Matrix (column?h->lLength:1,column?1:h->lLength,false,true);
6029
6030 if (storageType == 2) // formulae
6031 for (long k=0; k<h->lLength; k++) {
6032 result->StoreFormula(column?k:0,column?0:k,*GetFormula(h->list_data[k],v->list_data[k]));
6033 }
6034 else
6035 for (long k=0; k<h->lLength; k++) {
6036 result->theData[k] = (*this)(h->list_data[k],v->list_data[k]);
6037 }
6038
6039 return result;
6040 }
6041 return new _Matrix;
6042 }
6043
6044
6045
6046 //_____________________________________________________________________________________________
MAccess(HBLObjectRef p,HBLObjectRef p2,HBLObjectRef cache)6047 HBLObjectRef _Matrix::MAccess (HBLObjectRef p, HBLObjectRef p2, HBLObjectRef cache) {
6048 if (!p) {
6049 HandleApplicationError ( kErrorStringInvalidMatrixIndex );
6050 return _returnConstantOrUseCache (0., cache);
6051 }
6052
6053 if (hDim <= 0L || vDim <= 0L) {
6054 return _returnConstantOrUseCache (0., cache);
6055 }
6056
6057 if (p->ObjectClass() == MATRIX) {
6058 if (p2 == nil) {
6059 _Matrix * nn = (_Matrix*)p;
6060 if (nn->storageType == 1) {
6061 if (nn->hDim == hDim && nn->vDim == vDim) {
6062 _SimpleList hL,
6063 vL;
6064
6065 for (long r=0; r<hDim; r++)
6066 for (long c=0; c<vDim; c++)
6067 if ((*nn)(r,c) > 0.0) {
6068 hL << r;
6069 vL << c;
6070 }
6071
6072 return ExtractElementsByEnumeration (&hL,&vL);
6073 } else {
6074 if (nn->hDim > 0 && nn->vDim == 1) { // extract by row
6075 _SimpleList hL;
6076
6077 for (unsigned long r=0UL; r<nn->hDim; r++) {
6078 long v = floor((*nn)(r,0L));
6079 if (v>=0L && v<hDim) {
6080 hL<<v;
6081 }
6082 }
6083
6084 if (hL.lLength) {
6085 _Matrix * result = new _Matrix (hL.lLength,vDim,false,true);
6086 unsigned long k = 0UL;
6087 for (unsigned long r=0UL; r<hL.lLength; r++) {
6088 unsigned long ri = hL.list_data[r];
6089 for (unsigned long c=0UL; c<vDim; c++,k++) {
6090 result->theData[k] = (*this)(ri,c);
6091 }
6092 }
6093 return result;
6094 }
6095
6096 return new _Matrix;
6097 } else if (nn->vDim > 0 && nn->hDim == 1) { // extract by column
6098 _SimpleList hL;
6099
6100 for (long r=0; r<nn->vDim; r++) {
6101 long v = (*nn)(0,r);
6102 if (v>=0 && v<vDim) {
6103 hL<<v;
6104 }
6105 }
6106
6107 if (hL.lLength) {
6108 _Matrix * result = new _Matrix (hDim,hL.lLength,false,true);
6109 long k = 0;
6110 for (long c=0; c<hDim; c++)
6111 for (long r=0; r<hL.lLength; r++,k++) {
6112 result->theData[k] = (*this)(c,hL.list_data[r]);
6113 }
6114 return result;
6115 }
6116
6117 return new _Matrix;
6118 }
6119 }
6120 }
6121 ReportWarning ("Incorrect dimensions or matrix type (must be numeric) for an indexing matrix in call to []");
6122 } else {
6123 if (p2->ObjectClass() == MATRIX) {
6124 _Matrix * nn = (_Matrix*)((_Matrix*)p)->ComputeNumeric();
6125 _Matrix * nn2 = (_Matrix*)((_Matrix*)p2)->ComputeNumeric();
6126
6127 if (nn->hDim == 1 && nn->vDim == 2 && nn->storageType == 1 && nn2->hDim == 1 && nn2->vDim == 2 && nn2->storageType == 1) {
6128 long left = (*nn)(0,0),
6129 top = (*nn)(0,1),
6130 bottom = (*nn2)(0,1),
6131 right = (*nn2)(0,0);
6132
6133 if (left >= 0 && left < hDim && right >= 0 && right < hDim && left <=right &&
6134 top >= 0 && top < vDim && bottom >=0 && bottom < vDim && top <= bottom) {
6135 _SimpleList hL,
6136 vL;
6137
6138 for (long r=left; r<=right; r++)
6139 for (long c=top; c<=bottom; c++) {
6140 hL << r;
6141 vL << c;
6142 }
6143
6144 _Matrix * subM = ExtractElementsByEnumeration (&hL,&vL);
6145 subM->hDim = right-left+1;
6146 subM->vDim = bottom-top+1;
6147
6148 return subM;
6149 }
6150 }
6151 ReportWarning ("Incorrect dimensions or matrix type (must be numeric 2x1 matrices) for an rectangular extract in call to []");
6152 }
6153
6154 }
6155 return new _Constant (0.0);
6156 } else {
6157 if (p->ObjectClass() == STRING) {
6158 _String aFormulaString (((_FString*)p)->get_str());
6159 _Formula f (aFormulaString, currentExecutionList ? currentExecutionList->nameSpacePrefix : nil);
6160
6161 if (!f.IsEmpty()) {
6162 /* check formula validity */
6163
6164
6165 _Variable * cv = CheckReceptacle(&hy_env::matrix_element_value, kEmptyString, false),
6166 * cr = CheckReceptacle(&hy_env::matrix_element_row, kEmptyString, false),
6167 * cc = CheckReceptacle(&hy_env::matrix_element_column, kEmptyString, false);
6168
6169 cv->CheckAndSet (0.0, false, NULL);
6170 cr->CheckAndSet (0.0, false, NULL);
6171 cc->CheckAndSet (0.0, false, NULL);
6172
6173 f.Compute();
6174 if (terminate_execution) {
6175 return new _Matrix ();
6176 } else {
6177
6178 _Formula * conditionalCheck = nil;
6179
6180 if (p2 && p2->ObjectClass() == STRING) {
6181 conditionalCheck = new _Formula (((_FString*)p2)->get_str(), currentExecutionList ? currentExecutionList->nameSpacePrefix : nil);
6182 if (conditionalCheck->IsEmpty()) {
6183 delete conditionalCheck;
6184 conditionalCheck = nil;
6185 }
6186
6187 conditionalCheck->Compute();
6188 if (terminate_execution) {
6189 delete conditionalCheck;
6190 return new _Matrix ();
6191 }
6192 }
6193
6194 _Matrix * retMatrix = new _Matrix (hDim,vDim,false,true);
6195
6196 long stackDepth = 0;
6197 _SimpleList vIndexAux;
6198 _AVLList vIndex (&vIndexAux);
6199
6200 if (f.AmISimple (stackDepth,vIndex) && (!conditionalCheck || conditionalCheck->AmISimple(stackDepth,vIndex))) {
6201 _SimpleFormulaDatum * stack = new _SimpleFormulaDatum [stackDepth+1],
6202 * varValues = new _SimpleFormulaDatum [vIndex.countitems()];
6203
6204 bool constantValue = false;
6205 hyFloat constantV = f.Compute()->Value();
6206
6207 if (f.IsConstant()) {
6208 constantValue = true;
6209 constantV = f.Compute()->Value();
6210 } else {
6211 f.ConvertToSimple (vIndex);
6212 }
6213
6214
6215 if (conditionalCheck) {
6216 conditionalCheck->ConvertToSimple(vIndex);
6217 }
6218
6219 if (constantValue && !conditionalCheck) {
6220 for (long r=0; r<hDim; r++)
6221 for (long c=0; c<vDim; c++) {
6222 retMatrix->Store (r,c,constantV);
6223 }
6224 } else {
6225
6226 long rid []= {cr->get_index(),cc->get_index(),cv->get_index()};
6227
6228 for (long k=0; k<3; k++) {
6229 rid[k] = vIndexAux.Find(rid[k]);
6230 }
6231
6232 PopulateArraysForASimpleFormula(vIndexAux, varValues);
6233
6234 for (long r=0; r<hDim; r++) {
6235
6236 if (rid[0]>=0) {
6237 varValues[rid[0]].value = r;
6238 }
6239
6240 for (long c=0; c<vDim; c++) {
6241 if (rid[1]>=0) {
6242 varValues[rid[1]].value = c;
6243 }
6244
6245 if (rid[2]>=0) {
6246 varValues[rid[2]].value = (*this)(r,c);
6247 }
6248
6249 if (conditionalCheck && CheckEqual(conditionalCheck->ComputeSimple(stack,varValues),0.0)) {
6250 if (rid[2]>=0) {
6251 retMatrix->Store (r,c,varValues[rid[2]].value);
6252 } else {
6253 retMatrix->Store (r,c, (*this)(r,c));
6254 }
6255 continue;
6256 }
6257
6258 if (constantValue) {
6259 retMatrix->Store (r,c,constantV);
6260 } else {
6261 //printf ("Formula eval (stack depth= %d) (%d, %g, %g) %g\n", stackDepth, rid[2], varValues[rid[2]], f.ComputeSimple(stack,varValues));
6262
6263 retMatrix->Store (r,c,f.ComputeSimple(stack,varValues));
6264 }
6265 }
6266 }
6267
6268 f.ConvertFromSimple (vIndex);
6269 }
6270 if (conditionalCheck) {
6271 conditionalCheck->ConvertFromSimple(vIndex);
6272 }
6273
6274 delete [] stack;
6275 delete [] varValues;
6276 } else {
6277 for (long r=0; r<hDim; r++) {
6278 cr->CheckAndSet (r,false, NULL);
6279 for (long c=0; c<vDim; c++) {
6280 cc->CheckAndSet (c,false, NULL);
6281 cv->CheckAndSet ((*this)(r,c),false, NULL);
6282 HBLObjectRef fv;
6283
6284 if (conditionalCheck) {
6285 fv = conditionalCheck->Compute();
6286 if (fv->ObjectClass() == NUMBER)
6287 if (CheckEqual (fv->Value(), 0.0)) {
6288 retMatrix->Store (r,c,cv->Value());
6289 continue;
6290 }
6291 }
6292
6293 fv = f.Compute();
6294 if (fv->ObjectClass()==NUMBER) {
6295 retMatrix->Store (r,c,fv->Value());
6296 }
6297 }
6298 }
6299 }
6300 retMatrix->AmISparse();
6301 if (conditionalCheck) {
6302 delete conditionalCheck;
6303 }
6304 return retMatrix;
6305 }
6306 }
6307 ReportWarning (_String("Invalid formula expression for element-wise matrix operations: ") & ((_FString*)p)->get_str());
6308 return new _Matrix;
6309 }
6310 }
6311
6312 long ind1 = p->Value(),
6313 ind2 = -1;
6314
6315 if (p2) {
6316 ind2 = p2->Value();
6317 // handle the row/column access operations here i.e. [R][-1] or [-1][R]
6318
6319 if (ind1 == -1 && ind2 >=0 && ind2 <vDim) { // valid column access
6320 _SimpleList hL (hDim,0,1),
6321 vL (hDim,ind2,0);
6322 return ExtractElementsByEnumeration (&hL,&vL,true);
6323 }
6324
6325 if (ind2 == -1 && ind1 >=0 && ind1 <hDim) { // valid row access
6326 _SimpleList hL (vDim,ind1,0),
6327 vL (vDim,0,1);
6328 return ExtractElementsByEnumeration (&hL,&vL);
6329 }
6330 }
6331
6332 if (hDim == 1) {
6333 if (ind2<0) {
6334 ind2 = ind1;
6335 }
6336 ind1=0;
6337 }
6338
6339 if (vDim == 1) {
6340 ind2 = 0;
6341 }
6342
6343 if (ind2<0) { // allow direct vectorlike indexing, i.e m[21] = m[3][3] (if the dim is *x6)
6344 ind2 = ind1%vDim;
6345 ind1 /=vDim;
6346 }
6347
6348 if (ind1<0 || ind1>=hDim || ind2>=vDim) {
6349 MatrixIndexError (ind1,ind2,hDim,vDim);
6350 return _returnConstantOrUseCache (0., cache);
6351 }
6352
6353 if (ind2>=0) { // element access
6354 return GetMatrixCell (ind1, ind2, cache);
6355 }
6356
6357 return _returnConstantOrUseCache (0., cache);
6358 }
6359
6360 //_____________________________________________________________________________________________
GetMatrixCell(long ind1,long ind2,HBLObjectRef cache) const6361 HBLObjectRef _Matrix::GetMatrixCell (long ind1, long ind2, HBLObjectRef cache) const {
6362 if (is_expression_based()) { // formulas
6363 if (!theIndex) {
6364 _Formula * entryFla = (((_Formula**)theData)[ind1*vDim+ind2]);
6365 if (entryFla) {
6366 return (HBLObjectRef)entryFla->Compute()->makeDynamic();
6367 } else {
6368 return _returnConstantOrUseCache (0., cache);
6369 }
6370 } else {
6371 long p = Hash (ind1, ind2);
6372 if (p<0) {
6373 return _returnConstantOrUseCache (0., cache);
6374 } else {
6375 return (HBLObjectRef)(((_Formula**)theData)[p])->Compute()->makeDynamic();
6376 }
6377 }
6378 } else {
6379 if (is_numeric()) {
6380 if (theIndex) {
6381 return _returnConstantOrUseCache ((*this)(ind1,ind2), cache);
6382 } else {
6383 return _returnConstantOrUseCache (theData[ind1*vDim+ind2], cache);
6384 }
6385
6386 } else {
6387 _MathObject* cell;
6388 if (!theIndex) {
6389 cell = (_MathObject*)GetMatrixObject (ind1*vDim+ind2)->makeDynamic();
6390 } else {
6391 long p = Hash (ind1, ind2);
6392 if (p<0) {
6393 cell = new _Constant (0.0);
6394 } else {
6395 cell = (_MathObject*)GetMatrixObject (p)->makeDynamic();
6396 }
6397 }
6398 return cell;
6399 }
6400 }
6401 }
6402
6403
6404 //_____________________________________________________________________________________________
GetFormula(long ind1,long ind2) const6405 _Formula* _Matrix::GetFormula (long ind1, long ind2) const {
6406
6407 if (hDim == 1) {
6408 if (ind2<0) {
6409 ind2 = ind1;
6410 }
6411 ind1=0;
6412 }
6413
6414 if (vDim == 1) {
6415 ind2 = 0;
6416 }
6417
6418 if (ind2<0) {
6419 ind2 = ind1%vDim;
6420 ind1/=vDim;
6421 }
6422
6423 if ( ind1<0 || ind1>=hDim || ind2>=vDim) {
6424 MatrixIndexError (ind1,ind2,hDim,vDim);
6425 return nil;
6426 }
6427
6428
6429 if (ind2>=0) { // element access
6430 if (storageType == 2) { // formulas
6431 if (!theIndex) {
6432 return (((_Formula**)theData)[ind1*vDim+ind2]);
6433 } else {
6434 long p = Hash (ind1, ind2);
6435 if (p<0) {
6436 return nil;
6437 } else {
6438 return (((_Formula**)theData)[p]);
6439 }
6440 }
6441 }
6442 }
6443
6444 return nil;
6445 }
6446
6447 //_____________________________________________________________________________________________
MCoord(HBLObjectRef p,HBLObjectRef p2,HBLObjectRef cachedResult)6448 HBLObjectRef _Matrix::MCoord (HBLObjectRef p, HBLObjectRef p2, HBLObjectRef cachedResult) {
6449 long ind1 = -1L,
6450 ind2 = -1L;
6451
6452 if (!p) {
6453 HandleApplicationError ( kErrorStringInvalidMatrixIndex );
6454 return new _MathObject;
6455 }
6456
6457 ind1 = p->Value();
6458 if (p2) {
6459 ind2 = p2->Value();
6460 }
6461
6462
6463 if (hDim == 1L) {
6464 if (ind2<0L) {
6465 ind2 = ind1;
6466 }
6467 ind1=0L;
6468 }
6469
6470 if (vDim == 1L) {
6471 ind2 = 0L;
6472 }
6473
6474 if (ind2<0L) { // allow direct vectorlike indexing, i.e m[21] = m[3][3] (if the dim is *x6)
6475 ind2 = ind1%vDim;
6476 ind1 = ind1/vDim;
6477 }
6478 _Matrix * res = nil;
6479 if (cachedResult && cachedResult->ObjectClass() == MATRIX) {
6480 res = (_Matrix*)cachedResult;
6481 if (!(res->is_numeric() && res->check_dimension(1,2))){
6482 res = nil;
6483 }
6484 }
6485 if (!res)
6486 res = new _Matrix (1L,2L,false,true);
6487
6488 res->theData[0]=ind1;
6489 res->theData[1]=ind2;
6490 return res;
6491
6492 }
6493
6494 //_____________________________________________________________________________________________
MResolve(HBLObjectRef p,HBLObjectRef p2,long & ind1,long & ind2)6495 bool _Matrix::MResolve (HBLObjectRef p, HBLObjectRef p2, long& ind1, long& ind2)
6496 {
6497 ind1 = -1;
6498 ind2 = -1;
6499
6500 if (!p) {
6501 HandleApplicationError ( kErrorStringInvalidMatrixIndex );
6502 return false;
6503 }
6504
6505 ind1 = p->Value();
6506 if (p2) {
6507 ind2 = p2->Value();
6508 }
6509
6510 return CheckCoordinates (ind1,ind2);
6511 }
6512
6513 //_____________________________________________________________________________________________
6514
CheckCoordinates(long & ind1,long & ind2)6515 bool _Matrix::CheckCoordinates (long& ind1, long& ind2)
6516 {
6517 if (hDim == 1) {
6518 if (ind2<0) {
6519 ind2 = ind1;
6520 }
6521 ind1=0;
6522 }
6523
6524 if (vDim == 1) {
6525 ind2 = 0;
6526 }
6527
6528 if (ind2<0) { // allow direct vectorlike indexing, i.e m[21] = m[3][3] (if the dim is *x6)
6529 if (vDim > 1) {
6530 ind2 = ind1%vDim;
6531 ind1/= vDim;
6532 } else {
6533 ind2 = 0;
6534 }
6535 }
6536
6537 if (ind1<0 || ind1>=hDim || ind2>=vDim) {
6538 MatrixIndexError (ind1,ind2, hDim, vDim);
6539 return false;
6540 }
6541 return true;
6542 }
6543
6544
6545 //_____________________________________________________________________________________________
MStore(long ind1,long ind2,_Formula & f,long opCode)6546 void _Matrix::MStore (long ind1, long ind2, _Formula& f, long opCode) {
6547 if (ind2>=0) { // element storage
6548 if (is_expression_based()) { // formulas
6549 if (opCode == HY_OP_CODE_ADD) {
6550 _Formula * addOn = GetFormula(ind1,ind2);
6551 if (addOn) {
6552 StoreFormula (ind1,ind2,*_Formula::PatchFormulasTogether(*addOn, f, HY_OP_CODE_ADD),false);
6553 return;
6554 }
6555 }
6556 StoreFormula (ind1,ind2,f);
6557 } else {
6558 if (!f.IsAConstant()) {
6559 Convert2Formulas();
6560 StoreFormula (ind1,ind2,f);
6561 } else {
6562 HBLObjectRef res = f.Compute();
6563 hyFloat toStore = res->Value();
6564 if (opCode == HY_OP_CODE_ADD) {
6565 toStore += (*this)(ind1,ind2);
6566 }
6567 Store(ind1,ind2,toStore);
6568 }
6569 }
6570 }
6571 }
6572
6573 //_____________________________________________________________________________________________
MStore(long ind1,long ind2,HBLObjectRef value,long opCode)6574 void _Matrix::MStore (long ind1, long ind2, HBLObjectRef value, long opCode) {
6575 if (ind2>=0) { // element storage
6576 if (is_expression_based()) { // formulas
6577 value->AddAReference();
6578 if (opCode == HY_OP_CODE_ADD) {
6579 _Formula * addOn = GetFormula(ind1,ind2);
6580 if (addOn) {
6581 StoreFormula (ind1,ind2,*_Formula::PatchFormulasTogether(*addOn, value, HY_OP_CODE_ADD),false);
6582 return;
6583 }
6584 }
6585 _Formula * f = new _Formula (value);
6586 StoreFormula (ind1,ind2,*f,false);
6587 } else {
6588 if (value->ObjectClass() != NUMBER) {
6589 Convert2Formulas();
6590 value->AddAReference();
6591 _Formula * f = new _Formula (value);
6592 StoreFormula (ind1,ind2,*f,false);
6593 } else {
6594 hyFloat toStore = value->Value();
6595 if (opCode == HY_OP_CODE_ADD) {
6596 toStore += (*this)(ind1,ind2);
6597 }
6598 Store(ind1,ind2,toStore);
6599 }
6600 }
6601 }
6602 }
6603
6604 //_____________________________________________________________________________________________
MStore(HBLObjectRef p,HBLObjectRef p2,_Formula & f,long opCode)6605 void _Matrix::MStore (HBLObjectRef p, HBLObjectRef p2, _Formula& f, long opCode)
6606 {
6607 long ind1, ind2;
6608 if (MResolve (p,p2, ind1,ind2)) {
6609 MStore (ind1,ind2,f, opCode);
6610 }
6611 }
6612
6613 //_____________________________________________________________________________________________
MStore(HBLObjectRef p,HBLObjectRef p2,HBLObjectRef poly)6614 void _Matrix::MStore (HBLObjectRef p, HBLObjectRef p2, HBLObjectRef poly)
6615 {
6616 long ind1, ind2;
6617 if (MResolve (p,p2, ind1,ind2)) {
6618 MStore (ind1,ind2,poly);
6619 }
6620
6621 }
6622 //_____________________________________________________________________________________________
MStore(long ind1,long ind2,HBLObjectRef poly)6623 void _Matrix::MStore (long ind1, long ind2, HBLObjectRef poly) {
6624 if (ind2>=0) { // element storage
6625 if (storageType == 0) { // formulas
6626 StoreObject (ind1,ind2,poly,true);
6627 if (AUTO_PAD_DIAGONAL) {
6628 UpdateDiag (ind1,ind2,poly);
6629 }
6630 } else {
6631 _Polynomial* pp = (_Polynomial*)poly;
6632 poly = pp->IsANumber();
6633 if (!poly) { // just a number
6634 storageType==1?ConvertNumbers2Poly():ConvertFormulas2Poly();
6635 StoreObject (ind1,ind2,pp,true);
6636 } else {
6637 (*this)[Hash(ind1,ind2)] = poly->Value();
6638 }
6639 }
6640 }
6641 }
6642
6643
6644 //_____________________________________________________________________________________________
operator [](long i)6645 hyFloat& _Matrix::operator [] (long i) {
6646 if (is_dense()) {
6647 return theData [i];
6648 }
6649
6650 unsigned long r = (unsigned long)i / vDim,
6651 c = i - vDim * r;
6652
6653 long lIndex = Hash (r, c);
6654 if (lIndex == -1) {
6655 IncreaseStorage();
6656 lIndex = Hash (r, c);
6657 }
6658 if (lIndex<0) {
6659 theIndex[-lIndex-2] = i;
6660 return ((hyFloat*)theData)[-lIndex-2];
6661 } else {
6662 return ((hyFloat*)theData)[lIndex];
6663 }
6664 }
6665
6666 //_____________________________________________________________________________________________
Store(long i,long j,hyFloat value)6667 void _Matrix::Store (long i, long j, hyFloat value) {
6668 if (storageType!=1) {
6669 return;
6670 }
6671
6672 long lIndex;
6673
6674 if (theIndex) {
6675 lIndex = Hash (i, j);
6676
6677 if (lIndex == -1) {
6678 IncreaseStorage();
6679 lIndex = Hash (i, j);
6680 }
6681 } else {
6682 lIndex = i*vDim + j;
6683 }
6684
6685 if (lIndex<0) {
6686 theIndex[-lIndex-2] = i*vDim+j;
6687 ((hyFloat*)theData)[-lIndex-2] = value;
6688 } else {
6689 ((hyFloat*)theData)[lIndex] = value;
6690 }
6691
6692 }
6693
6694 //_____________________________________________________________________________________________
StoreObject(long i,long j,_MathObject * value,bool dup)6695 void _Matrix::StoreObject (long i, long j, _MathObject* value, bool dup) {
6696 if (storageType) {
6697 return;
6698 }
6699
6700 long lIndex = Hash (i, j);
6701 if (lIndex == -1) {
6702 IncreaseStorage();
6703 lIndex = Hash (i, j);
6704 }
6705
6706 if (dup) {
6707 value = (_MathObject*) value->makeDynamic();
6708 }
6709 if (lIndex<0) {
6710 theIndex[-lIndex-2] = i*vDim+j;
6711 ((_MathObject**)theData)[-lIndex-2] = value;
6712 } else {
6713 DeleteObject (GetMatrixObject(lIndex));
6714 ((_MathObject**)theData)[lIndex] = value;
6715 }
6716 if (AUTO_PAD_DIAGONAL) { // correct the diagonal entry
6717 }
6718
6719 }
6720 //_____________________________________________________________________________________________
6721
UpdateDiag(long i,long j,_MathObject * value)6722 void _Matrix::UpdateDiag (long i,long j, _MathObject* value)
6723 {
6724 if (i!=j) {
6725 _MathObject * diagCell = nil, *newCell;
6726 if (!theIndex) {
6727 diagCell = GetMatrixObject(i*hDim+i);
6728 } else {
6729 long lIndex = Hash (i,i);
6730 if (lIndex>=0) {
6731 diagCell = GetMatrixObject(lIndex);
6732 }
6733 }
6734 if (!diagCell) {
6735 newCell = value->Minus();
6736 } else {
6737 newCell = diagCell->Sub(value);
6738 }
6739 StoreObject(i,i,newCell,false);
6740 }
6741 }
6742 //_____________________________________________________________________________________________
StoreObject(long k,_MathObject * value,bool dup)6743 void _Matrix::StoreObject (long k, _MathObject* value, bool dup)
6744 {
6745 StoreObject (k/vDim, k%vDim, value, dup);
6746 }
6747
6748 //_____________________________________________________________________________________________
StoreFormula(long i,long j,_Formula & f,bool copyF,bool simplify)6749 void _Matrix::StoreFormula (long i, long j, _Formula& f, bool copyF, bool simplify) {
6750
6751 auto do_simplify = [] (_Formula *f) -> void {
6752 _Polynomial* is_poly = (_Polynomial*)f->ConstructPolynomial();
6753 if (is_poly) {
6754 _Formula pf (is_poly);
6755 f->Duplicate(&pf);
6756 }
6757 f->SimplifyConstants();
6758 };
6759
6760 if (is_expression_based()) {
6761 long lIndex = Hash (i, j);
6762 if (lIndex == -1) {
6763 IncreaseStorage();
6764 lIndex = Hash (i, j);
6765 }
6766
6767 //printf ("In (%d) %s\n",simplify, _String ((_String*)f.toStr (kFormulaStringConversionNormal)).get_str());
6768
6769 if (lIndex<0) {
6770 theIndex[-lIndex-2] = i*vDim+j;
6771 ((_Formula**)theData)[-lIndex-2] = copyF?(_Formula*)f.makeDynamic():&f;
6772 if (simplify) {
6773 do_simplify (((_Formula**)theData)[-lIndex-2]);
6774 //((_Formula**)theData)[-lIndex-2]->SimplifyConstants();
6775 }
6776 } else {
6777 if (((_Formula**)theData)[lIndex]!=(_Formula*)ZEROPOINTER) {
6778 delete ((_Formula**)theData)[lIndex];
6779 }
6780 ((_Formula**)theData)[lIndex] = copyF?(_Formula*)f.makeDynamic():&f;
6781 if (simplify) {
6782 do_simplify (((_Formula**)theData)[lIndex]);
6783 //((_Formula**)theData)[lIndex]->SimplifyConstants();
6784 }
6785 }
6786 //printf ("Stored %s\n", _String ((_String*)(((_Formula**)theData)[lIndex >= 0 ? lIndex : -lIndex-2])->toStr (kFormulaStringConversionNormal)).get_str());
6787
6788
6789 CheckIfSparseEnough();
6790 }
6791 }
6792
6793 //_____________________________________________________________________________________________
6794
6795
Swap(_Matrix & m)6796 void _Matrix::Swap (_Matrix& m){
6797 Exchange(theData,m.theData);
6798 Exchange(hDim,m.hDim);
6799 Exchange(vDim,m.vDim);
6800 Exchange(lDim,m.lDim);
6801 Exchange(theIndex,m.theIndex);
6802 Exchange(storageType,m.storageType);
6803 Exchange(bufferPerRow,m.bufferPerRow);
6804 Exchange(overflowBuffer,m.overflowBuffer);
6805 Exchange(allocationBlock,m.allocationBlock);
6806 Exchange(theValue,m.theValue);
6807 Exchange(cmd,m.cmd);
6808 }
6809
6810 //_____________________________________________________________________________________________
6811
AplusBx(_Matrix & B,hyFloat x)6812 void _Matrix::AplusBx (_Matrix& B, hyFloat x)
6813 {
6814 _Matrix temp (B);
6815 temp *= x;
6816 *this+=temp;
6817 }
6818
6819 //#define _SLKP_USE_SSE_INTRINSICS
6820
6821 //_____________________________________________________________________________________________
Sqr(hyFloat * _hprestrict_ stash)6822 hyFloat _Matrix::Sqr (hyFloat* _hprestrict_ stash) {
6823
6824 hyFloat diff = 0.;
6825 if (hDim != vDim) {
6826 return diff;
6827 }
6828 // not a square matrix
6829
6830 if (! (is_dense() && is_numeric()) ) {
6831 // sparse or non-numeric matrix
6832 _Matrix temp (hDim, vDim, storageType==0?theIndex!=nil:false, storageType);
6833 Multiply (temp, *this);
6834 Swap(temp);
6835 return DBL_EPSILON * 1.e4;
6836 } else {
6837 if (hDim==4) {
6838 // special case for nucleotides
6839 for (unsigned long i=0UL, k = 0UL; i<16; i+=4) {
6840 for (unsigned long j=0UL; j<4UL; j++, k++) {
6841 hyFloat p1 = theData[i] * theData [j];
6842 hyFloat p2 = theData[i+1] * theData [j+4];
6843 p1 += theData[i+2] * theData [j+8];
6844 p2 += theData[i+3] * theData [j+12];
6845 stash[k] = p1+p2;
6846 }
6847 }
6848 } else {
6849 long loopBound = (vDim >> 2) << 2;
6850 //vDim - vDim % 4;
6851 // loop interchange rocks!
6852
6853 #ifdef _SLKP_USE_ARM_NEON
6854 #define DO_GROUP_OP0(X,Y,k) Y = vld1q_f64(theData + col_offset + k); X = vmulq_f64(A4, Y);
6855 #define DO_GROUP_OP1(X,Y,k) X = vld1q_f64(dest + row_offset + k); Y = vld1q_f64(theData + col_offset + k); X = vfmaq_f64 (X,A4,Y);
6856 #define DO_GROUP_OP2(X,k) vst1q_f64 (dest + row_offset + k,X);
6857
6858 if (true) {
6859 hyFloat * _hprestrict_ dest = stash;
6860
6861 long ti = 0L,
6862 row_offset = 0L;
6863
6864 if (loopBound == 60UL) {
6865 if (hDim == 61) { // special case for universal code
6866
6867 for (long r = 0; r < 61; r++, row_offset += 61) {
6868 long col_offset = 0L;
6869 { // row 1
6870 float64x2_t A4 = vdupq_n_f64 (theData[ti]);
6871 float64x2_t X1, X2, X3, X4,
6872 Y1, Y2, Y3, Y4;
6873
6874 DO_GROUP_OP0 (X1, Y1, 0); DO_GROUP_OP0 (X2, Y2, 2); DO_GROUP_OP0 (X3, Y3, 4); DO_GROUP_OP0 (X4, Y4, 6);
6875 DO_GROUP_OP2 (X1,0); DO_GROUP_OP2 (X2,2); DO_GROUP_OP2 (X3,4); DO_GROUP_OP2 (X4,6);
6876 DO_GROUP_OP0 (X1, Y1, 8); DO_GROUP_OP0 (X2, Y2, 10); DO_GROUP_OP0 (X3, Y3, 12); DO_GROUP_OP0 (X4, Y4, 14);
6877 DO_GROUP_OP2 (X1,8); DO_GROUP_OP2 (X2,10); DO_GROUP_OP2 (X3,12); DO_GROUP_OP2 (X4,14);
6878 DO_GROUP_OP0 (X1, Y1, 16); DO_GROUP_OP0 (X2, Y2, 18); DO_GROUP_OP0 (X3, Y3, 20); DO_GROUP_OP0 (X4, Y4, 22);
6879 DO_GROUP_OP2 (X1,16); DO_GROUP_OP2 (X2,18); DO_GROUP_OP2 (X3,20); DO_GROUP_OP2 (X4,22);
6880 DO_GROUP_OP0 (X1, Y1, 24); DO_GROUP_OP0 (X2, Y2, 26); DO_GROUP_OP0 (X3, Y3, 28); DO_GROUP_OP0 (X4, Y4, 30);
6881 DO_GROUP_OP2 (X1,24); DO_GROUP_OP2 (X2,26); DO_GROUP_OP2 (X3,28); DO_GROUP_OP2 (X4,30);
6882 DO_GROUP_OP0 (X1, Y1, 32); DO_GROUP_OP0 (X2, Y2, 34); DO_GROUP_OP0 (X3, Y3, 36); DO_GROUP_OP0 (X4, Y4, 38);
6883 DO_GROUP_OP2 (X1,32); DO_GROUP_OP2 (X2,34); DO_GROUP_OP2 (X3,36); DO_GROUP_OP2 (X4,38);
6884 DO_GROUP_OP0 (X1, Y1, 40); DO_GROUP_OP0 (X2, Y2, 42); DO_GROUP_OP0 (X3, Y3, 44); DO_GROUP_OP0 (X4, Y4, 46);
6885 DO_GROUP_OP2 (X1,40); DO_GROUP_OP2 (X2,42); DO_GROUP_OP2 (X3,44); DO_GROUP_OP2 (X4,46);
6886 DO_GROUP_OP0 (X1, Y1, 48); DO_GROUP_OP0 (X2, Y2, 50); DO_GROUP_OP0 (X3, Y3, 52); DO_GROUP_OP0 (X4, Y4, 54);
6887 DO_GROUP_OP2 (X1,48); DO_GROUP_OP2 (X2,50); DO_GROUP_OP2 (X3,52); DO_GROUP_OP2 (X4,54);
6888 DO_GROUP_OP0 (X1, Y1, 56); DO_GROUP_OP0 (X2, Y2, 58);
6889 DO_GROUP_OP2 (X1,56); DO_GROUP_OP2 (X2,58);
6890
6891
6892 dest[row_offset + 60] = theData[ti] * theData[60];
6893 col_offset = 61;
6894 ti++;
6895 }
6896
6897 for (long c = 1; c < 61; c++, ti++, col_offset += 61) {
6898 float64x2_t A4 = vdupq_n_f64 (theData[ti]);
6899 float64x2_t X1, X2, X3, X4,
6900 Y1, Y2, Y3, Y4;
6901
6902 DO_GROUP_OP1 (X1, Y1, 0); DO_GROUP_OP1 (X2, Y2, 2); DO_GROUP_OP1 (X3, Y3, 4); DO_GROUP_OP1 (X4, Y4, 6);
6903 DO_GROUP_OP2 (X1,0); DO_GROUP_OP2 (X2,2); DO_GROUP_OP2 (X3,4); DO_GROUP_OP2 (X4,6);
6904 DO_GROUP_OP1 (X1, Y1, 8); DO_GROUP_OP1 (X2, Y2, 10); DO_GROUP_OP1 (X3, Y3, 12); DO_GROUP_OP1 (X4, Y4, 14);
6905 DO_GROUP_OP2 (X1,8); DO_GROUP_OP2 (X2,10); DO_GROUP_OP2 (X3,12); DO_GROUP_OP2 (X4,14);
6906 DO_GROUP_OP1 (X1, Y1, 16); DO_GROUP_OP1 (X2, Y2, 18); DO_GROUP_OP1 (X3, Y3, 20); DO_GROUP_OP1 (X4, Y4, 22);
6907 DO_GROUP_OP2 (X1,16); DO_GROUP_OP2 (X2,18); DO_GROUP_OP2 (X3,20); DO_GROUP_OP2 (X4,22);
6908 DO_GROUP_OP1 (X1, Y1, 24); DO_GROUP_OP1 (X2, Y2, 26); DO_GROUP_OP1 (X3, Y3, 28); DO_GROUP_OP1 (X4, Y4, 30);
6909 DO_GROUP_OP2 (X1,24); DO_GROUP_OP2 (X2,26); DO_GROUP_OP2 (X3,28); DO_GROUP_OP2 (X4,30);
6910 DO_GROUP_OP1 (X1, Y1, 32); DO_GROUP_OP1 (X2, Y2, 34); DO_GROUP_OP1 (X3, Y3, 36); DO_GROUP_OP1 (X4, Y4, 38);
6911 DO_GROUP_OP2 (X1,32); DO_GROUP_OP2 (X2,34); DO_GROUP_OP2 (X3,36); DO_GROUP_OP2 (X4,38);
6912 DO_GROUP_OP1 (X1, Y1, 40); DO_GROUP_OP1 (X2, Y2, 42); DO_GROUP_OP1 (X3, Y3, 44); DO_GROUP_OP1 (X4, Y4, 46);
6913 DO_GROUP_OP2 (X1,40); DO_GROUP_OP2 (X2,42); DO_GROUP_OP2 (X3,44); DO_GROUP_OP2 (X4,46);
6914 DO_GROUP_OP1 (X1, Y1, 48); DO_GROUP_OP1 (X2, Y2, 50); DO_GROUP_OP1 (X3, Y3, 52); DO_GROUP_OP1 (X4, Y4, 54);
6915 DO_GROUP_OP2 (X1,48); DO_GROUP_OP2 (X2,50); DO_GROUP_OP2 (X3,52); DO_GROUP_OP2 (X4,54);
6916 DO_GROUP_OP1 (X1, Y1, 56); DO_GROUP_OP1 (X2, Y2, 58);
6917 DO_GROUP_OP2 (X1,56); DO_GROUP_OP2 (X2,58);
6918
6919 dest[row_offset + 60] += theData[ti] * theData[col_offset + 60];
6920 }
6921 }
6922 } else {
6923 for (long r = 0; r < hDim; r++, row_offset += vDim) {
6924 long col_offset = 0L;
6925 { // row 1
6926 float64x2_t A4 = vdupq_n_f64 (theData[ti]);
6927 float64x2_t X1, X2, X3, X4,
6928 Y1, Y2, Y3, Y4;
6929
6930 DO_GROUP_OP0 (X1, Y1, 0); DO_GROUP_OP0 (X2, Y2, 2); DO_GROUP_OP0 (X3, Y3, 4); DO_GROUP_OP0 (X4, Y4, 6);
6931 DO_GROUP_OP2 (X1,0); DO_GROUP_OP2 (X2,2); DO_GROUP_OP2 (X3,4); DO_GROUP_OP2 (X4,6);
6932 DO_GROUP_OP0 (X1, Y1, 8); DO_GROUP_OP0 (X2, Y2, 10); DO_GROUP_OP0 (X3, Y3, 12); DO_GROUP_OP0 (X4, Y4, 14);
6933 DO_GROUP_OP2 (X1,8); DO_GROUP_OP2 (X2,10); DO_GROUP_OP2 (X3,12); DO_GROUP_OP2 (X4,14);
6934 DO_GROUP_OP0 (X1, Y1, 16); DO_GROUP_OP0 (X2, Y2, 18); DO_GROUP_OP0 (X3, Y3, 20); DO_GROUP_OP0 (X4, Y4, 22);
6935 DO_GROUP_OP2 (X1,16); DO_GROUP_OP2 (X2,18); DO_GROUP_OP2 (X3,20); DO_GROUP_OP2 (X4,22);
6936 DO_GROUP_OP0 (X1, Y1, 24); DO_GROUP_OP0 (X2, Y2, 26); DO_GROUP_OP0 (X3, Y3, 28); DO_GROUP_OP0 (X4, Y4, 30);
6937 DO_GROUP_OP2 (X1,24); DO_GROUP_OP2 (X2,26); DO_GROUP_OP2 (X3,28); DO_GROUP_OP2 (X4,30);
6938 DO_GROUP_OP0 (X1, Y1, 32); DO_GROUP_OP0 (X2, Y2, 34); DO_GROUP_OP0 (X3, Y3, 36); DO_GROUP_OP0 (X4, Y4, 38);
6939 DO_GROUP_OP2 (X1,32); DO_GROUP_OP2 (X2,34); DO_GROUP_OP2 (X3,36); DO_GROUP_OP2 (X4,38);
6940 DO_GROUP_OP0 (X1, Y1, 40); DO_GROUP_OP0 (X2, Y2, 42); DO_GROUP_OP0 (X3, Y3, 44); DO_GROUP_OP0 (X4, Y4, 46);
6941 DO_GROUP_OP2 (X1,40); DO_GROUP_OP2 (X2,42); DO_GROUP_OP2 (X3,44); DO_GROUP_OP2 (X4,46);
6942 DO_GROUP_OP0 (X1, Y1, 48); DO_GROUP_OP0 (X2, Y2, 50); DO_GROUP_OP0 (X3, Y3, 52); DO_GROUP_OP0 (X4, Y4, 54);
6943 DO_GROUP_OP2 (X1,48); DO_GROUP_OP2 (X2,50); DO_GROUP_OP2 (X3,52); DO_GROUP_OP2 (X4,54);
6944 DO_GROUP_OP0 (X1, Y1, 56); DO_GROUP_OP0 (X2, Y2, 58);
6945 DO_GROUP_OP2 (X1,56); DO_GROUP_OP2 (X2,58);
6946
6947
6948 for (long k = 60; k < vDim; k++) {
6949 dest[row_offset + k] = theData[ti] * theData[k];
6950 }
6951 col_offset = vDim;
6952 ti++;
6953 }
6954
6955 for (long c = 1; c < hDim; c++, ti++, col_offset += vDim) {
6956 float64x2_t A4 = vdupq_n_f64 (theData[ti]);
6957 float64x2_t X1, X2, X3, X4,
6958 Y1, Y2, Y3, Y4;
6959
6960 DO_GROUP_OP1 (X1, Y1, 0); DO_GROUP_OP1 (X2, Y2, 2); DO_GROUP_OP1 (X3, Y3, 4); DO_GROUP_OP1 (X4, Y4, 6);
6961 DO_GROUP_OP2 (X1,0); DO_GROUP_OP2 (X2,2); DO_GROUP_OP2 (X3,4); DO_GROUP_OP2 (X4,6);
6962 DO_GROUP_OP1 (X1, Y1, 8); DO_GROUP_OP1 (X2, Y2, 10); DO_GROUP_OP1 (X3, Y3, 12); DO_GROUP_OP1 (X4, Y4, 14);
6963 DO_GROUP_OP2 (X1,8); DO_GROUP_OP2 (X2,10); DO_GROUP_OP2 (X3,12); DO_GROUP_OP2 (X4,14);
6964 DO_GROUP_OP1 (X1, Y1, 16); DO_GROUP_OP1 (X2, Y2, 18); DO_GROUP_OP1 (X3, Y3, 20); DO_GROUP_OP1 (X4, Y4, 22);
6965 DO_GROUP_OP2 (X1,16); DO_GROUP_OP2 (X2,18); DO_GROUP_OP2 (X3,20); DO_GROUP_OP2 (X4,22);
6966 DO_GROUP_OP1 (X1, Y1, 24); DO_GROUP_OP1 (X2, Y2, 26); DO_GROUP_OP1 (X3, Y3, 28); DO_GROUP_OP1 (X4, Y4, 30);
6967 DO_GROUP_OP2 (X1,24); DO_GROUP_OP2 (X2,26); DO_GROUP_OP2 (X3,28); DO_GROUP_OP2 (X4,30);
6968 DO_GROUP_OP1 (X1, Y1, 32); DO_GROUP_OP1 (X2, Y2, 34); DO_GROUP_OP1 (X3, Y3, 36); DO_GROUP_OP1 (X4, Y4, 38);
6969 DO_GROUP_OP2 (X1,32); DO_GROUP_OP2 (X2,34); DO_GROUP_OP2 (X3,36); DO_GROUP_OP2 (X4,38);
6970 DO_GROUP_OP1 (X1, Y1, 40); DO_GROUP_OP1 (X2, Y2, 42); DO_GROUP_OP1 (X3, Y3, 44); DO_GROUP_OP1 (X4, Y4, 46);
6971 DO_GROUP_OP2 (X1,40); DO_GROUP_OP2 (X2,42); DO_GROUP_OP2 (X3,44); DO_GROUP_OP2 (X4,46);
6972 DO_GROUP_OP1 (X1, Y1, 48); DO_GROUP_OP1 (X2, Y2, 50); DO_GROUP_OP1 (X3, Y3, 52); DO_GROUP_OP1 (X4, Y4, 54);
6973 DO_GROUP_OP2 (X1,48); DO_GROUP_OP2 (X2,50); DO_GROUP_OP2 (X3,52); DO_GROUP_OP2 (X4,54);
6974 DO_GROUP_OP1 (X1, Y1, 56); DO_GROUP_OP1 (X2, Y2, 58);
6975 DO_GROUP_OP2 (X1,56); DO_GROUP_OP2 (X2,58);
6976
6977 for (long k = 60; k < vDim; k++) {
6978 dest[row_offset + k] += theData[ti] * theData[col_offset + k];
6979 }
6980 }
6981 }
6982 }
6983 } else {
6984 for (long r = 0; r < hDim; r++, row_offset += vDim) {
6985 long col_offset = 0L;
6986 { // row 1
6987 float64x2_t A4 = vdupq_n_f64 (theData[ti]);
6988 #pragma GCC unroll 4
6989 #pragma clang loop vectorize(enable)
6990 #pragma clang loop interleave(enable)
6991 #pragma clang loop unroll(enable)
6992 #pragma GCC ivdep
6993 #pragma ivdep
6994 for (long k = 0; k < loopBound; k+=4) {
6995 float64x2_t D4, B4, X1, X2;
6996 DO_GROUP_OP0 (D4, B4, k);
6997 DO_GROUP_OP0 (X1, X2, k+2);
6998 DO_GROUP_OP2 (D4,k);
6999 DO_GROUP_OP2 (X1,k+2);
7000 }
7001
7002 for (long k = loopBound; k < vDim; k++) {
7003 dest[row_offset + k] = theData[ti] * theData[k];
7004 }
7005 col_offset = vDim;
7006 ti++;
7007 }
7008
7009 for (long c = 1; c < hDim; c++, ti++, col_offset += vDim) {
7010 float64x2_t A4 = vdupq_n_f64 (theData[ti]);
7011 for (long k = 0; k < loopBound; k+=4) {
7012 float64x2_t D4, B4, X1, X2;
7013 DO_GROUP_OP1 (D4, B4, k);
7014 DO_GROUP_OP1 (X1, X2, k+2);
7015 DO_GROUP_OP2 (D4,k);
7016 DO_GROUP_OP2 (X1,k+2);
7017 }
7018
7019 for (long k = loopBound; k < vDim; k++) {
7020 dest[row_offset + k] += theData[ti] * theData[col_offset + k];
7021 }
7022 }
7023 }
7024 }
7025 } else {
7026
7027 #endif
7028
7029 #ifdef _SLKP_USE_AVX_INTRINSICS
7030 #define DO_GROUP_OP0(X,Y,k) Y = _mm256_loadu_pd(theData + col_offset + k); X = _mm256_mul_pd(A4, Y);
7031 #ifdef _SLKP_USE_FMA3_INTRINSICS
7032 #define DO_GROUP_OP1(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k); Y = _mm256_loadu_pd(theData + col_offset + k); X = _mm256_fmadd_pd (A4,Y,X);
7033 #define DO_GROUP_OP2(X,k) _mm256_storeu_pd (dest + row_offset + k,X);
7034 #else
7035 #define DO_GROUP_OP1(X,Y,k) X = _mm256_loadu_pd(dest + row_offset + k); Y = _mm256_loadu_pd(theData + col_offset + k); X = _mm256_add_pd (X, _mm256_mul_pd(A4, Y));
7036 #define DO_GROUP_OP2(X,k) _mm256_storeu_pd (dest + row_offset + k,X);
7037 #endif
7038 if (true) {
7039 hyFloat * _hprestrict_ dest = stash;
7040
7041 long ti = 0L,
7042 row_offset = 0L;
7043
7044 if (loopBound == 60UL) { // codons
7045 if (hDim == 61) { // special case universal genetic code
7046 for (long r = 0; r < 61; r++, row_offset += 61) {
7047 long col_offset = 0L;
7048
7049 { // handle first row separately to zero out the dest entries
7050 __m256d A4 = _mm256_set1_pd(theData[ti]);
7051 __m256d D4_1, D4_2, D4_3, D4_4;
7052 __m256d B4_1, B4_2, B4_3, B4_4;
7053
7054 DO_GROUP_OP0 (D4_1,B4_1,0);
7055 DO_GROUP_OP0 (D4_2,B4_2,4);
7056 DO_GROUP_OP0 (D4_3,B4_3,8);
7057 DO_GROUP_OP0 (D4_4,B4_4,12);
7058 DO_GROUP_OP2 (D4_1,0);
7059 DO_GROUP_OP2 (D4_2,4);
7060 DO_GROUP_OP2 (D4_3,8);
7061 DO_GROUP_OP2 (D4_4,12);
7062
7063 DO_GROUP_OP0 (D4_1,B4_1,16);
7064 DO_GROUP_OP0 (D4_2,B4_2,20);
7065 DO_GROUP_OP0 (D4_3,B4_3,24);
7066 DO_GROUP_OP0 (D4_4,B4_4,28);
7067 DO_GROUP_OP2 (D4_1,16);
7068 DO_GROUP_OP2 (D4_2,20);
7069 DO_GROUP_OP2 (D4_3,24);
7070 DO_GROUP_OP2 (D4_4,28);
7071
7072 DO_GROUP_OP0 (D4_1,B4_1,32);
7073 DO_GROUP_OP0 (D4_2,B4_2,36);
7074 DO_GROUP_OP0 (D4_3,B4_3,40);
7075 DO_GROUP_OP0 (D4_4,B4_4,44);
7076 DO_GROUP_OP2 (D4_1,32);
7077 DO_GROUP_OP2 (D4_2,36);
7078 DO_GROUP_OP2 (D4_3,40);
7079 DO_GROUP_OP2 (D4_4,44);
7080
7081 DO_GROUP_OP0 (D4_1,B4_1,48);
7082 DO_GROUP_OP0 (D4_2,B4_2,52);
7083 DO_GROUP_OP0 (D4_3,B4_3,56);
7084 DO_GROUP_OP2 (D4_1,48);
7085 DO_GROUP_OP2 (D4_2,52);
7086 DO_GROUP_OP2 (D4_3,56);
7087
7088 dest[row_offset + 60] = theData[ti] * theData[60];
7089 ti++;
7090 col_offset = vDim;
7091 }
7092 for (long c = 1; c < 61; c++, ti++, col_offset += 61) {
7093 __m256d A4 = _mm256_set1_pd(theData[ti]);
7094
7095 __m256d D4_1, D4_2, D4_3, D4_4;
7096 __m256d B4_1, B4_2, B4_3, B4_4;
7097
7098 DO_GROUP_OP1 (D4_1,B4_1,0);
7099 DO_GROUP_OP1 (D4_2,B4_2,4);
7100 DO_GROUP_OP1 (D4_3,B4_3,8);
7101 DO_GROUP_OP1 (D4_4,B4_4,12);
7102 DO_GROUP_OP2 (D4_1,0);
7103 DO_GROUP_OP2 (D4_2,4);
7104 DO_GROUP_OP2 (D4_3,8);
7105 DO_GROUP_OP2 (D4_4,12);
7106
7107 DO_GROUP_OP1 (D4_1,B4_1,16);
7108 DO_GROUP_OP1 (D4_2,B4_2,20);
7109 DO_GROUP_OP1 (D4_3,B4_3,24);
7110 DO_GROUP_OP1 (D4_4,B4_4,28);
7111 DO_GROUP_OP2 (D4_1,16);
7112 DO_GROUP_OP2 (D4_2,20);
7113 DO_GROUP_OP2 (D4_3,24);
7114 DO_GROUP_OP2 (D4_4,28);
7115
7116 DO_GROUP_OP1 (D4_1,B4_1,32);
7117 DO_GROUP_OP1 (D4_2,B4_2,36);
7118 DO_GROUP_OP1 (D4_3,B4_3,40);
7119 DO_GROUP_OP1 (D4_4,B4_4,44);
7120 DO_GROUP_OP2 (D4_1,32);
7121 DO_GROUP_OP2 (D4_2,36);
7122 DO_GROUP_OP2 (D4_3,40);
7123 DO_GROUP_OP2 (D4_4,44);
7124
7125 DO_GROUP_OP1 (D4_1,B4_1,48);
7126 DO_GROUP_OP1 (D4_2,B4_2,52);
7127 DO_GROUP_OP1 (D4_3,B4_3,56);
7128 DO_GROUP_OP2 (D4_1,48);
7129 DO_GROUP_OP2 (D4_2,52);
7130 DO_GROUP_OP2 (D4_3,56);
7131
7132 dest[row_offset + 60] += theData[ti] * theData[col_offset + 60];
7133
7134 }
7135 }
7136
7137 } else {
7138 for (long r = 0; r < hDim; r++, row_offset += vDim) {
7139 long col_offset = 0L;
7140
7141 { // handle first row separately to zero out the dest entries
7142 __m256d A4 = _mm256_set1_pd(theData[ti]);
7143 __m256d D4_1, D4_2, D4_3, D4_4;
7144 __m256d B4_1, B4_2, B4_3, B4_4;
7145
7146 DO_GROUP_OP0 (D4_1,B4_1,0);
7147 DO_GROUP_OP0 (D4_2,B4_2,4);
7148 DO_GROUP_OP0 (D4_3,B4_3,8);
7149 DO_GROUP_OP0 (D4_4,B4_4,12);
7150 DO_GROUP_OP2 (D4_1,0);
7151 DO_GROUP_OP2 (D4_2,4);
7152 DO_GROUP_OP2 (D4_3,8);
7153 DO_GROUP_OP2 (D4_4,12);
7154
7155 DO_GROUP_OP0 (D4_1,B4_1,16);
7156 DO_GROUP_OP0 (D4_2,B4_2,20);
7157 DO_GROUP_OP0 (D4_3,B4_3,24);
7158 DO_GROUP_OP0 (D4_4,B4_4,28);
7159 DO_GROUP_OP2 (D4_1,16);
7160 DO_GROUP_OP2 (D4_2,20);
7161 DO_GROUP_OP2 (D4_3,24);
7162 DO_GROUP_OP2 (D4_4,28);
7163
7164 DO_GROUP_OP0 (D4_1,B4_1,32);
7165 DO_GROUP_OP0 (D4_2,B4_2,36);
7166 DO_GROUP_OP0 (D4_3,B4_3,40);
7167 DO_GROUP_OP0 (D4_4,B4_4,44);
7168 DO_GROUP_OP2 (D4_1,32);
7169 DO_GROUP_OP2 (D4_2,36);
7170 DO_GROUP_OP2 (D4_3,40);
7171 DO_GROUP_OP2 (D4_4,44);
7172
7173 DO_GROUP_OP0 (D4_1,B4_1,48);
7174 DO_GROUP_OP0 (D4_2,B4_2,52);
7175 DO_GROUP_OP0 (D4_3,B4_3,56);
7176 DO_GROUP_OP2 (D4_1,48);
7177 DO_GROUP_OP2 (D4_2,52);
7178 DO_GROUP_OP2 (D4_3,56);
7179
7180 for (long k = loopBound; k < vDim; k++) {
7181 dest[row_offset + k] = theData[ti] * theData[k];
7182 }
7183 ti++;
7184 col_offset = vDim;
7185 }
7186 for (long c = 1; c < hDim; c++, ti++, col_offset += vDim) {
7187 __m256d A4 = _mm256_set1_pd(theData[ti]);
7188
7189 __m256d D4_1, D4_2, D4_3, D4_4;
7190 __m256d B4_1, B4_2, B4_3, B4_4;
7191
7192 DO_GROUP_OP1 (D4_1,B4_1,0);
7193 DO_GROUP_OP1 (D4_2,B4_2,4);
7194 DO_GROUP_OP1 (D4_3,B4_3,8);
7195 DO_GROUP_OP1 (D4_4,B4_4,12);
7196 DO_GROUP_OP2 (D4_1,0);
7197 DO_GROUP_OP2 (D4_2,4);
7198 DO_GROUP_OP2 (D4_3,8);
7199 DO_GROUP_OP2 (D4_4,12);
7200
7201 DO_GROUP_OP1 (D4_1,B4_1,16);
7202 DO_GROUP_OP1 (D4_2,B4_2,20);
7203 DO_GROUP_OP1 (D4_3,B4_3,24);
7204 DO_GROUP_OP1 (D4_4,B4_4,28);
7205 DO_GROUP_OP2 (D4_1,16);
7206 DO_GROUP_OP2 (D4_2,20);
7207 DO_GROUP_OP2 (D4_3,24);
7208 DO_GROUP_OP2 (D4_4,28);
7209
7210 DO_GROUP_OP1 (D4_1,B4_1,32);
7211 DO_GROUP_OP1 (D4_2,B4_2,36);
7212 DO_GROUP_OP1 (D4_3,B4_3,40);
7213 DO_GROUP_OP1 (D4_4,B4_4,44);
7214 DO_GROUP_OP2 (D4_1,32);
7215 DO_GROUP_OP2 (D4_2,36);
7216 DO_GROUP_OP2 (D4_3,40);
7217 DO_GROUP_OP2 (D4_4,44);
7218
7219 DO_GROUP_OP1 (D4_1,B4_1,48);
7220 DO_GROUP_OP1 (D4_2,B4_2,52);
7221 DO_GROUP_OP1 (D4_3,B4_3,56);
7222 DO_GROUP_OP2 (D4_1,48);
7223 DO_GROUP_OP2 (D4_2,52);
7224 DO_GROUP_OP2 (D4_3,56);
7225
7226 for (long k = loopBound; k < vDim; k++) {
7227 dest[row_offset + k] += theData[ti] * theData[col_offset + k];
7228 }
7229 }
7230 }
7231 }
7232 } else { // something else
7233 for (long r = 0; r < hDim; r++, row_offset += vDim) {
7234 long col_offset = 0L;
7235
7236 {
7237 __m256d A4 = _mm256_set1_pd(theData[ti]);
7238 for (long k = 0; k < loopBound; k+=4) {
7239 __m256d D4, B4;
7240 DO_GROUP_OP0 (D4, B4, k);
7241 DO_GROUP_OP2 (D4,k);
7242 }
7243
7244 for (long k = loopBound; k < vDim; k++) {
7245 dest[row_offset + k] = theData[ti] * theData[k];
7246 }
7247 col_offset = vDim;
7248 ti++;
7249 }
7250
7251 for (long c = 1; c < hDim; c++, ti++, col_offset += vDim) {
7252 __m256d A4 = _mm256_set1_pd(theData[ti]);
7253 for (long k = 0; k < loopBound; k+=4) {
7254 __m256d D4, B4;
7255 DO_GROUP_OP1 (D4, B4, k);
7256 DO_GROUP_OP2 (D4,k);
7257 }
7258
7259 for (long k = loopBound; k < vDim; k++) {
7260 dest[row_offset + k] += theData[ti] * theData[col_offset + k];
7261 }
7262 }
7263 }
7264 }
7265 } else {
7266
7267 #endif
7268
7269
7270 hyFloat * _hprestrict_ column = stash+lDim;
7271 hyFloat const * source = theData;
7272
7273 for (long j = 0; j < vDim; j++) {
7274 for (long c = 0; c < vDim; c++) {
7275 column[c] = source[j + c * vDim];
7276 }
7277
7278 #ifdef _SLKP_USE_AVX_INTRINSICS
7279 if (vDim == 61UL) {
7280 for (unsigned long i = 0; i < lDim; i += 61) {
7281 hyFloat * _hprestrict_ row = theData + i;
7282
7283
7284 __m256d sum256;
7285
7286 #ifdef _SLKP_USE_FMA3_INTRINSICS
7287
7288
7289 sum256 = _mm256_fmadd_pd (_mm256_loadu_pd (row), _mm256_loadu_pd (column),
7290 _mm256_fmadd_pd (_mm256_loadu_pd (row+4), _mm256_loadu_pd (column+4),
7291 _mm256_mul_pd (_mm256_loadu_pd (row+8), _mm256_loadu_pd (column+8))));
7292
7293 for (unsigned long k = 12UL; k < 60UL; k += 12UL) {
7294 sum256 = _mm256_fmadd_pd (_mm256_loadu_pd (row+k), _mm256_loadu_pd (column+k),
7295 _mm256_fmadd_pd (_mm256_loadu_pd (row+k+4), _mm256_loadu_pd (column+k+4),
7296 _mm256_fmadd_pd (_mm256_loadu_pd (row+k+8), _mm256_loadu_pd (column+k+8), sum256))
7297 );
7298 }
7299 #else
7300
7301 sum256 = _mm256_setzero_pd();
7302 for (unsigned long k = 0UL; k < 60UL; k += 12UL) {
7303 __m256d term0 = _mm256_mul_pd (_mm256_loadu_pd (row+k), _mm256_loadu_pd (column+k));
7304 __m256d term1 = _mm256_mul_pd (_mm256_loadu_pd (row+k+4), _mm256_loadu_pd (column+k+4));
7305 __m256d term2 = _mm256_mul_pd (_mm256_loadu_pd (row+k+8), _mm256_loadu_pd (column+k+8));
7306
7307 __m256d sum01 = _mm256_add_pd(term0,term1);
7308 __m256d plus2 = _mm256_add_pd(term2, sum256);
7309
7310 sum256 = _mm256_add_pd (sum01, plus2);
7311 }
7312 #endif
7313
7314 stash[i+j] = _avx_sum_4(sum256) + row[60] * column [60];
7315
7316 }
7317
7318 } else {
7319 for (unsigned long i = 0; i < lDim; i += vDim) {
7320 hyFloat * row = theData + i;
7321
7322 __m256d sum256 = _mm256_setzero_pd();
7323
7324 long k;
7325
7326 for (k = 0; k < loopBound; k += 4) {
7327 #ifdef _SLKP_USE_FMA3_INTRINSICS
7328 sum256 = _mm256_fmadd_pd (_mm256_loadu_pd (row+k), _mm256_loadu_pd (column+k), sum256);
7329 #else
7330 sum256 = _mm256_add_pd (_mm256_mul_pd (_mm256_loadu_pd (row+k), _mm256_loadu_pd (column+k)), sum256);
7331 #endif
7332 }
7333
7334 hyFloat result = _avx_sum_4(sum256);
7335
7336 for (; k < vDim; k++) {
7337 result += row[k] * column [k];
7338 }
7339
7340 stash[i+j] = result;
7341
7342 }
7343 }
7344 #ifdef _SLKP_USE_AVX_INTRINSICS
7345 }
7346 #endif
7347 #else
7348 for (long i = 0; i < lDim; i += vDim) {
7349 hyFloat * row = theData + i,
7350 buffer [4] = {0.,0.,0.,0.};
7351
7352
7353 unsigned long k;
7354
7355 for (k = 0UL; k < loopBound; k += 4UL) {
7356 buffer [0] += row[k] * column [k];
7357 buffer [1] += row[k+1] * column [k+1];
7358 buffer [2] += row[k+2] * column [k+2];
7359 buffer [3] += row[k+3] * column [k+3];
7360 }
7361
7362 for (; k < vDim; k++) {
7363 buffer[0] += row[k] * column [k];
7364 }
7365
7366 stash[i+j] = (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]);
7367 }
7368 #endif
7369 }
7370 }
7371 #ifdef _SLKP_USE_ARM_NEON
7372 }
7373 #endif
7374
7375 long lDimmod4 = (lDim >> 2) << 2;
7376 hyFloat diffs[4] = {0.0,0.0,0.0,0.0};
7377
7378 for (long s = 0; s < lDimmod4; s+=4) {
7379 hyFloat d1 = fabs (theData[s ] - stash[s ]);
7380 hyFloat d2 = fabs (theData[s+1] - stash[s+1]);
7381 hyFloat d3 = fabs (theData[s+2] - stash[s+2]);
7382 hyFloat d4 = fabs (theData[s+3] - stash[s+3]);
7383 if (d1 > diffs[0]) diffs[0] = d1;
7384 if (d2 > diffs[1]) diffs[1] = d2;
7385 if (d3 > diffs[2]) diffs[2] = d3;
7386 if (d4 > diffs[3]) diffs[3] = d4;
7387 }
7388
7389 for (long s = lDimmod4; s < lDim; s++) {
7390 hyFloat d1 = fabs (theData[s] - stash[s]);
7391 if (d1 > diffs[0]) diffs[0] = d1;
7392 }
7393
7394 diff = MAX (MAX (diffs[0], diffs[1]), MAX (diffs[2], diffs[3]));
7395
7396 memcpy (theData, stash, lDim * sizeof (hyFloat));
7397
7398 /*for (long s = 0; s < lDim; s++) {
7399 StoreIfGreater(diff, fabs (theData[s] - stash[s]));
7400 theData[s] = stash[s];
7401 }*/
7402 }
7403 return diff;
7404 }
7405 //_____________________________________________________________________________________________
AgreeObjects(_Matrix & m)7406 void _Matrix::AgreeObjects (_Matrix& m)
7407 {
7408 if (storageType==2) {
7409 if (toPolyOrNot!=0.0) {
7410 ConvertFormulas2Poly ();
7411 } else {
7412 Evaluate(true);
7413 }
7414 }
7415
7416 if (m.storageType==2) {
7417 if (toPolyOrNot!=0.0) {
7418 m.ConvertFormulas2Poly ();
7419 } else {
7420 m.Evaluate(true);
7421 }
7422 }
7423
7424 if (storageType!=m.storageType) {
7425 if (toPolyOrNot) {
7426 if (storageType == 1) {
7427 ConvertNumbers2Poly ();
7428 } else {
7429 m.ConvertNumbers2Poly ();
7430 }
7431 } else {
7432 if (storageType == 1) {
7433 m.Evaluate (true);
7434 } else {
7435 Evaluate ();
7436 }
7437 }
7438 }
7439 }
7440 //_____________________________________________________________________________________________
ConvertFormulas2Poly(bool force2numbers)7441 void _Matrix::ConvertFormulas2Poly (bool force2numbers)
7442 {
7443 bool conversionFlag = true;
7444 _MathObject** tempStorage = (_MathObject**)MatrixMemAllocate(sizeof(void*)*lDim);
7445
7446 long i;
7447
7448 for (i=0; i<lDim; i++) {
7449 tempStorage[i]=ZEROPOINTER;
7450 }
7451
7452
7453 if (theIndex) { // sparse
7454 for (i=0; i<lDim; i++) {
7455 if (IsNonEmpty(i)) {
7456 HBLObjectRef polyCell = ((_Formula**)theData)[i]->ConstructPolynomial();
7457 if (polyCell) { // valid polynomial conversion
7458 tempStorage[i] = (HBLObjectRef)polyCell;
7459 polyCell->AddAReference();
7460 } else {
7461 conversionFlag = false;
7462 break;
7463 }
7464 }
7465 }
7466 if (conversionFlag) {
7467 // check for "*" entries
7468 for (i=0; i<lDim; i++) {
7469 if (IsNonEmpty(i)) {
7470 if (((_Formula**)theData)[i]->IsEmpty()) { // "*" entry
7471 long r = theIndex[i]/vDim, c = theIndex[i]%vDim;
7472 _Polynomial diag;
7473 for (long j=0; j<vDim; j++) {
7474 if (j==c) {
7475 continue;
7476 }
7477 long h = Hash (r,j);
7478 if (h>=0) {
7479 _Polynomial * temp = (_Polynomial *)diag.Sub(tempStorage[h], nil);
7480 diag.Duplicate (temp);
7481 DeleteObject (temp);
7482 }
7483 }
7484 DeleteObject(tempStorage[i]);
7485 tempStorage[i]=(_Polynomial*)diag.makeDynamic();
7486 }
7487 }
7488 }
7489 }
7490 } else {
7491 for (long i=0; i<lDim; i++) {
7492 _Formula* f = ((_Formula**)theData)[i];
7493 if (f->IsEmpty()) {
7494 continue;
7495 }
7496 HBLObjectRef polyCell = f->ConstructPolynomial();
7497 if (polyCell) { // valid polynomial conversion
7498 tempStorage[i] = (HBLObjectRef)polyCell;
7499 polyCell->AddAReference();
7500 } else {
7501 conversionFlag = false;
7502 break;
7503 }
7504 }
7505 if (conversionFlag) {
7506 // check for "*" entries
7507 for (long i=0; i<lDim; i++) {
7508 if (((_Formula**)theData)[i]->IsEmpty()) { // "*" entry
7509 long r = i/vDim;
7510 _Polynomial diag;
7511 for (long j=vDim*r; j<vDim*(r+1); j++) {
7512 if (j==i) {
7513 continue;
7514 }
7515 _Polynomial * temp = (_Polynomial *)diag.Sub(tempStorage[j], nil);
7516 diag.Duplicate (temp);
7517 DeleteObject (temp);
7518 }
7519 DeleteObject(tempStorage[i]);
7520 tempStorage[i]=(_Polynomial*)diag.makeDynamic();
7521 }
7522 }
7523 }
7524 }
7525
7526 if (conversionFlag) { // successful conversion
7527 ClearFormulae();
7528 MatrixMemFree (theData);
7529 theData = (hyFloat*) tempStorage;
7530 storageType = 0;
7531 if (!theIndex) {
7532 _Polynomial zero_polynomial;
7533 for (i=0; i<lDim; i++)
7534 if (!GetMatrixObject (i)) {
7535 StoreObject (i,&zero_polynomial,true);
7536 }
7537 }
7538 } else {
7539 for (long i=0; i<lDim; i++) {
7540 DeleteObject (tempStorage[i]);
7541 }
7542 MatrixMemFree (tempStorage);
7543 if (force2numbers) {
7544 Evaluate(true);
7545 }
7546 }
7547
7548 }
7549
7550 //_____________________________________________________________________________________________
ConvertNumbers2Poly(void)7551 void _Matrix::ConvertNumbers2Poly (void)
7552 {
7553 _MathObject ** tempStorage = (_MathObject**)MatrixMemAllocate (lDim*sizeof (void*));
7554 if (!theIndex) {
7555 for (long i=0; i<lDim; i++) {
7556 tempStorage[i]=new _Polynomial (theData[i]);
7557 }
7558 } else {
7559 for (long i=0; i<lDim; i++)
7560 if (IsNonEmpty (i)) {
7561 tempStorage[i]=new _Polynomial (theData[i]);
7562 } else {
7563 tempStorage[i] = nil;
7564 }
7565 }
7566 MatrixMemFree (theData);
7567 theData = (hyFloat*) tempStorage;
7568 storageType = 0;
7569 }
7570
7571
7572 //_____________________________________________________________________________________________
operator +=(_Matrix & m)7573 void _Matrix::operator += (_Matrix& m)
7574 {
7575 AgreeObjects (m);
7576 if ((!m.theIndex) && theIndex) {
7577 CheckIfSparseEnough(true);
7578 }
7579 AddMatrix (*this,m);
7580 }
7581
7582 //______________________________________________________________
7583
CompareRows(const long row1,const long row2)7584 long _Matrix::CompareRows (const long row1, const long row2) {
7585 for (long column_id = 0; column_id < vDim; column_id ++) {
7586 hyFloat v1 = theData[row1*vDim+column_id],
7587 v2 = theData[row2*vDim+column_id];
7588 if (!CheckEqual (v1,v2)) {
7589 return (v1 < v2)?-1L:1L;
7590 }
7591 }
7592 return 0L;
7593 }
7594
7595 //______________________________________________________________
7596
SwapRows(const long row1,const long row2)7597 void _Matrix::SwapRows (const long row1, const long row2) {
7598 long idx1 = row1*vDim,
7599 idx2 = row2*vDim;
7600 for (long column_id = 0; column_id < vDim; column_id ++) {
7601 hyFloat t = theData[idx1];
7602 theData[idx1++] = theData[idx2];
7603 theData[idx2++] = t;
7604 }
7605 }
7606 //______________________________________________________________
7607
RecursiveIndexSort(long from,long to,_SimpleList * index)7608 void _Matrix::RecursiveIndexSort (long from, long to, _SimpleList* index) {
7609 long middle = (from+to) >> 1,
7610 bottommove = 1L,
7611 topmove = 1L;
7612
7613 /*
7614 Use '+' to denote an element that is gretae than 'M' (the 'middle' element)
7615 and '-' to denote an element than is less than 'M'
7616
7617 Initially we may have something like
7618
7619 --++--+M--+++--++-
7620 and we want to end up with
7621 ---------M+++++++
7622
7623 Initially, we arrange the elements as
7624
7625 ----+++M-----++++++, and then swap 'bottommove' pluses (of which there are 3 in this case)
7626 with 'topmove' minuses (of which there are 5)
7627
7628 */
7629
7630
7631 if (middle)
7632 while (middle-bottommove>=from && CompareRows (middle-bottommove, middle) > 0L) {
7633 bottommove++;
7634 }
7635 if (from<to)
7636 while (middle+topmove<=to && CompareRows (middle+topmove,middle) < 0L) {
7637 topmove++;
7638 }
7639
7640 for (long i=from; i<middle-bottommove; i++)
7641 if (CompareRows (i, middle) >= 0L) {
7642 SwapRows (middle-bottommove, i);
7643 index->Swap(middle-bottommove,i);
7644 bottommove++;
7645
7646 while (middle-bottommove>=from && CompareRows (middle-bottommove, middle) > 0L) {
7647 bottommove++;
7648 }
7649 }
7650
7651 {
7652 for (long i=middle+topmove+1; i<=to; i++)
7653 if (CompareRows(i,middle) <= 0L) {
7654 SwapRows (i, middle+topmove);
7655 index->Swap(i, middle+topmove);
7656
7657 topmove++;
7658 while (middle+topmove<=to && CompareRows (middle+topmove,middle) < 0L) {
7659 topmove++;
7660 }
7661 }
7662 }
7663
7664 if (topmove==bottommove) {
7665 for (long i=1; i<bottommove; i++) {
7666 SwapRows(middle+i, middle-i);
7667 index->Swap (middle+i, middle-i);
7668 }
7669 } else if (topmove>bottommove) {
7670 long shift = topmove-bottommove;
7671 // in the example above, shift = 2
7672
7673 for (long i=1; i<bottommove; i++) {
7674 SwapRows (middle-i, middle+i+shift);
7675 index->Swap(middle-i, middle+i+shift);
7676 }
7677 // at the end of this loop, the example above will look like
7678 // -------M--+++++++++, so now if we swap 'M' with the last '-', we'll arrive at the desired configuration
7679
7680 SwapRows (middle, middle+shift);
7681 index->Swap (middle, middle+shift);
7682 middle+=shift;
7683
7684 } else {
7685 long shift = bottommove-topmove;
7686 for (long i=1; i<topmove; i++) {
7687 SwapRows (middle+i, middle-i-shift);
7688 index->Swap (middle+i, middle-i-shift);
7689 }
7690
7691 SwapRows (middle, middle-shift);
7692 index->Swap (middle, middle-shift);
7693 middle-=shift;
7694 }
7695
7696 if (to>middle+1) {
7697 RecursiveIndexSort (middle+1,to, index);
7698 }
7699 if (from<middle-1) {
7700 RecursiveIndexSort (from,middle-1, index);
7701 }
7702 }
7703
7704 //_____________________________________________________________________________________________
SortMatrixOnColumn(HBLObjectRef mp,HBLObjectRef cache)7705 HBLObjectRef _Matrix::SortMatrixOnColumn (HBLObjectRef mp, HBLObjectRef cache)
7706 {
7707 if (storageType!=1) {
7708 HandleApplicationError ("Only numeric matrices can be sorted");
7709 return new _MathObject();
7710 }
7711
7712 if (theData == nil) {
7713 return new _Matrix (0,0);
7714 }
7715
7716 _SimpleList sortOn;
7717
7718 if (mp->ObjectClass () != NUMBER || mp->Value() < 0.0 || mp->Value () > GetVDim()-1) {
7719 bool goodMe = false;
7720 if (mp->ObjectClass () == MATRIX) {
7721 _Matrix * sortOnM = (_Matrix*)((_Matrix*)mp)->ComputeNumeric();
7722 long sortBy = sortOnM->GetHDim()*sortOnM->GetVDim(),
7723 maxColumnID = GetVDim();
7724
7725 for (long k=0; k<sortBy; k=k+1) {
7726 long idx = (*sortOnM)[k];
7727 if (idx < 0 || idx >= maxColumnID) {
7728 HandleApplicationError (_String("Invalid column index to sort on in call to ") & __PRETTY_FUNCTION__ & " : " & idx);
7729 return new _MathObject();
7730 }
7731 sortOn << idx;
7732 }
7733 goodMe = sortOn.lLength;
7734 }
7735 if (!goodMe) {
7736 HandleApplicationError (_String ("Invalid column index to sort the matrix on:") & _String((_String*)mp->toStr()).Enquote());
7737 return new _MathObject;
7738 }
7739 } else {
7740 sortOn << mp->Value();
7741 }
7742
7743 // TODO SLKP 20111109 -- replace with a generic sort function
7744 // the code below is BROKEN
7745
7746 _SimpleList idx (hDim,0,1);
7747 _Matrix theColumn (hDim,sortOn.lLength,false,true);
7748
7749 for (unsigned long col2Sort = 0; col2Sort < sortOn.lLength; col2Sort++) {
7750 long colIdx = sortOn.list_data[col2Sort];
7751
7752 if (theIndex)
7753 for (long k=0; k<hDim; k++) {
7754 theColumn.theData[col2Sort+k*sortOn.lLength] = (*this)(k, colIdx);
7755 }
7756 else
7757 for (long k=0, j = colIdx; k<hDim; k++, j+=vDim) {
7758 theColumn.theData[col2Sort+k*sortOn.lLength] = theData[j];
7759 }
7760
7761 }
7762
7763 theColumn.RecursiveIndexSort (0, hDim-1, &idx);
7764 /*for (long i = 1; i < hDim; i++) {
7765 if (theColumn.theData[i-1] > theColumn.theData[i]) {
7766 HandleApplicationError("Resulting matrix is not properly sorted");
7767 }
7768 }*/
7769
7770
7771 _Matrix *result = (_Matrix*)_returnMatrixOrUseCache(hDim, vDim, _NUMERICAL_TYPE, theIndex, cache);
7772
7773 if (theIndex) {
7774 _SimpleList revIdx (hDim,0,1);
7775 SortLists (&idx, &revIdx);
7776 for (long r=0; r<lDim; r++) {
7777 long oi = theIndex[r];
7778
7779 if (oi >= 0) {
7780 long v = oi%vDim,
7781 h = oi/vDim,
7782 ni = revIdx.list_data[h]*vDim+v;
7783
7784 (*result)[ni] = theData[r];
7785 }
7786 }
7787 } else
7788 for (long r=0; r<hDim; r++) {
7789 long remapped = idx.list_data[r];
7790 remapped *= vDim;
7791 for (long c=r*vDim; c<r*vDim+vDim; c++, remapped++) {
7792 result->theData[c] = theData[remapped];
7793 }
7794 }
7795
7796
7797 return result;
7798 }
7799
7800 //_____________________________________________________________________________________________
PoissonLL(HBLObjectRef mp,HBLObjectRef cache)7801 HBLObjectRef _Matrix::PoissonLL (HBLObjectRef mp, HBLObjectRef cache)
7802 {
7803 if (!is_numeric()) {
7804 HandleApplicationError ("Only numeric matrices can be passed to Poisson Log-Likelihood");
7805 return new _MathObject;
7806 }
7807
7808 if (mp->ObjectClass () != NUMBER || mp->Value() < 0.0) {
7809 HandleApplicationError (_String ("Invalid Poisson distribution parameter") & (_String((_String*)mp->toStr())).Enquote());
7810 return new _MathObject;
7811 }
7812
7813 hyFloat loglik = 0.0,
7814 *logFactorials = new hyFloat [101],
7815 lambda = mp->Value(),
7816 logLambda = log (lambda),
7817 log2p = log (sqrt(8.*atan(1.)));
7818
7819
7820 logFactorials[0] = 0.;
7821 logFactorials[1] = 0.;
7822
7823 long maxFactorialDone = 1;
7824
7825 for (long idx = 0; idx < lDim; idx++) {
7826 long cellValue = 0;
7827 if (theIndex) {
7828 cellValue = theIndex[idx];
7829 if (cellValue<0) {
7830 continue;
7831 }
7832
7833 cellValue = theData[cellValue];
7834 } else {
7835 cellValue = theData[idx];
7836 }
7837
7838 if (cellValue>=0) {
7839 if (maxFactorialDone>=cellValue) {
7840 loglik += logLambda * cellValue - lambda - logFactorials [cellValue];
7841 } else {
7842 if (cellValue<=100) {
7843 for (long idx2 = maxFactorialDone+1; idx2 <= cellValue; idx2++) {
7844 logFactorials[idx2] = logFactorials[idx2-1]+log((hyFloat)idx2);
7845 }
7846 loglik += logLambda * cellValue - lambda - logFactorials [cellValue];
7847 maxFactorialDone = cellValue;
7848 } else
7849 // use Stirling's formula
7850 {
7851 loglik += logLambda * cellValue - lambda + cellValue - (cellValue+0.5)*log((hyFloat)cellValue)-log2p;
7852 }
7853 }
7854 }
7855 }
7856
7857 delete [] logFactorials;
7858
7859 return _returnConstantOrUseCache(loglik, cache);
7860 }
7861
7862
7863 //_____________________________________________________________________________________________
PathLogLikelihood(HBLObjectRef mp,HBLObjectRef cache)7864 HBLObjectRef _Matrix::PathLogLikelihood (HBLObjectRef mp, HBLObjectRef cache) {
7865 try {
7866 _Matrix *m = nil;
7867
7868 if (! is_numeric() || hDim != 3) {
7869 throw (_String("First argument must be a numeric 3xN matrix"));
7870 } else {
7871 //errMsg = "Second argument in call to < (PathLogLikelihood) must be a square matrix";
7872 if (mp->ObjectClass () == MATRIX) {
7873 m = (_Matrix*)mp->Compute();
7874 if (m->GetHDim() != m->GetVDim()) {
7875 throw (_String("Second argument must be a square matrix"));
7876 }
7877 } else {
7878 throw (_String("Second argument must be a matrix"));
7879 }
7880 }
7881
7882
7883 CheckIfSparseEnough (true);
7884
7885 hyFloat res = 0.0;
7886 long maxDim = m->GetHDim();
7887
7888 for (unsigned long step = 0UL; step < vDim; step++) {
7889
7890 long i1 = get (0,step),
7891 i2 = get (1,step);
7892 hyFloat t = get (2,step);
7893
7894 if (i1<0 || i2 < 0 || i1 >= maxDim || i2 >= maxDim || t<0.0) {
7895 throw (_String ("An invalid transition in step ") & _String ((long)(step+1L)) & " of the chain: " & i1 & " to " & i2 & " in time " & t);
7896 }
7897
7898 _Matrix rateMx (*m);
7899 rateMx *= t;
7900 _Matrix * tMatrix = rateMx.Exponentiate ();
7901 t = tMatrix->theData[maxDim*i1+i2];
7902 DeleteObject (tMatrix);
7903
7904 if (t>0.0) {
7905 res += log (t);
7906 } else {
7907 return _returnConstantOrUseCache(-1.e300, cache);
7908 }
7909 }
7910 return _returnConstantOrUseCache(res, cache);
7911 } catch (const _String& err) {
7912 HandleApplicationError (err);
7913 return new _MathObject;
7914 }
7915
7916 }
7917
7918 //_____________________________________________________________________________________________
pFDR(HBLObjectRef classes,HBLObjectRef cache)7919 HBLObjectRef _Matrix::pFDR (HBLObjectRef classes, HBLObjectRef cache) {
7920 try {
7921 long steps = 20,
7922 iter_count = 500;
7923
7924 hyFloat p_value = 0.0,
7925 max_lambda = 0.0;
7926
7927
7928 if (theIndex) {
7929 CheckIfSparseEnough (true);
7930 }
7931
7932 if (!is_numeric()) {
7933 throw _String("Only numeric matrices can be passed to && (pFDR)");
7934 } else {
7935 if (!(is_column() || is_row()) || is_empty()) {
7936 throw _String("The first argument of && (pFDR) must be an Nx1/1xN matrix.");
7937 } else if (classes->ObjectClass () != NUMBER || classes->Value() > 1. || (p_value = classes->Value()) < 0.0) {
7938 throw _String ("Invalid baseline p-value (must be in (0,1)):") & _String((_String*)classes->toStr());
7939 } else {
7940 for (unsigned long i=0UL; i<lDim; i++) {
7941 hyFloat p_count = theData[i];
7942 if (p_count < 0.0 || p_count > 1.0) {
7943 throw _String ("Invalid p-value entry in matrix passed to pFDR (must be a positive integer):") & p_count;
7944 }
7945 StoreIfGreater(max_lambda, p_count);
7946 }
7947 }
7948 }
7949
7950
7951 _Matrix lamdbaRange (steps,1,false,true),
7952 pFDRs (steps,1,false,true);
7953
7954 hyFloat anLamdba = 0.0,
7955 minPFDR = 5.0,
7956 uberPFDR = 0.0,
7957 uberPFDRUpperLimit = 0.0,
7958 minMSE = 1.e100,
7959 aStep = 1.0/steps;
7960
7961
7962 unsigned long k = 0;
7963 while (anLamdba<1.0) {
7964 lamdbaRange.theData[k] = anLamdba;
7965
7966 if ((pFDRs.theData[k] = computePFDR (anLamdba, p_value))<minPFDR) {
7967 minPFDR = pFDRs.theData[k];
7968 }
7969
7970 k++;
7971 anLamdba += aStep;
7972 }
7973
7974 for (unsigned long k=0UL; k<steps; k++) {
7975 hyFloat mse = 0.0;
7976 _Matrix ITpDFR (iter_count,1,false,true);
7977
7978 for (unsigned long it = 0; it < iter_count; it++) {
7979 _Matrix sampledPs (lDim,1,false,true);
7980 _SimpleList sample (lDim,0,1);
7981 sample.PermuteWithReplacement (1);
7982
7983 for (long el = 0; el < lDim; el++) {
7984 sampledPs.theData[el] = theData[sample.list_data[el]];
7985 }
7986
7987 ITpDFR.theData[it] = sampledPs.computePFDR (lamdbaRange.theData[k], p_value);
7988 mse += (ITpDFR.theData[it]-minPFDR)*(ITpDFR.theData[it]-minPFDR);
7989 }
7990
7991 mse /= iter_count;
7992
7993 if (mse < minMSE) {
7994 minMSE = mse;
7995 uberPFDR = pFDRs.theData[k];
7996 _Constant zer (0.0);
7997 _Matrix* sorted = (_Matrix*)ITpDFR.SortMatrixOnColumn (&zer, nil);
7998 uberPFDRUpperLimit = sorted->theData[((long)(0.95*iter_count))];
7999 DeleteObject (sorted);
8000 }
8001 }
8002
8003 _Matrix * resMx = (_Matrix *) _returnMatrixOrUseCache (2,1,_NUMERICAL_TYPE,false, cache);
8004 resMx->theData[0] = uberPFDR;
8005 resMx->theData[1] = uberPFDRUpperLimit;
8006 return resMx;
8007 } catch (const _String& err) {
8008 HandleApplicationError (err);
8009 return new _MathObject;
8010 }
8011 }
8012
8013 //_____________________________________________________________________________________________
computePFDR(hyFloat lambda,hyFloat gamma)8014 hyFloat _Matrix::computePFDR (hyFloat lambda, hyFloat gamma)
8015 // assumes a non-sparse row/column matrix
8016 {
8017 long rejected = 0,
8018 null = 0;
8019
8020 for (long idx = 0; idx < lDim; idx++) {
8021 if (theData[idx] <= gamma) {
8022 rejected++;
8023 }
8024 if (theData[idx] > lambda) {
8025 null++;
8026 }
8027 }
8028
8029 if (null) {
8030 hyFloat pi_0 = null/(lDim*(1.-lambda)),
8031 pr_p = 0;
8032
8033 if (rejected) {
8034 pr_p = rejected/(hyFloat)lDim;
8035 } else {
8036 pr_p = 1./(hyFloat)lDim;
8037 }
8038
8039 return pi_0 * gamma / (pr_p /** (1.-exp(log(1.-gamma)*lDim))*/);
8040
8041 } else {
8042 return 1;
8043 }
8044 }
8045
8046 //_____________________________________________________________________________________________
8047
Random(HBLObjectRef kind,HBLObjectRef cache)8048 HBLObjectRef _Matrix::Random (HBLObjectRef kind, HBLObjectRef cache) {
8049
8050 try {
8051 long columns = GetVDim(),
8052 rows = GetHDim();
8053
8054 if (kind->ObjectClass() == NUMBER) {
8055 bool resample = (kind->Compute()->Value()>0);
8056
8057 _SimpleList remapped (columns,0,1);
8058
8059 if (resample) {
8060 remapped.PermuteWithReplacement(1);
8061 } else {
8062 remapped.Permute(1);
8063 }
8064
8065
8066 if (is_numeric()) { // numeric matrix
8067 _Matrix * res = (_Matrix *)_returnMatrixOrUseCache(rows, columns,_NUMERICAL_TYPE, theIndex != nil,cache);
8068
8069 if (is_dense())
8070 for (unsigned long vv = 0; vv<lDim; vv+=columns)
8071 for (unsigned long k2=0; k2<remapped.lLength; k2++) {
8072 res->theData[vv+k2] = theData[vv+remapped.list_data[k2]];
8073 }
8074 else {
8075 for (unsigned long vv = 0; vv< rows; vv++)
8076 for (unsigned long k=0; k<remapped.lLength; k++) {
8077 long ki = remapped.list_data[k];
8078 if ((ki = Hash (vv,ki)) >= 0) {
8079 res->Store (vv,k,theData[ki]);
8080 }
8081 }
8082 }
8083 return res;
8084 } else { // formula matrix
8085 if (is_expression_based()) {
8086 _Matrix * res = new _Matrix (rows, columns,theIndex != nil,false);
8087
8088 for (unsigned long vv = 0UL; vv< rows; vv++)
8089 for (unsigned long k=0UL; k<remapped.lLength; k++) {
8090 _Formula * ff = GetFormula (vv,remapped.get (k));
8091 if (ff) {
8092 res->StoreFormula (vv, k, *ff);
8093 }
8094 }
8095 return res;
8096 }
8097 }
8098 }
8099
8100 else if (kind->ObjectClass() == ASSOCIATIVE_LIST) {
8101 //ReportWarning (_String("_Matrix::Random() with associative list as first argument."));
8102
8103 // Associative list should contain following arguments:
8104 // "PDF" - string corresponding to p.d.f. ("Gamma", "Normal")
8105 // "ARG0" ... "ARGn" - whatever parameter arguments (matrices) are required for the p.d.f.
8106
8107 _AssociativeList * pdfArgs = (_AssociativeList *)kind;
8108 _List * keys = pdfArgs->GetKeys();
8109 _String pdfkey ("PDF"),
8110 * arg0 = (_String *)pdfArgs->GetByKey(pdfkey,STRING);
8111 DeleteObject (keys);
8112
8113
8114 if (arg0) {
8115 _String pdf ((_String*)arg0->toStr()),
8116 arg ("ARG0");
8117
8118 long pdfCode = _HY_MatrixRandomValidPDFs.GetValueFromString (pdf);
8119
8120 switch (pdfCode) {
8121 case _HY_MATRIX_RANDOM_DIRICHLET:
8122 return (_Matrix *) DirichletDeviate();
8123 case _HY_MATRIX_RANDOM_GAUSSIAN:
8124 return (_Matrix *) GaussianDeviate (*(_Matrix *) pdfArgs->GetByKey (arg, MATRIX));
8125 case _HY_MATRIX_RANDOM_WISHART:
8126 return (_Matrix *) WishartDeviate (*(_Matrix *) pdfArgs->GetByKey (arg, MATRIX));
8127 case _HY_MATRIX_RANDOM_INVERSE_WISHART:
8128 return (_Matrix *) InverseWishartDeviate (*(_Matrix *) pdfArgs->GetByKey (arg, MATRIX));
8129 case _HY_MATRIX_RANDOM_MULTINOMIAL:
8130 return (_Matrix *) MultinomialSample ((_Constant *) pdfArgs->GetByKey (arg, NUMBER));
8131 default:
8132 throw _String("String argument passed to Random not a supported PDF: ") & pdf.Enquote();
8133 }
8134 } else {
8135 throw _String("Expecting 'PDF' key in associative list argument passed to Random(), received: ") & *arg0;
8136 }
8137
8138 } else if (kind->ObjectClass () == STRING) {
8139 _String key = ((_FString*)kind->Compute())->get_str();
8140 if (key == _String("LHS")) {
8141 // latin hypercube sampling: samples are in ROWS
8142 _Matrix * lhc = new _Matrix ( rows, columns, false, true);
8143
8144 _SimpleList permutation ( rows ,0,1);
8145
8146 for (unsigned long c = 0; c < columns; c++) {
8147 permutation.Permute (1);
8148 for (long r = 0; r < rows ; r++) {
8149 lhc->set(r,c) = get (permutation.get(r),c);
8150 }
8151 }
8152
8153 return lhc;
8154 }
8155 throw _String ("Invalid string argument passed to matrix Random :") & key;
8156 } else {
8157 throw _String ("Invalid argument passes to matrix Random (should be a number, an associative list or a string):") & _String((_String*)kind->toStr());
8158 }
8159 } catch (_String const& err) {
8160 HandleApplicationError (err);
8161 }
8162 return new _Matrix (1,1);
8163 }
8164
8165 //_____________________________________________________________________________________________
K_Means(HBLObjectRef classes,HBLObjectRef cache)8166 HBLObjectRef _Matrix::K_Means (HBLObjectRef classes, HBLObjectRef cache) {
8167 // K-means clustering on scalar data
8168 /*
8169
8170 this : Nx2 matrix {{value1, count1}{value2, count 2}...}}
8171 classes : 2x1 matrix {{cluster count}{number of random restarts}}
8172
8173 reutn : 2 x Max (cluster count, 2) {{cluster mean 1, cluster mean 2, ... , cluster mean N}{total L^2 error, how many restarts hit the min}}
8174
8175 */
8176
8177 // TODO: 20171026 SLKP revised. check correctness
8178
8179 try {
8180 _Matrix * arg;
8181 long cluster_count,
8182 iter_count,
8183 sample_count = 0L;
8184
8185 if (theIndex) {
8186 CheckIfSparseEnough (true);
8187 }
8188
8189 if (!is_numeric()) {
8190 throw _String("Only numeric matrices can be passed to <= (K-means)");
8191 } else {
8192 if (GetVDim () != 2) {
8193 throw _String("The first argument of <= (K-means) must be an Nx2 matrix, with samples in the first column, and counts in the 2nd.");
8194 } else if (classes->ObjectClass () != MATRIX) {
8195 throw _String ("Invalid number of clusters is call to K-means (must be >=1):") & _String((_String*)classes->toStr());
8196 } else {
8197 arg = (_Matrix*)classes->Compute();
8198 if (!arg->check_dimension (1,2) || (cluster_count=arg->theData[0]) < 1 || (iter_count = arg->theData[1]) < 1) {
8199 throw _String ("Invalid second argument is call to K-means (must be a 2x1 matrix of positive integers specifying cluster_count and maximum number of random restarts. Had ") & _String((_String*)classes->toStr()).Enquote();
8200 } else {
8201 for (unsigned long i=1UL; i<lDim; i+=2UL) {
8202 long pCount = theData[i];
8203 if (pCount <= 0L) {
8204 throw _String ("Invalid count entry in matrix passed to K-means (must be a positive integer):") & pCount;
8205 }
8206 sample_count += pCount;
8207 }
8208 }
8209 }
8210 }
8211
8212 if (cluster_count > sample_count) {
8213 throw _String ("More clusters requested than available data points");
8214 }
8215
8216 _Matrix * res = (_Matrix*)_returnMatrixOrUseCache(2, cluster_count, _NUMERICAL_TYPE, false, cache);
8217
8218 if (cluster_count == 1L) {
8219 hyFloat sampleMean = 0.,
8220 errorEstimate = 0.;
8221
8222 for (unsigned long c1=0UL, c2=1UL; c1 < 2*hDim; c1+=2UL, c2+=2UL) {
8223 sampleMean += theData[c1] * theData[c2];
8224 }
8225
8226 sampleMean /= sample_count;
8227
8228 for (unsigned long c1=0UL, c2=1UL; c1 < 2*hDim; c1+=2UL, c2+=2UL) {
8229 hyFloat locErr = theData[c1] - sampleMean;
8230 errorEstimate += locErr*locErr*theData[c2];
8231 }
8232
8233 res->theData[0] = sampleMean;
8234 res->theData[1] = errorEstimate;
8235 } else {
8236
8237 hyFloat minError = 1.e100;
8238
8239
8240 _SimpleList full_copy_list ((unsigned long)sample_count,0,0);
8241
8242 for (unsigned long c1 = 0UL, overall = 0UL; c1 < hDim; c1++) {
8243 unsigned long copies = get (c1, 1);
8244
8245 for (unsigned long c2 = 0; c2 < copies; c2++, overall++) {
8246 full_copy_list.list_data[overall] = c1;
8247 }
8248 }
8249
8250
8251 long hit_min_error;
8252 _Matrix best_cluster_means;
8253
8254 for (unsigned long sampleCount = 0UL; sampleCount < iter_count; sampleCount ++) {
8255 // choose N random cluster centers to start with
8256 _SimpleList chosen_means = full_copy_list.Sample(cluster_count),
8257 cluster_assignments (hDim, 0, 0);
8258
8259 _Matrix cluster_means (cluster_count,2,false,true);
8260
8261
8262
8263 for (long cc = 0; cc < cluster_count; cc = cc+1) {
8264 cluster_means.set (cc, 0) = get(chosen_means.get(cc), 0);
8265 }
8266
8267 hyFloat last_error_estimate = 1.e100,
8268 error_estimate = 0.;
8269
8270
8271 for (unsigned long cIters = 0UL; cIters < hDim * 5; cIters ++) {
8272
8273 bool moved = false;
8274 // assign each point to the nearest cluster centroid
8275 for (unsigned long data_point = 0UL; data_point < hDim; data_point++) {
8276
8277 unsigned long best_cluster = cluster_assignments.get (data_point);
8278
8279 hyFloat this_value = get (data_point, 0),
8280 current_min_distance = fabs (this_value-cluster_means.get (cluster_assignments.get (data_point),0));
8281
8282 for (unsigned long cluster_id = 0UL; cluster_id < cluster_count; cluster_id++) {
8283 if (StoreIfLess (current_min_distance, fabs (this_value-cluster_means.get (cluster_id,0)))) {
8284 best_cluster = cluster_id;
8285 }
8286 }
8287
8288 if (best_cluster != cluster_assignments.get (data_point)) {
8289 moved = true;
8290 cluster_assignments[data_point] = best_cluster;
8291 }
8292 }
8293
8294 if (moved) {
8295 for (long cc = 0; cc < cluster_count; cc = cc+1) {
8296 cluster_means.set (cc,0) = 0.;
8297 cluster_means.set (cc,1) = 0.;
8298 }
8299 for (unsigned long data_point = 0UL; data_point < hDim; data_point++) {
8300 cluster_means.set (cluster_assignments.get (data_point), 0) += get (data_point,0);
8301 cluster_means.set (cluster_assignments.get (data_point), 1) += get (data_point,1);
8302 }
8303 for (long cc = 0; cc < cluster_count; cc = cc+1) {
8304 if (cluster_means.get (cc, 1) != .0) {
8305 cluster_means.set (cc,0) /= cluster_means.get (cc,1);
8306 }
8307 }
8308 } else {
8309 break;
8310 }
8311
8312
8313 }
8314
8315 if (minError == 0.0 || fabs((minError-error_estimate)/minError) < 0.001) {
8316 hit_min_error ++;
8317 } else if (error_estimate < minError) {
8318 hit_min_error = 1;
8319 minError = error_estimate;
8320 best_cluster_means = cluster_means;
8321 }
8322 }
8323
8324 for (long k2 = 0; k2 < cluster_count; k2++) {
8325 res->theData[k2] = best_cluster_means.get (k2, 0);
8326 }
8327
8328 res->theData[cluster_count] = minError;
8329 res->theData[cluster_count+1] = hit_min_error;
8330 return res;
8331 }
8332 } catch (_String const& err) {
8333 HandleApplicationError (err);
8334 }
8335
8336 return new _Matrix;
8337 }
8338
8339
8340 //_____________________________________________________________________________________________
PopulateConstantMatrix(hyFloat v)8341 void _Matrix::PopulateConstantMatrix (hyFloat v) {
8342 if (is_numeric()) {
8343 InitializeArray(theData, lDim, (hyFloat&&)v);
8344 }
8345 }
8346
8347 //_____________________________________________________________________________________________
AddObj(HBLObjectRef mp,HBLObjectRef cache)8348 HBLObjectRef _Matrix::AddObj (HBLObjectRef mp, HBLObjectRef cache)
8349 {
8350 if (_Matrix::ObjectClass()!=mp->ObjectClass()) {
8351 if (mp->ObjectClass () == STRING) {
8352 _FormulaParsingContext def;
8353 _Matrix * convMatrix = new _Matrix (((_FString*)mp)->get_str(), false, def),
8354 * res;
8355 res = (_Matrix*)AddObj (convMatrix, cache);
8356 DeleteObject (convMatrix);
8357 return res;
8358 }
8359 if (mp->ObjectClass () == NUMBER) {
8360 _Matrix* aNum = (_Matrix*)ComputeNumeric ();
8361
8362 hyFloat pValue = mp->Value();
8363
8364 if (aNum->is_numeric()) {
8365 return ApplyScalarOperation ([=] (hyFloat h) -> hyFloat {return h + pValue;}, cache);
8366 }
8367 }
8368
8369 HandleApplicationError ( kErrorStringIncompatibleOperands );
8370 return new _Matrix (1,1);
8371 }
8372
8373 _Matrix * m = (_Matrix*)mp;
8374 AgreeObjects (*m);
8375 _Matrix * result = (_Matrix *)_returnMatrixOrUseCache( hDim, vDim, storageType, theIndex && m->theIndex, cache);
8376 AddMatrix (*result,*m);
8377 return result;
8378 }
8379
8380 //_____________________________________________________________________________________________
operator -=(_Matrix & m)8381 void _Matrix::operator -= (_Matrix& m)
8382 {
8383 AgreeObjects (m);
8384 if ((!m.theIndex)&&theIndex) {
8385 CheckIfSparseEnough(true);
8386 }
8387 Subtract (*this,m);
8388 }
8389
8390 //_____________________________________________________________________________________________
NonZeroEntries(_SimpleList & target)8391 void _Matrix::NonZeroEntries (_SimpleList& target) {
8392 if (theIndex && storageType == 1) {
8393 target.Clear();
8394 target.RequestSpace(lDim);
8395 for (long elementID = 0; elementID < lDim; elementID ++) {
8396 if (theIndex[elementID] >= 0) {
8397 target << theIndex[elementID];
8398 }
8399 }
8400 target.Sort();
8401 }
8402 }
8403
8404 //_____________________________________________________________________________________________
Equal(HBLObjectRef mp)8405 bool _Matrix::Equal(HBLObjectRef mp)
8406 {
8407 if (mp->ObjectClass()!=ObjectClass()) {
8408 return false;
8409 }
8410
8411 _Matrix * m = (_Matrix*)mp;
8412
8413 if (m->storageType == storageType && m->hDim == hDim && m->vDim == vDim) {
8414 if (is_numeric()) {
8415 if (theIndex || m->theIndex) {
8416 for (long r = 0L; r < hDim; r ++) {
8417 for (long c = 0L; c < vDim; c++) {
8418 if (!CheckEqual((*this)(r,c), (*m)(r,c))) {
8419 return false;
8420 }
8421 }
8422 }
8423
8424 } else {
8425 for (long elementID = 0; elementID < lDim; elementID ++) {
8426 if (!CheckEqual(theData[elementID], m->theData[elementID])) {
8427 return false;
8428 }
8429 }
8430 }
8431
8432 return true;
8433 } else {
8434 if (IsAStringMatrix() && m->IsAStringMatrix()) {
8435 for (long r = 0L; r < hDim; r ++) {
8436 for (long c = 0L; c < vDim; c++) {
8437 _Formula * f1 = GetFormula(r,c),
8438 * f2 = GetFormula(r,c);
8439
8440 if (f1 && f2) {
8441 if (((_FString*)f1->Compute())->get_str() != ((_FString*)f2->Compute())->get_str()) {
8442 return false;
8443 }
8444 } else {
8445 if (f1 || f2) {
8446 return false;
8447 }
8448 }
8449 }
8450 }
8451 return true;
8452 }
8453 }
8454 }
8455
8456 return false;
8457 }
8458
8459
8460 //_____________________________________________________________________________________________
SubObj(HBLObjectRef mp,HBLObjectRef cache)8461 HBLObjectRef _Matrix::SubObj (HBLObjectRef mp, HBLObjectRef cache)
8462 {
8463 if (mp->ObjectClass()!=ObjectClass()) {
8464 HandleApplicationError ( kErrorStringIncompatibleOperands );
8465 return new _Matrix (1,1);
8466 }
8467
8468 _Matrix * m = (_Matrix*)mp;
8469 AgreeObjects (*m);
8470 _Matrix * result = (_Matrix*) _returnMatrixOrUseCache(hDim, vDim, storageType,theIndex && m->theIndex, cache);
8471 Subtract (*result,*m);
8472 return result;
8473 }
8474
8475 //_____________________________________________________________________________________________
operator *=(hyFloat c)8476 void _Matrix::operator *= (hyFloat c) {
8477 Multiply (*this,c);
8478 }
8479
8480 //_____________________________________________________________________________________________
operator *(hyFloat c)8481 _Matrix _Matrix::operator * (hyFloat c) {
8482 _Matrix result (*this);
8483 Multiply (result,c);
8484 return result;
8485 }
8486
8487 //_____________________________________________________________________________________________
operator *=(_Matrix & m)8488 void _Matrix::operator *= (_Matrix& m) {
8489 if (CheckDimensions (m)) {
8490 AgreeObjects (m);
8491 _Matrix result (hDim, m.vDim, false, storageType);
8492 Multiply (result,m);
8493 //if ((theIndex!=nil)||(m.theIndex!=nil)) result.AmISparse();
8494 if (theIndex!=nil && m.theIndex!=nil) {
8495 result.AmISparse();
8496 }
8497 Swap (result);
8498 }
8499 }
8500
8501 //long count_sparse_successes = 0L;
8502 //long total_multbys = 0L;
8503
8504 //_____________________________________________________________________________________________
MultbyS(_Matrix & m,bool leftMultiply,_Matrix * externalStorage,hyFloat * stash)8505 void _Matrix::MultbyS (_Matrix& m, bool leftMultiply, _Matrix* externalStorage, hyFloat* stash) {
8506 _Matrix * result = nil;
8507 if (!externalStorage) {
8508 result = new _Matrix (hDim, m.vDim, false, storageType);
8509 }
8510
8511 _Matrix * receptacle = (externalStorage?externalStorage:result);
8512
8513 if (leftMultiply) {
8514 m.Multiply (*receptacle,*this);
8515 } else {
8516 Multiply (*receptacle,m);
8517 }
8518
8519 if (theIndex&&m.theIndex) {
8520 //total_multbys++;
8521 // 20200928: speculatively compress the matrix; convert back to dense if storage is too large
8522 /*CompressSparseMatrix(false,stash);
8523 if (lDim >= hDim * vDim *_Matrix::switchThreshold/100) {
8524 Swap (*receptacle);
8525 }*/
8526
8527 if (receptacle->AmISparseFast(*this) == false) {
8528 Swap (*receptacle);
8529 } /*else {
8530 CompressSparseMatrix(false,stash);
8531 }*/
8532
8533 //if (total_multbys % 100) {
8534 // printf ("%ld / %ld\n", count_sparse_successes, total_multbys);
8535 //}
8536 } else { // both dense
8537 Swap (*receptacle);
8538 }
8539
8540
8541 if (!externalStorage) {
8542 DeleteObject (result);
8543 } else {
8544 if (!externalStorage->CheckIfSparseEnough (true, false)) { // no conversion took place; reset memory
8545 memset (externalStorage->theData, 0, sizeof (hyFloat)*externalStorage->lDim);
8546 }
8547 //for (long s = 0; s < externalStorage->lDim; s++) externalStorage->theData[s] = 0.0;
8548 }
8549 }
8550
8551 //_____________________________________________________________________________________________
MultObj(HBLObjectRef mp,HBLObjectRef cache)8552 HBLObjectRef _Matrix::MultObj (HBLObjectRef mp, HBLObjectRef cache) {
8553
8554 if (mp->ObjectClass()!=ObjectClass()) {
8555 if (mp->ObjectClass()!=NUMBER) {
8556 HandleApplicationError ( kErrorStringIncompatibleOperands );
8557 return new _Matrix (1,1);
8558 } else {
8559 hyFloat theV = mp->Value();
8560 return (HBLObjectRef)((*this)*theV).makeDynamic();
8561 }
8562 }
8563
8564 _Matrix* m = (_Matrix*)mp;
8565 if (!CheckDimensions (*m)) return new _MathObject;
8566 AgreeObjects (*m);
8567
8568 _Matrix* result = (_Matrix*) _returnMatrixOrUseCache(hDim, m->vDim, storageType, false, cache);
8569 Multiply (*result,*m);
8570 return result;
8571
8572 }
8573
8574 //_____________________________________________________________________________________________
MultElements(HBLObjectRef mp,bool elementWiseDivide,HBLObjectRef cache)8575 HBLObjectRef _Matrix::MultElements (HBLObjectRef mp, bool elementWiseDivide, HBLObjectRef cache) {
8576
8577 if (mp->ObjectClass()!=ObjectClass()) {
8578 HandleApplicationError ( kErrorStringIncompatibleOperands );
8579 return new _Matrix (1,1);
8580 }
8581
8582 _Matrix* m = (_Matrix*)mp;
8583
8584 bool by_column = false;
8585 // if the second argument has dimension 1xcolumns of the first matrix, then
8586 // result [i][j] is assigned this [i][j] * / argument [0][j]
8587 // in other words, divide or multiply each column
8588
8589 bool by_row = false;
8590 // if the first argument has dimension rows of the second matrix x 1 then
8591 // result [i][j] is assigned argument [i][j] * / this [i][0]
8592 // in other words, divide or multiply each row
8593
8594
8595 if ( GetHDim()!=m->GetHDim() || GetVDim()!=m->GetVDim()) {
8596 if (GetVDim() == m->GetVDim() && m->GetHDim () == 1) {
8597 by_column = true;
8598 } else {
8599 if (GetHDim() == m->GetHDim() && GetVDim () == 1) {
8600 by_row = true;
8601 } else {
8602 HandleApplicationError ("Element-wise multiplication/division requires matrixes of the same dimension, or (NxM) $ (1xM) or (Nx1) $ (NxM) matrices ");
8603 return new _Matrix (1,1);
8604 }
8605 }
8606 }
8607
8608 if (! is_numeric() || ! m->is_numeric() ) {
8609 HandleApplicationError ("Element-wise multiplication/division only works on numeric matrices");
8610 return new _Matrix (1,1);
8611 }
8612
8613 _Matrix* result = (_Matrix*) _returnMatrixOrUseCache(GetHDim(), m->GetVDim(), _NUMERICAL_TYPE, false, cache);
8614
8615 if (theIndex || m->theIndex) {
8616 auto operation = elementWiseDivide ? DivNumbers : MultNumbers;
8617
8618 long index = 0L;
8619 if (by_row) {
8620 for (long row = 0; row < hDim; row++) {
8621 for (long column = 0; column < m->vDim; column++, index++) {
8622 result->theData[index] = operation ( (*this)(row,0), (*m)(row,column));
8623 }
8624 }
8625 } else {
8626 if (by_column) {
8627 for (long row = 0; row < hDim; row++) {
8628 for (long column = 0; column < m->vDim; column++, index++) {
8629 result->theData[index] = operation ( (*this)(row,column), (*m)(0,column));
8630 }
8631 }
8632 }
8633 else {
8634 for (long row = 0; row < hDim; row++) {
8635 for (long column = 0; column < m->vDim; column++, index++) {
8636 result->theData[index] = operation ( (*this)(row,column), (*m)(row,column));
8637 }
8638 }
8639 }
8640 }
8641 } else {
8642 if (elementWiseDivide) {
8643 long index = 0L;
8644 if (by_row) {
8645 for (long row = 0; row < hDim; row++) {
8646 for (long column = 0; column < m->vDim; column++, index++) {
8647 result->theData[index] = theData[row] / m->theData [index];
8648 }
8649 }
8650 } else {
8651 if (by_column) {
8652 for (long row = 0; row < hDim; row++) {
8653 for (long column = 0; column < m->vDim; column++, index++) {
8654 result->theData[index] = theData[index] / m->theData [column];
8655 }
8656 }
8657 }
8658 else {
8659 for (long row = 0; row < hDim; row++) {
8660 for (long column = 0; column < m->vDim; column++, index++) {
8661 result->theData[index] = theData[index] / m->theData [index];
8662 }
8663 }
8664 }
8665 }
8666 } else {
8667 long index = 0L;
8668 if (by_row) {
8669 for (long row = 0; row < hDim; row++) {
8670 for (long column = 0; column < m->vDim; column++, index++) {
8671 result->theData[index] = theData[row] * m->theData [index];
8672 }
8673 }
8674 } else {
8675 if (by_column) {
8676 for (long row = 0; row < hDim; row++) {
8677 for (long column = 0; column < m->vDim; column++, index++) {
8678 result->theData[index] = theData[index] * m->theData [column];
8679 }
8680 }
8681 }
8682 else {
8683 for (long row = 0; row < hDim; row++) {
8684 for (long column = 0; column < m->vDim; column++, index++) {
8685 result->theData[index] =theData[index] * m->theData [index];
8686 }
8687 }
8688 }
8689 }
8690 }
8691 }
8692
8693 if (theIndex||m->theIndex) {
8694 result->AmISparse();
8695 }
8696
8697 return result;
8698 }
8699
8700 //_____________________________________________________________________________________________
CheckDimensions(_Matrix & secondArg) const8701 bool _Matrix::CheckDimensions (_Matrix& secondArg) const {
8702 // check matrix dimensions to ensure that they are multipliable
8703 if (vDim!=secondArg.hDim) {
8704 if (hDim == 1 && secondArg.hDim==1 && vDim == secondArg.vDim) { // handle scalar product separately
8705 secondArg.Transpose();
8706 } else {
8707 char str[255];
8708 snprintf (str, sizeof(str),"Incompatible matrix dimensions in call to CheckDimension: %ldx%ld and %ldx%ld\n",hDim,vDim,secondArg.hDim,secondArg.vDim);
8709 HandleApplicationError (str);
8710 return false;
8711 }
8712 }
8713 return true;
8714 }
8715
8716 //_____________________________________________________________________________________________
operator *(_Matrix & m)8717 _Matrix _Matrix::operator * (_Matrix& m)
8718 {
8719 if (!CheckDimensions (m)) {
8720 _Matrix d;
8721 return d;
8722 }
8723
8724 AgreeObjects (m);
8725 _Matrix result (hDim, m.vDim, false, storageType);
8726 Multiply (result,m);
8727 if ((theIndex!=nil)||(m.theIndex!=nil)) {
8728 result.AmISparse();
8729 }
8730 return result;
8731
8732 }
8733 //_____________________________________________________________________________________________
operator +(_Matrix & m)8734 _Matrix _Matrix::operator + (_Matrix& m)
8735 {
8736 AgreeObjects (m);
8737 _Matrix result (hDim, vDim, bool((theIndex!=nil)&&(m.theIndex!=nil)), storageType);
8738 AddMatrix (result,m);
8739 return result;
8740
8741 }
8742 //_____________________________________________________________________________________________
operator -(_Matrix & m)8743 _Matrix _Matrix::operator - (_Matrix& m)
8744 {
8745 AgreeObjects (m);
8746 _Matrix result (hDim, vDim, bool((theIndex!=nil)&&(m.theIndex!=nil)), storageType);
8747 Subtract (result,m);
8748 return result;
8749 }
8750
8751 //_________________________________________________________
internal_to_str(_StringBuffer * string,FILE * file,unsigned long padding)8752 void _Matrix::internal_to_str (_StringBuffer* string, FILE * file, unsigned long padding) {
8753
8754 StringFileWrapper res (string, file);
8755 _String padder (" ", padding);
8756
8757 static const _String kUseJSONForMatrix ("USE_JSON_FOR_MATRIX");
8758
8759 bool is_numeric_mx = is_numeric ();
8760 bool directly_printable = is_numeric_mx || IsAStringMatrix ();
8761
8762 if (directly_printable) {
8763
8764 long digs = -1L;
8765
8766 bool doJSON = hy_env::EnvVariableTrue(kUseJSONForMatrix);
8767
8768 char openBracket = doJSON ? '[' : '{',
8769 closeBracket = doJSON ? ']' : '}';
8770
8771 if (is_numeric_mx) {
8772 digs = MIN (print_digit_specification = hy_env::EnvVariableGetDefaultNumber(hy_env::print_float_digits), 15);
8773 }
8774 res << padder << openBracket << kStringFileWrapperNewLine;
8775
8776 if (is_numeric_mx) {
8777
8778 _String formatStr = _String("%") &_String(digs+6)&'.'&_String(digs)&'g';
8779
8780 char number_buffer [256];
8781
8782 for (long i = 0L; i<hDim; i++) {
8783 if (i) {
8784 res << padder;
8785 }
8786 res << openBracket;
8787
8788 for (long j = 0L; j<vDim; j++) {
8789 if (j) {
8790 res << ", ";
8791 }
8792 parameterToCharBuffer ((*this)(i,j), number_buffer, 255, doJSON);
8793 res << number_buffer;
8794 }
8795 res << closeBracket << (doJSON && i != hDim -1 ? ',' : ' ') << kStringFileWrapperNewLine;
8796 }
8797 } else {
8798 for (long i = 0L; i<hDim; i++) {
8799 if (i) {
8800 res << padder;
8801 }
8802 res << openBracket;
8803
8804 for (long j = 0L; j<vDim; j++) {
8805 if (j) {
8806 res << ", ";
8807 }
8808 res << '"';
8809
8810 _Formula * f = GetFormula (i,j);
8811 if (f) {
8812 HBLObjectRef fv = f->Compute();
8813 if (fv) {
8814 res << _String ((_String*)fv->toStr());
8815 //;((_FString*)fv)->get_str();
8816 }
8817 }
8818 res << '"';
8819
8820 }
8821 res << closeBracket << (doJSON && i != hDim -1 ? ',' : ' ') << kStringFileWrapperNewLine;
8822 }
8823 }
8824 res << padder << closeBracket;
8825 } else if (storageType==_POLYNOMIAL_TYPE) {
8826 ANALYTIC_COMPUTATION_FLAG = hy_env::EnvVariableTrue (ANAL_COMP_FLAG);
8827 if (!ANALYTIC_COMPUTATION_FLAG) {
8828 ((_Matrix*)Compute())->internal_to_str (string, file, padding);
8829 return;
8830 }
8831 for (long i = 0; i<hDim; i++) {
8832 res << "\n[";
8833 for (long j = 0; j<vDim; j++) {
8834 long p = Hash (i,j);
8835 if (j) {
8836 res << ",";
8837 }
8838 if (p>=0) {
8839 _String *sp = (_String*) GetMatrixObject (p)->toStr();
8840 res << *sp;
8841 DeleteObject (sp);
8842 } else {
8843 res << "0.";
8844 }
8845 }
8846 res << "]\n";
8847 }
8848 } else {
8849 _Matrix* eval = (_Matrix*)(storageType==3?EvaluateSimple():Evaluate(false));
8850 eval->internal_to_str(string, file, padding);
8851 DeleteObject (eval);
8852 }
8853 }
8854 //_________________________________________________________
toFileStr(FILE * dest,unsigned long padding)8855 void _Matrix::toFileStr (FILE*dest, unsigned long padding){
8856 internal_to_str(nil, dest, padding);
8857 }
8858 //_____________________________________________________________________________________________
8859
toStr(unsigned long padding)8860 BaseRef _Matrix::toStr(unsigned long padding) {
8861 _StringBuffer * serialized = new _StringBuffer (2048L);
8862 internal_to_str (serialized, nil, padding);
8863 return serialized;
8864 }
8865
8866 //_____________________________________________________________________________________________
8867
Serialize(_StringBuffer & res,_String & myID)8868 void _Matrix::Serialize (_StringBuffer& res, _String& myID) {
8869 if (storageType != _POLYNOMIAL_TYPE) {
8870 res << '\n';
8871 res << myID;
8872 if (is_numeric()) {
8873 res << '=';
8874 res.AppendNewInstance((_String*)toStr());
8875 res << ';';
8876 } else if (is_expression_based()) {
8877 res << (_String ("={") & hDim & ',' & vDim & "};\n");
8878 for (long h=0L; h<hDim; h++) {
8879 for (long v=0L; v<vDim; v++) {
8880 _Formula *theCell = GetFormula (h,v);
8881 if (theCell&& !theCell->IsEmpty()) {
8882 res << myID << '[' << _String(h) << "][" << _String(v) << "]:=";
8883 res.AppendNewInstance((_String*)theCell->toStr(kFormulaStringConversionNormal));
8884 res << ";\n";
8885 }
8886 }
8887 }
8888 }
8889 }
8890 }
8891
8892
8893 //_____________________________________________________________________________________________
8894
SetIncrement(int m)8895 void SetIncrement (int m) {
8896 _Matrix::storageIncrement = m;
8897 }
8898 //_____________________________________________________________________________________________
InitMxVar(_SimpleList & mxVariables,hyFloat glValue)8899 void _Matrix::InitMxVar (_SimpleList& mxVariables, hyFloat glValue) {
8900 mxVariables.Each ([&] (long value, unsigned long) -> void {
8901 LocateVar(value)->SetValue (new _Constant (glValue), false,true, NULL);
8902 });
8903 }
8904 //_____________________________________________________________________________________________
ImportMatrixExp(FILE * theSource)8905 bool _Matrix::ImportMatrixExp (FILE* theSource) {
8906 // TODO: SLKP 20171027, need to review and possibly deprecate
8907 long mDim=0,i,k=0,j,m;
8908 char buffer[255],fc=0;
8909 buffer[0]=0;
8910 while(1) {
8911 buffer[mDim]=fgetc(theSource);
8912 if (feof(theSource)) {
8913 return false;
8914 }
8915 if (buffer[mDim]==',') {
8916 break;
8917 }
8918 mDim++;
8919 }
8920 buffer[mDim] = 0;
8921 mDim = atol (buffer); // matrix dimension
8922 Clear();
8923 CreateMatrix (this,mDim,mDim,false,false);
8924 // read in the variables
8925 i = 0;
8926 _SimpleList varList,c1,c2;
8927 while (fc!=';') {
8928 fc = fgetc (theSource);
8929 if ((fc==',')||(fc==';')) {
8930 buffer [i] = 0;
8931 _String varName (buffer);
8932
8933 _Variable * ppv = CheckReceptacle (&varName, kEmptyString, true);
8934 varList << ppv->get_index();
8935 i = 0;
8936 } else {
8937 buffer[i]=fc;
8938 i++;
8939 }
8940 if (feof(theSource)) {
8941 return false;
8942 }
8943 }
8944 do {
8945 fc = fgetc (theSource);
8946 if (feof(theSource)) {
8947 return false;
8948 }
8949 } while (fc!=';');
8950
8951 k = 0; // term counter
8952
8953 while (k<mDim*mDim) {
8954 i = 0;
8955 while (fc!='{') {
8956 fc = fgetc (theSource);
8957 buffer[i] = fc;
8958 i++;
8959 if (feof(theSource)) {
8960 return false;
8961 }
8962 }
8963 _Polynomial* thisCell = new _Polynomial (varList);
8964 m = atol (buffer);
8965 hyFloat* theCoeffs = (hyFloat*)MatrixMemAllocate(m*sizeof(hyFloat));
8966 j = 0;
8967 while (fc!='}') {
8968 i = 0;
8969 do {
8970 buffer[i] = fc = fgetc (theSource);
8971 i++;
8972 if (feof(theSource)) {
8973 DeleteObject (thisCell);
8974 return false;
8975 }
8976 } while ((fc!=',')&&(fc!='}'));
8977 buffer[i]=0;
8978 theCoeffs[j]=atof (buffer);
8979 j++;
8980 if (j>m) {
8981 DeleteObject (thisCell);
8982 return false;
8983 }
8984 }
8985 fc = fgetc(theSource);
8986 if (fc != '{') {
8987 DeleteObject (thisCell);
8988 return false;
8989 }
8990 _PolynomialData *pd = new _PolynomialData (varList.countitems(),j,theCoeffs);
8991 MatrixMemFree (theCoeffs);
8992 c1.Clear();
8993 while (fc!='}') {
8994 i = 0;
8995 do {
8996 buffer[i] = fc = fgetc (theSource);
8997 i++;
8998 if (feof(theSource)) {
8999 DeleteObject (thisCell);
9000 DeleteObject (pd);
9001 return false;
9002 }
9003 } while ((fc!=',')&&(fc!='}'));
9004 buffer[i]=0;
9005 c1<<atol (buffer);
9006 }
9007 fc = fgetc(theSource);
9008 if (fc != '{') {
9009 DeleteObject (thisCell);
9010 DeleteObject (pd);
9011 return false;
9012 }
9013 c2.Clear();
9014 while (fc!='}') {
9015 i = 0;
9016 do {
9017 buffer[i] = fc = fgetc (theSource);
9018 i++;
9019 if (feof(theSource)) {
9020 DeleteObject (thisCell);
9021 DeleteObject (pd);
9022 return false;
9023 }
9024 } while ((fc!=',')&&(fc!='}'));
9025 buffer[i]=0;
9026 c2<<atol (buffer);
9027 }
9028 thisCell->SetTheTerms(pd);
9029 thisCell->SetCLists (c1,c2);
9030 StoreObject(k,thisCell);
9031 k++;
9032 }
9033
9034 return true;
9035 }
9036
9037 //_____________________________________________________________________________________________
9038
ExportMatrixExp(_Matrix * theBase,FILE * theDump)9039 void _Matrix::ExportMatrixExp (_Matrix* theBase, FILE* theDump)
9040 // TODO: SLKP 20171027, need to review and possibly deprecate
9041
9042 // export the matrix's computational form in the following format
9043 // matrix dimension followed by a comma
9044 // a comma separated list of variable names followed by a semicolon
9045 // a list of (precision, maxcap) followed by a semicolon
9046 // for each matrix entry
9047 // number of coeffs,
9048 // a {} enclosed list of coefficients
9049 // a {} enclosed first computational list
9050 // a {} enclosed second computational list
9051 // followed by a comma
9052 {
9053 // write out the preliminaries
9054 if (storageType!=0) {
9055 HandleApplicationError ( kErrorStringMatrixExportError );
9056 return;
9057 }
9058 fprintf (theDump,"%ld,",hDim);
9059 _SimpleList mxVariables;
9060 {
9061 _AVLList mxA (&mxVariables);
9062 ScanForVariables(mxA,true);
9063 mxA.ReorderList();
9064 }
9065
9066
9067 long k, i=0;
9068 hyFloat* varPool = (hyFloat*)MatrixMemAllocate (mxVariables.countitems()*sizeof(hyFloat));
9069 for (k=0; k<mxVariables.countitems(); k++) {
9070 fprintf (theDump,"%s",LocateVar(mxVariables(k))->GetName()->get_str());
9071 if (k<mxVariables.countitems()-1) {
9072 fprintf (theDump,"%c",',');
9073 } else {
9074 fprintf (theDump,"%c",';');
9075 }
9076 varPool[k]=topPolyCap;
9077 }
9078
9079 // begin by computing the actual "numerical exponential"
9080 // initialize all the variables to the polycap value
9081
9082 InitMxVar (mxVariables, topPolyCap);
9083
9084 _Matrix *dummy = (_Matrix*)theBase->Evaluate(false);
9085 _Matrix *numExp = (_Matrix*)(dummy->Exponentiate());
9086
9087 DeleteObject(dummy);
9088 checkParameter (ANAL_MATRIX_TOLERANCE,analMatrixTolerance,1e-6);
9089 fprintf (theDump,"%g,%g;",analMatrixTolerance,topPolyCap);
9090
9091 // now loop thru the cells and check the precision term by term
9092 for (k=0; k<lDim; k++) {
9093 _SimpleList termRank, termIndex,c1,c2;
9094 _Polynomial* thisCell = ((_Polynomial**)theData)[k];
9095 long nTerms = thisCell->GetTheTerms()->NumberOfTerms(),
9096 step = nTerms/10+1, upTo = step, tup,j;
9097 hyFloat* coeffHolder = (hyFloat*)MatrixMemAllocate (nTerms*sizeof(hyFloat)), error, bestError = 1;
9098
9099 thisCell->RankTerms(&termRank);
9100 for (i=0; i<nTerms; i++) {
9101 termIndex<<i;
9102 }
9103 SortLists (&termRank,&termIndex);
9104 termRank.Clear();
9105 for (i=0; i<nTerms; i++) {
9106 termRank<<(nTerms-termIndex.Find(i)-1);
9107 }
9108 bestError = 1;
9109 while(upTo<nTerms+step) {
9110 if (upTo<nTerms) {
9111 tup = upTo;
9112 } else {
9113 tup = nTerms-1;
9114 }
9115 termIndex.Clear();
9116 for (i=0,j=0; (i<nTerms)&&(j<=tup); i++) {
9117 if (termRank.list_data[i]<=tup) {
9118 coeffHolder[j]=thisCell->GetTheTerms()->GetCoeff(i);
9119 j++;
9120 termIndex<<i;
9121 }
9122 }
9123 thisCell->Convert2ComputationForm(&c1,&c2,&termIndex);
9124 error = fabs(thisCell->ComputeP(varPool,coeffHolder,thisCell->GetNoVariables()+1,c1.countitems(),c1.quickArrayAccess(),
9125 c2.quickArrayAccess())-numExp->directIndex(k));
9126 if (error<bestError) {
9127 bestError = error;
9128 }
9129 if (bestError<=analMatrixTolerance) {
9130 break;
9131 }
9132 upTo+=step;
9133 }
9134
9135 if (bestError>analMatrixTolerance) {
9136 char be[100];
9137 snprintf (be, sizeof(be),"%g",bestError);
9138 _String wm ("Polynomial Matrix Exp approximation failed tolerance test in cell (");
9139 wm = wm&_String(k/hDim)&","&_String(k%hDim)&"). Tolerance achieved is:"&be;
9140 ReportWarning (wm);
9141 }
9142 fprintf(theDump,"%ld{",tup+1);
9143 for (i=0; i<=tup; i++) {
9144 if (i) {
9145 fprintf(theDump,",%18.16g",coeffHolder[i]);
9146 } else {
9147 fprintf(theDump,"%18.16g",coeffHolder[i]);
9148 }
9149 }
9150 fprintf(theDump,"}%ld",tup);
9151 c1.toFileStr(theDump);
9152 c2.toFileStr(theDump);
9153 MatrixMemFree (coeffHolder);
9154
9155 }
9156 MatrixMemFree (varPool);
9157 DeleteObject (numExp);
9158 }
9159
9160 //_____________________________________________________________________________________________
9161
ExpNumberOfSubs(_Matrix * freqs,bool mbf)9162 hyFloat _Matrix::ExpNumberOfSubs (_Matrix* freqs, bool mbf) {
9163 // TODO SLKP 20171027 SLKP reviewed and edited; check correctness
9164
9165 if (!is_square_numeric(false) || !freqs->is_numeric()) {
9166 return 0.0;
9167 }
9168
9169 hyFloat result = 0.0;
9170 _Matrix *stencil = BranchLengthStencil();
9171
9172 if ( freqs->is_dense() == false ) {
9173 freqs->CheckIfSparseEnough(true);
9174 }
9175
9176 if (stencil) {
9177 if (mbf) {
9178 ForEachCellNumeric ([&] (hyFloat value, unsigned long index, unsigned long row, unsigned long column) -> void {
9179 if (row != column && stencil->theData[index]) {
9180 result += value * freqs->theData[row] * freqs->theData[column];
9181 }
9182 });
9183 } else {
9184 ForEachCellNumeric ([&] (hyFloat value, unsigned long index, unsigned long row, unsigned long column) -> void {
9185 if (row != column && stencil->theData[index]) {
9186 result += value * freqs->theData[row];
9187 }
9188 });
9189 }
9190 } else {
9191 if (mbf) {
9192 ForEachCellNumeric ([&] (hyFloat value, unsigned long index, unsigned long row, unsigned long column) -> void {
9193 if (row != column) {
9194 result += value * freqs->theData[row] * freqs->theData[column];
9195 }
9196 });
9197 } else {
9198 ForEachCellNumeric ([&] (hyFloat value, unsigned long index, unsigned long row, unsigned long column) -> void {
9199 if (row != column) {
9200 result += value * freqs->theData[row];
9201 }
9202 });
9203 }
9204 }
9205 return result;
9206 }
9207
9208 //_____________________________________________________________________________________________
ComputeRowAndColSums(void)9209 _List* _Matrix::ComputeRowAndColSums (void) {
9210 // the first entry is the matrix with row sums
9211 // the second - the entry with column sums
9212 // the third - a constant with the total sum
9213 if ((storageType == 1) && (hDim >= 1) && (vDim >= 1)) {
9214 _List* resList = new _List;
9215 _Matrix *rowSums = new _Matrix (hDim,1,false,true),
9216 *columnSums = new _Matrix (vDim,1,false,true);
9217
9218
9219 hyFloat totals = 0.0;
9220
9221 if (theIndex) {
9222 for (long item = 0; item < lDim; item ++) {
9223 long idx = theIndex[item];
9224 if (idx>=0) {
9225
9226 hyFloat v = theData[idx];
9227
9228 rowSums->theData[idx/vDim] += v;
9229 columnSums->theData[idx%vDim] += v;
9230 totals += v;
9231 }
9232 }
9233 } else {
9234 for (long rows = 0; rows < hDim; rows++) {
9235 hyFloat rowSum = 0.;
9236
9237 for (long columns = 0; columns < vDim; columns ++) {
9238 rowSum += theData[rows*vDim+columns];
9239 }
9240
9241 rowSums->theData[rows] = rowSum;
9242 totals += rowSum;
9243 }
9244
9245 for (long columns = 0; columns < vDim; columns++) {
9246 hyFloat colSum = 0.;
9247
9248 for (long rows = 0; rows < hDim; rows ++) {
9249 colSum += theData[rows*vDim+columns];
9250 }
9251
9252 columnSums->theData[columns] = colSum;
9253 }
9254 }
9255
9256 (*resList) < rowSums
9257 < columnSums
9258 < new _Constant (totals);
9259
9260 return resList;
9261
9262 }
9263 return nil;
9264 }
9265
9266 //_____________________________________________________________________________________________
9267
NeighborJoin(bool methodIndex,HBLObjectRef cache)9268 _Matrix* _Matrix::NeighborJoin (bool methodIndex, HBLObjectRef cache) {
9269 long specCount = GetHDim();
9270
9271 if (storageType != 1 || specCount!= GetVDim() || specCount < 4) {
9272 HandleApplicationError ("NeigborJoin needs a square numeric matrix of dimension >= 4");
9273 return new _Matrix;
9274 }
9275
9276 CheckIfSparseEnough (true);
9277
9278 _Matrix netDivergence (specCount,1,false,true);
9279 _SimpleList useColumn (specCount,0,1),
9280 columnIndex (specCount,0,1);
9281
9282 _Matrix* res = (_Matrix* )_returnMatrixOrUseCache((specCount+1)*2,3,_NUMERICAL_TYPE,false,cache);
9283
9284 for (long k=0; k<specCount ; k++) {
9285 for (long j=0; j<k; j++) {
9286 hyFloat d = theData[j*specCount+k];
9287
9288 netDivergence.theData[k] += d;
9289 netDivergence.theData[j] += d;
9290
9291 }
9292 res->theData[k*3+2] = 1;
9293 }
9294
9295 long cladesMade = 1;
9296
9297 while (cladesMade < specCount) {
9298 hyFloat min = 1.e100;
9299
9300 long minIndex = -1,
9301 minIndex2 = -1,
9302 minIndexR = -1,
9303 minIndexC = -1,
9304 k = specCount-1-cladesMade;
9305
9306 hyFloat recRemaining = 1./k;
9307
9308 if (cladesMade == specCount-1) {
9309 minIndex = useColumn.list_data[1];
9310
9311 hyFloat d = theData[minIndex];
9312
9313 if ((d<0)&&methodIndex) {
9314 d = 0;
9315 }
9316
9317 k = columnIndex.list_data[1];
9318
9319 if (k>=specCount+cladesMade-2) {
9320 k = columnIndex[0];
9321 }
9322
9323 long m = specCount+cladesMade-2;
9324
9325 res->theData[k*3+1] = d;
9326 res->theData[k*3] = m;
9327 res->theData[3*m+2] += res->theData[3*k+2];
9328 res->theData[3*m] = -1;
9329
9330 break;
9331 }
9332
9333 for (long i=1; i<useColumn.lLength; i++) {
9334 long c1 = useColumn.list_data[i];
9335
9336 for (long j=0; j<i; j=j+1) {
9337 long c2 = useColumn.list_data[j];
9338
9339 //if (c2>=c1)
9340 //break;
9341
9342 hyFloat d = theData[c2*specCount+c1]-(netDivergence.theData[c1]+netDivergence.theData[c2])*recRemaining;
9343
9344 if (d<min) {
9345 min = d;
9346 minIndex = c2;
9347 minIndex2 = c1;
9348 minIndexR = j;
9349 minIndexC = i;
9350 }
9351 }
9352 }
9353
9354 if (minIndex < 0 || minIndex2 < 0 || minIndexR < 0 || minIndexC < 0) {
9355 _String err = _String ("Invalid distance matrix passed to NeighborJoin. Matrices written onto ") & hy_messages_log_name;
9356 ReportWarning ((_String*)toStr());
9357 ReportWarning (_String((_String*)netDivergence.toStr()));
9358 ReportWarning (_String((_String*)useColumn.toStr()));
9359 HandleApplicationError (err);
9360 DeleteObject (res);
9361 return new _Matrix;
9362 }
9363
9364 hyFloat D = theData[minIndex*specCount+minIndex2],
9365 d = (D - (netDivergence.theData[minIndex2]-netDivergence.theData[minIndex])*recRemaining)*0.5,
9366 d2 = D - d;
9367
9368 if (methodIndex) {
9369 if (d<0) {
9370 d = 0.0;
9371 d2 = D;
9372 }
9373 if (d2<0) {
9374 d2 = 0.0;
9375 d = D;
9376 if (d<0) {
9377 d = 0;
9378 }
9379 }
9380 }
9381
9382 long m = columnIndex.list_data [minIndexC],
9383 n = columnIndex.list_data [minIndexR];
9384
9385 k = specCount+cladesMade-1;
9386
9387 res->theData[n*3] = k;
9388 res->theData[n*3+1] = d;
9389
9390 res->theData[m*3] = k;
9391 res->theData[m*3+1] = d2;
9392
9393 res->theData[k*3+2] = res->theData[n*3+2]+res->theData[m*3+2]+1;
9394
9395 d = theData[minIndex*specCount+minIndex2];
9396
9397 netDivergence.theData[minIndex] = 0;
9398 netDivergence.theData[minIndex2] = 0;
9399
9400 useColumn.Delete(minIndexC);
9401 columnIndex.Delete(minIndexC);
9402
9403 for (k=0; k<useColumn.lLength; k++) {
9404 long k2 = useColumn.list_data[k];
9405
9406 if (k2>=minIndex) {
9407 if (k2 == minIndex) {
9408 k++;
9409 }
9410 break;
9411 }
9412
9413 hyFloat d2 = theData[k2*specCount+minIndex]+theData[k2*specCount+minIndex2],
9414 t = (d2-d)*.5;
9415
9416 netDivergence.theData [k2] += t-d2;
9417 theData [k2*specCount+minIndex] = t;
9418 netDivergence.theData[minIndex] += t;
9419
9420 }
9421
9422 for (; k<useColumn.lLength; k++) {
9423 long k2 = useColumn.list_data[k];
9424 if (k2 >= minIndex2) {
9425 if (k2 == minIndex2) {
9426 k++;
9427 }
9428 break;
9429 }
9430
9431 hyFloat d2 = theData[minIndex*specCount+k2]+theData[k2*specCount+minIndex2],
9432 t = (d2-d)*.5;
9433
9434 netDivergence.theData [k2] += t-d2;
9435 theData[minIndex*specCount+k2] = t;
9436 netDivergence.theData[minIndex] += t;
9437
9438 }
9439
9440 //for (k=minIndex2+1;k<ds.species; k=k+1)
9441 for (; k<useColumn.lLength; k++) {
9442 long k2 = useColumn.list_data[k];
9443
9444 hyFloat d2 = theData[minIndex*specCount+k2]+theData[minIndex2*specCount+k2],
9445 t = (d2-d)*.5;
9446
9447 netDivergence.theData [k2] += t-d2;
9448 theData[minIndex*specCount+k2] = t;
9449 netDivergence.theData[minIndex] += t;
9450 }
9451
9452 columnIndex.list_data[minIndexR] = specCount+cladesMade-1;
9453 {
9454 for (long i=0; i<minIndex2; i++) {
9455 theData[i*specCount+minIndex2] = 0;
9456 }
9457 }
9458 {
9459 for (long i=minIndex2+1; i<specCount; i++) {
9460 theData[minIndex2*specCount+i]=0;
9461 }
9462 }
9463
9464 cladesMade ++;
9465 }
9466
9467
9468 //_Matrix *tree = res->MakeTreeFromParent (specCount);
9469 //DeleteObject (res);
9470 //return tree;
9471 return res;
9472 }
9473
9474 //_____________________________________________________________________________________________
MakeTreeFromParent(long specCount,HBLObjectRef cache)9475 _Matrix* _Matrix::MakeTreeFromParent (long specCount, HBLObjectRef cache) {
9476 if (is_empty()) {
9477 return new _Matrix;
9478 }
9479
9480 try {
9481
9482 if (specCount<0L ) {
9483 throw (_String ("Parameter to ") & __PRETTY_FUNCTION__ & " must be greater than or equal to 0");
9484 }
9485
9486 if (GetVDim () != 3) {
9487 throw (_String ("Expected a matrix with 3 columns"));
9488 }
9489 if (GetHDim () <= 2*specCount + 1) {
9490 throw (_String ("Expected a matrix with at least ") & (2*specCount + 1) & " rows");
9491 }
9492
9493 const long result_rows = 2*(specCount+1);
9494 _Matrix *tree = (_Matrix* )_returnMatrixOrUseCache(result_rows,5,_NUMERICAL_TYPE,false,cache),
9495 CI (2*(specCount+1),1,false,true);
9496
9497
9498 for (long kk = 0; kk < specCount-1; kk++) {
9499 tree->theData[kk*5+4] = -1; // set parent records to
9500 }
9501
9502 long cladesMade = 0L;
9503
9504 for (long nodeID2 = 0L; nodeID2 < specCount; nodeID2 ++) {
9505 long nodeID = nodeID2,
9506 nodeDepth = 0,
9507 saveNodeID = nodeID,
9508 parentID = theData[nodeID*3],
9509 layoutOffset = cladesMade,
9510 m,
9511 n;
9512
9513 while (parentID>=0) {
9514 long idx = parentID-specCount;
9515 if (idx < 0 || idx >= result_rows) {
9516 throw (_String ("Invalid parent index in row ") & nodeID2);
9517 }
9518 n = tree->theData[idx*5+4];
9519 if (n >= 0) {
9520 layoutOffset = n+tree->theData[idx*5+3];
9521 break;
9522 }
9523 parentID = theData[parentID*3];
9524 }
9525
9526 parentID = theData[nodeID*3];
9527
9528 while (parentID>=0) {
9529 n = parentID-specCount;
9530 if (n < 0 || n >= result_rows) {
9531 throw (_String ("Invalid parent index in row ") & nodeID);
9532 }
9533 m = theData[nodeID*3+2];
9534
9535 if (tree->theData[n*5+4] < 0)
9536 /* this node hasn't been laid out yet */
9537 {
9538 if (theData[parentID*3]>=0) {
9539 tree->theData[n*5+4] = layoutOffset; /* where the layout for the clade begins */
9540 tree->theData[n*5+3] = m; /* offset for that layout */
9541 }
9542
9543 m += layoutOffset - 1;
9544
9545 tree->theData[m*5] = nodeID;
9546 tree->theData[m*5+2] = theData[nodeID*3+1];
9547
9548 CI.theData[nodeID] = m;
9549 } else
9550 /* it has been laid out */
9551 {
9552 m += tree->theData[n*5+3]+tree->theData[n*5+4] - 1;
9553
9554 tree->theData[m*5] = nodeID;
9555 tree->theData[m*5+2] = theData[nodeID*3+1];
9556
9557 tree->theData[n*5+3] = m + theData[nodeID*3+2];
9558
9559 CI.theData[nodeID] = m;
9560 nodeDepth ++;
9561
9562 break;
9563 }
9564 nodeDepth++;
9565 nodeID = parentID;
9566 parentID = theData[nodeID*3];
9567 }
9568
9569 /* update levels of nodes */
9570
9571 if (parentID<0) {
9572 nodeID = saveNodeID;
9573 parentID = theData[nodeID*3];
9574
9575 while (parentID>=0) {
9576 m = CI.theData[nodeID];
9577 if (m < 0 || m >= result_rows) {
9578 throw (_String ("Invalid parent index in row ") & nodeID);
9579 }
9580 tree->theData[m*5+1] = nodeDepth;
9581 nodeDepth --;
9582 saveNodeID = nodeID;
9583 nodeID = parentID;
9584 parentID = theData[nodeID*3];
9585 }
9586
9587 cladesMade += theData[3*saveNodeID+2];
9588 } else {
9589 m = CI.theData[parentID];
9590
9591 n = tree->theData[m*5+1];/* depth of the parent */
9592
9593 nodeID = saveNodeID;
9594
9595 while (nodeDepth >= 0) {
9596 m = CI.theData[nodeID];
9597
9598 tree->theData[m*5+1] = nodeDepth+n;
9599
9600 nodeDepth --;
9601 nodeID = theData[nodeID*3];
9602 }
9603 }
9604 }
9605 tree->theData[cladesMade*5] = 2*specCount-2;
9606 tree->theData[cladesMade*5+1] = 0;
9607 tree->theData[(specCount-2)*5+4] = 0;
9608 return tree;
9609 } catch (const _String& error) {
9610 HandleApplicationError(error);
9611 return new _Matrix (1,1,false,true);
9612 }
9613 }
9614
9615
9616 //_____________________________________________________________________________________________
FisherExact(hyFloat p1,hyFloat p2,hyFloat p3)9617 hyFloat _Matrix::FisherExact (hyFloat p1, hyFloat p2, hyFloat p3)
9618 {
9619 if ((hDim>=1)&&(vDim>=1)&&(hDim+vDim>2)) {
9620 if (vDim<hDim) {
9621 _Matrix temp (*this);
9622 temp.Transpose();
9623 return temp.FisherExact (p1,p2,p3);
9624 }
9625 _Matrix * numericMx = (_Matrix*)ComputeNumeric();
9626
9627 double prob,
9628 pval;
9629
9630 numericMx->CheckIfSparseEnough (true);
9631
9632 double *tempArray = new double [numericMx->lDim];
9633
9634 for (long i=0; i<hDim; i++)
9635 for (long j=0; j<vDim; j++) {
9636 tempArray[j*hDim+i] = numericMx->theData[i*vDim+j];
9637 }
9638
9639 fexact_ (hDim,vDim,tempArray,p1,p2,p3,&prob,&pval);
9640 delete [] tempArray;
9641 return pval;
9642
9643 }
9644 return 1.;
9645 }
9646
9647 //_____________________________________________________________________________________________
9648
SimplexHelper1(long rowIndex,_SimpleList & columnList,long columnCount,bool useAbsValue,long & maxIndex,hyFloat & maxValue)9649 void _Matrix::SimplexHelper1 (long rowIndex, _SimpleList& columnList, long columnCount, bool useAbsValue, long& maxIndex, hyFloat& maxValue)
9650 // find the maximum element (using absolute value of not) in row rowIndex+1 of this matrix,
9651 // over first columnCount columns indexed by columnList
9652 // column indexing is offset by + 1 to account for the first column not being eligible for pivoting
9653 {
9654 if (columnCount <= 0) {
9655 maxValue = 0.0;
9656 } else {
9657 rowIndex = (rowIndex+1)*vDim;
9658 maxIndex = columnList.list_data[0];
9659 maxValue = theData[rowIndex+maxIndex+1];
9660 for (long k=1; k<columnCount; k++) {
9661 hyFloat t = useAbsValue?
9662 (fabs(theData[rowIndex+columnList.list_data[k]+1])-fabs(maxValue))
9663 :(theData[rowIndex+columnList.list_data[k]+1]-maxValue);
9664 if (t>0.) {
9665 maxValue = theData[rowIndex+columnList.list_data[k]+1];
9666 maxIndex = columnList.list_data[k];
9667 }
9668 }
9669 }
9670 }
9671
9672 //_____________________________________________________________________________________________
9673
SimplexHelper2(long & pivotIndex,long columnToExamine,hyFloat eps)9674 void _Matrix::SimplexHelper2 (long& pivotIndex, long columnToExamine, hyFloat eps)
9675 {
9676 long m = hDim-2,
9677 n = vDim-1,
9678 i = 0;
9679
9680 hyFloat q1,
9681 q;
9682
9683 pivotIndex = -1;
9684 for (; i<m; i++)
9685 if (theData[(i+1)*vDim+columnToExamine+1] < -eps) {
9686 break;
9687 }
9688 if (i>=m) {
9689 return; // function is unbounded
9690 }
9691 q1 = -theData[(i+1)*vDim]/theData[(i+1)*vDim+columnToExamine+1];
9692 pivotIndex = i;
9693 for (i=pivotIndex+1; i<m; i++) {
9694 if (theData[(i+1)*vDim+columnToExamine+1] < -eps) {
9695 q = -theData[(i+1)*vDim]/theData[(i+1)*vDim+columnToExamine+1];
9696 if (q<q1) {
9697 pivotIndex = i;
9698 q1 = q;
9699 } else {
9700 hyFloat q0, qp;
9701 if (q==q1) { // degeneracy
9702 for (long k=0; k<n; k++) {
9703 qp = -theData[(pivotIndex+1)*vDim + k + 1]/theData[(pivotIndex+1)*vDim+columnToExamine+1];
9704 q0 = -theData[(i+1)*vDim + k + 1]/theData[(i+1)*vDim+columnToExamine+1];
9705 if (q0!=qp) {
9706 break;
9707 }
9708 }
9709 if (q0 < qp) {
9710 pivotIndex = i;
9711 }
9712 }
9713 }
9714 }
9715 }
9716
9717 }
9718
9719 //_____________________________________________________________________________________________
9720
SimplexHelper3(long i1,long k1,long ip,long kp)9721 void _Matrix::SimplexHelper3 (long i1, long k1, long ip, long kp)
9722 {
9723 hyFloat piv = 1./theData[(ip+1)*vDim+kp+1];
9724 for (long i=0; i<=i1+1; i++)
9725 if (i-1 != ip) { // not the pivot row
9726 theData[i*vDim+kp+1] *= piv;
9727 for (long k=0; k<=k1+1; k++)
9728 if (k-1 != kp) {
9729 theData[i*vDim+k] -= theData[(ip+1)*vDim+k] * theData[i*vDim+kp+1];
9730 }
9731 }
9732 for (long k=0; k<=k1+1; k++)
9733 if (k-1 != kp) {
9734 theData[(ip+1)*vDim+k] *= -piv;
9735 }
9736 theData[(ip+1)*vDim+kp+1] = piv;
9737 }
9738
9739 //_____________________________________________________________________________________________
SimplexSolve(hyFloat desiredPrecision)9740 _Matrix* _Matrix::SimplexSolve (hyFloat desiredPrecision ) {
9741 // this function is adapted from the Num. Recipes in C version; but with 0 indexing
9742 // hyphy primitives
9743 // and without goto labels
9744
9745 // the of dimension RxC is interpreted as follows
9746 // R-1 constraints
9747 // C-2 variables
9748
9749 // the first row:
9750 // cell 0 - current value of the objective function
9751 // cells 1-C-2 - coefficient of variable x_k in the objective function
9752 // cell C-1 - if >=0. then maximize the function
9753 // - if <0 then minimize the function
9754
9755 // other rows
9756 // constraint j written in the form
9757 // b_j - a_j1 x_1 - a_j2 x_2 - ... -a_jk x_k
9758 // last cell is what type of constraint it is:
9759 // < 0: <= inequality
9760 // > 0: >= inequality
9761 // = 0 equality
9762
9763 // upon return, will contain a row matrix of either C-1 cells:
9764 // extreme value of the objective function in the first cell
9765 // variable values in the same order as originally supplied
9766 // if an kEmptyString matrix is returned - no feasible solution could be found
9767 // if a 1x1 matrix is returned - the objective function is unbounded
9768
9769 try {
9770
9771 long n = vDim-2, // number of variables
9772 m = hDim-1; // number of constraints
9773
9774 if (is_numeric() && n>0 && m>0) {
9775 while (1) // artificial construct used to break out on error
9776 // an to avoid goto statements
9777 {
9778 bool doMaximize = (*this)(0,n+1) >= 0.0;
9779
9780 // allocate temporary storage
9781 _Matrix tempMatrix (m+2,n+1,false,true);
9782 // first, copy the objective function row
9783 for (long i=0; i<=n; i++) {
9784 tempMatrix.Store(0,i,doMaximize?(*this)(0,i):-(*this)(0,i));
9785 }
9786
9787 // now, count the number of constraints of each type and reorder things
9788
9789 long m1 = 0, // <= constraints
9790 m2 = 0, // >= constraints
9791 m3 = 0; // == constraints
9792
9793 {
9794 for (long i=1; i<=m; i++) {
9795 hyFloat t = (*this)(i,n+1);
9796 if (t<0.0) {
9797 m1++;
9798 } else if (t>0.0) {
9799 m2++;
9800 } else {
9801 m3++;
9802 }
9803 if ((*this)(i,0) < 0.0) {
9804 throw _String("Negative values are not allowed in the first column of the simplex tableau");
9805 }
9806 }
9807 }
9808
9809
9810 // copy coefficients into the temp matrix, sorting the constraints in the <=, >= and == order
9811 {
9812 for (long i=1, t1=0, t2=0, t3=0; i<=m; i++) {
9813 hyFloat t = (*this)(i,n+1);
9814 long idx;
9815 if (t<0.0) {
9816 idx=1+t1++;
9817 } else if (t>0.0) {
9818 idx=1+m1+t2++;
9819 } else {
9820 idx=1+m1+m2+t3++;
9821 }
9822 for (long j=0; j<=n; j++) {
9823 tempMatrix.Store(idx,j,(*this)(i,j));
9824 }
9825
9826 }
9827 }
9828 // allocate temporary storage
9829
9830 _SimpleList l1 (n+1,0,1),
9831 l3 (m,0,0),
9832 izrov (n,0,1),
9833 iposv (m,n,1);
9834
9835 long nl1 = n;
9836
9837 if (m2+m3) { // >= and == constraints exist; origin is not a feasible solution
9838 for (long i=0; i<m2; l3.list_data[i] = 1,i++) ; // slack variables in the list of 'basis' variables
9839 for (long k=0; k<=n; k++) { // compute the auxiliary objective function
9840 hyFloat q = 0.;
9841 for (long k2 = m1+1; k2<=m; k2++) {
9842 q += tempMatrix(k2,k);
9843 }
9844 tempMatrix.Store (m+1,k,-q);
9845 }
9846 while (1) { // initial artifical construct
9847 long pivotColumn,
9848 ip;
9849 hyFloat pivotValue;
9850
9851 tempMatrix.SimplexHelper1 (m,l1,nl1,false,pivotColumn, pivotValue);
9852 if (pivotValue <= desiredPrecision && tempMatrix(m+1,0) < -desiredPrecision)
9853 // aux objective is still negative and can't be improved
9854 // no feasible solution
9855 {
9856 return new _Matrix;
9857 }
9858 if (pivotValue <= desiredPrecision && tempMatrix(m+1,0) <= desiredPrecision)
9859 // aux objective is zero and can't be improved
9860 // found a feasible solution; clean up artificial variables and move on to phase 2
9861 {
9862 for (ip = m1+m2; ip < m; ip++) {
9863 if (iposv.list_data[ip] == ip + n) {
9864 tempMatrix.SimplexHelper1 (ip,l1,nl1,true,pivotColumn, pivotValue);
9865 if (pivotValue > desiredPrecision) {
9866 goto one;
9867 }
9868 }
9869 }
9870 for (long i = m1; i<m1+m2; i++)
9871 if (l3.list_data[i-m1] == 1)
9872 for (long k=0; k<=n; k++) {
9873 tempMatrix.Store (i+1,k,-tempMatrix(i+1,k));
9874 }
9875
9876 break;
9877 }
9878
9879 tempMatrix.SimplexHelper2 (ip,pivotColumn,desiredPrecision);
9880 if (ip<0) {
9881 return new _Matrix (1,1,false,true); // unbounded function
9882 }
9883
9884 one:
9885 tempMatrix.SimplexHelper3 (m,n-1,ip,pivotColumn);
9886 if (iposv.list_data[ip] >= n+m1+m2) {
9887 long k = 0;
9888 for (k=0; k<nl1; k++)
9889 if (l1.list_data[k] == pivotColumn) {
9890 break;
9891 }
9892 nl1--;
9893 for (long i2=k; i2<nl1; i2++) {
9894 l1.list_data[i2] = l1.list_data[i2+1];
9895 }
9896 } else {
9897 long k2 = iposv.list_data[ip] - m1 - n;
9898 if (k2 >= 0 && l3.list_data[k2]) {
9899 l3.list_data[k2] = 0;
9900 tempMatrix.theData[(m+1)*tempMatrix.vDim + pivotColumn + 1] ++;
9901 for (long i=0; i<m+2; i++) {
9902 tempMatrix.theData[i*tempMatrix.vDim + pivotColumn + 1] *= -1.0;
9903 }
9904 }
9905 }
9906 long s = izrov.list_data[pivotColumn];
9907 izrov.list_data[pivotColumn] = iposv.list_data[ip];
9908 iposv.list_data[ip] = s;
9909 }// end of phase 1
9910 }
9911
9912 while (1) {
9913 long pivotColumn,
9914 pivotRow;
9915 hyFloat pivotValue;
9916
9917 tempMatrix.SimplexHelper1 (-1,l1,nl1,false,pivotColumn,pivotValue);
9918 if (pivotValue < desiredPrecision) { // done!
9919 // produce the final solution
9920 _Matrix * resMatrix = new _Matrix (1,n+1,false,true);
9921 resMatrix->Store(0,0,doMaximize?tempMatrix(0,0):-tempMatrix(0,0));
9922 for (long k=0; k<iposv.lLength; k++)
9923 if (iposv.list_data[k]<n) {
9924 resMatrix->Store(0,iposv.list_data[k]+1,tempMatrix(k+1,0));
9925 }
9926 return resMatrix;
9927 }
9928 tempMatrix.SimplexHelper2 (pivotRow,pivotColumn,desiredPrecision);
9929 if (pivotRow<0) {
9930 return new _Matrix (1,1,false,true);
9931 }
9932 tempMatrix.SimplexHelper3 (m-1,n-1,pivotRow,pivotColumn);
9933 long s = izrov.list_data[pivotColumn];
9934 izrov.list_data[pivotColumn] = iposv.list_data[pivotRow];
9935 iposv.list_data[pivotRow] = s;
9936 }
9937
9938 }
9939 } else {
9940 throw _String("SimplexSolve requires a numeric matrix with > 1 row and > 2 columns");
9941 }
9942
9943
9944 } catch (_String const& err) {
9945 HandleApplicationError (err);
9946 }
9947 return new _Matrix;
9948 }
9949
9950 //_____________________________________________________________________________________________
9951
CopyABlock(_Matrix * source,long startRow,long startColumn,long rowSpan,long colSpan)9952 void _Matrix::CopyABlock (_Matrix * source, long startRow, long startColumn, long rowSpan, long colSpan)
9953 {
9954 long indexTarget = startRow*vDim + startColumn,
9955 indexSource = 0,
9956 sourceHDim = rowSpan<=0?source->hDim:rowSpan,
9957 sourceVDim = colSpan<=0?source->vDim:colSpan,
9958 maxRow = MIN (hDim, startRow + sourceHDim),
9959 maxColumn = MIN (vDim, startColumn + sourceVDim);
9960
9961 for (long r = startRow; r < maxRow; r++) {
9962 for (long c = startColumn, c2 = 0; c < maxColumn; c++, c2++) {
9963 theData[indexTarget+c2] = source->theData[indexSource+c2];
9964 }
9965
9966 indexSource += sourceVDim;
9967 indexTarget += vDim;
9968 }
9969 }
9970
9971
9972 //_____________________________________________________________________________________________
DirichletDeviate(void)9973 HBLObjectRef _Matrix::DirichletDeviate (void)
9974 {
9975 /* -----------------------------------------------------------
9976 DirichletDeviate()
9977 Generate vector of random deviates from the Dirichlet
9978 distribution defined by contents of this matrix as
9979 hyperparameters (a > 0).
9980 ----------------------------------------------------------- */
9981 try {
9982
9983 long dim;
9984
9985 hyFloat denom = 0.;
9986
9987 _Matrix res (1, dim = GetHDim()*GetVDim(), false, true); // row vector
9988
9989
9990 if (!is_numeric()) {
9991 throw _String("Only numeric vectors can be passed to DirichletDeviate");
9992 }
9993
9994 if (is_row() || is_column ()) {
9995 // generate a random deviate from gamma distribution for each hyperparameter
9996
9997 for (long i = 0L; i < dim; i++) {
9998 if (theData[i] < 0.) {
9999 throw _String("Dirichlet not defined for negative parameter values.");
10000 }
10001
10002 res.Store (0, i, gammaDeviate(theData[i]));
10003 denom += res(0,i);
10004 }
10005
10006 // normalize by sum
10007 for (long i = 0; i < dim; i++) {
10008 res.Store (0, i, res(0,i)/denom);
10009 }
10010
10011 return (HBLObjectRef) res.makeDynamic();
10012 } else {
10013 throw _String("Argument must be a row- or column-vector.");
10014 }
10015 } catch (_String const& err) {
10016 HandleApplicationError (err);
10017 }
10018 return new _Matrix (1,1,false,true);
10019 }
10020
10021
10022
10023 //_____________________________________________________________________________________________
GaussianDeviate(_Matrix & cov)10024 HBLObjectRef _Matrix::GaussianDeviate (_Matrix & cov)
10025 {
10026 /* ------------------------------------------------------
10027 GaussianDeviate()
10028 Generate vector of random deviates from k-
10029 dimensional Gaussian distribution given contents
10030 of this matrix as mean parameters, and argument
10031 as covariance matrix.
10032
10033 Use algorithm described in Numerical Recipes
10034 3rd ed., p.379
10035 ------------------------------------------------------ */
10036
10037 //ReportWarning (_String("Entered _Matrix::GaussianDeviate() with cov = ") & (_String *)(cov.toStr()));
10038
10039 try {
10040
10041 if (storageType != 1 || GetHDim() > 1) {
10042 HandleApplicationError (_String("ERROR in _Matrix::GaussianDeviate(), expecting to be called on numeric row vector matrix, current dimensions: ") & GetHDim() & "x" & GetVDim());
10043 return new _Matrix;
10044 }
10045
10046 long kdim = GetVDim(); // number of entries in this _Matrix object as vector of means
10047
10048 if (cov.check_dimension(kdim, kdim)) {
10049 _Matrix* cov_cd = (_Matrix *) cov.CholeskyDecompose(),
10050 * gaussvec = new _Matrix (1, kdim, false, true);
10051
10052 //ReportWarning (_String("\nCholesky decomposition of cov = ") & (_String *) cov_cd->toStr());
10053
10054 // fill column vector with independent standard normal deviates
10055 for (long i = 0L; i < kdim; i++) {
10056 gaussvec->Store (0, i, gaussDeviate());
10057 }
10058
10059 //ReportWarning (_String ("\nvector of gaussian deviates = ") & (_String *) gaussvec.toStr());
10060
10061 // left multiply vector by Cholesky decomposition of covariance matrix
10062 *gaussvec *= *cov_cd;
10063
10064 // shift mean
10065 for (long i = 0L; i < kdim; i++) {
10066 gaussvec->Store (0, i, (*gaussvec)(0,i) + theData[i]);
10067 }
10068
10069 DeleteObject (cov_cd);
10070 return gaussvec;
10071 } else {
10072 throw (_String("Error in _Matrix::GaussianDeviate(), incompatible dimensions in covariance matrix: ") & cov.GetHDim() & "x" & cov.GetVDim());
10073
10074 }
10075 } catch (const _String& err) {
10076 HandleApplicationError (err);
10077 }
10078
10079 return new _Matrix;
10080 }
10081
10082
10083 //_____________________________________________________________________________________________
MultinomialSample(_Constant * replicates)10084 HBLObjectRef _Matrix::MultinomialSample (_Constant *replicates) {
10085
10086 try {
10087 _List reference_manager;
10088
10089 long values = GetHDim();
10090 unsigned long samples = replicates?replicates->Value ():0;
10091
10092 _Matrix *eval = (_Matrix*)Compute (),
10093 * sorted = nil,
10094 * result = nil;
10095
10096 if (samples == 0UL) {
10097 throw _String ("Expected a numerical (>=1) value for the number of replicates");
10098 } else if ( ! eval->is_numeric() || GetVDim() != 2 || values < 2) {
10099 throw _String ("Expecting numerical Nx2 (with N>=1) matrix.");
10100 } else {
10101 _Constant one (1.);
10102 sorted = (_Matrix*) eval->SortMatrixOnColumn(&one, nil);
10103 reference_manager < sorted;
10104 hyFloat sum = 0.;
10105
10106 for (long n = 1L; n < 2*values; n+=2L) {
10107 hyFloat v = sorted->theData[n];
10108 if (v < 0.) {
10109 sum = 0.;
10110 break;
10111 }
10112 sum += v;
10113 }
10114 if (CheckEqual (sum, 0.)) {
10115 throw _String ("The probabilities (second column) cannot add to 0 or be negative");
10116 } else {
10117 sum = 1./sum;
10118
10119 _Matrix *raw_result = new _Matrix (1, values, false, true),
10120 *normalized = new _Matrix (1, values, false, true);
10121
10122 reference_manager <raw_result;
10123 reference_manager <normalized;
10124
10125
10126 for (long v = 0; v < values; v++) {
10127 normalized->theData[values-1-v] = sorted->theData[1+2*v] * sum;
10128 }
10129
10130
10131 hyFloat seconds_accumulator = .0,
10132 temp;
10133
10134 for (unsigned long it = 0UL; it < samples; it++) {
10135 raw_result->theData[DrawFromDiscrete(normalized->theData, values)] += 1.;
10136 }
10137
10138 result = new _Matrix (values, 2, false, true);
10139
10140 for (long v = 0; v < values; v++) {
10141 result->theData[2*v] = (long)sorted->theData[2*(values-1-v)];
10142 result->theData[2*v+1] = raw_result->theData[v];
10143 }
10144
10145
10146 return result;
10147 }
10148 }
10149 }
10150 catch (_String const& err) {
10151 HandleApplicationError (err);
10152 }
10153 return new _Matrix;
10154 }
10155
10156 //_____________________________________________________________________________________________
InverseWishartDeviate(_Matrix & df)10157 HBLObjectRef _Matrix::InverseWishartDeviate (_Matrix & df)
10158 {
10159 /* ---------------------------------------------------
10160 InverseWishartDeviate()
10161 Generates a random matrix whose inverse
10162 has the Wishart distribution with this matrix
10163 supplying the covariance matrix parameter and
10164 a degrees of freedom vector argument.
10165 --------------------------------------------------- */
10166
10167 try {
10168 long n = GetHDim();
10169
10170
10171 if (!is_square_numeric()) {
10172 throw _String("Expecting a numerical square matrix.");
10173 }
10174
10175 else if (!df.is_numeric() || !df.check_dimension(n,1)) {
10176 throw _String("Expecting numerical column vector for second argument (degrees of freedom).");
10177 } else {
10178 // compute Cholesky factor for this matrix inverse, extract the diagonal
10179 _List reference_manager;
10180
10181 _Matrix * inv = (_Matrix *) Inverse(nil);
10182 _Matrix * invCD = (_Matrix *) (inv->CholeskyDecompose());
10183
10184 DeleteObject (inv);
10185 reference_manager < invCD;
10186
10187 return WishartDeviate (df, *invCD);
10188 }
10189 } catch (const _String& err) {
10190 HandleApplicationError (err);
10191 }
10192 return new _Matrix;
10193 }
10194
10195 //_____________________________________________________________________________________________
WishartDeviate(_Matrix & df)10196 HBLObjectRef _Matrix::WishartDeviate (_Matrix & df) {
10197 _Matrix diag; // calls default constructor
10198 return WishartDeviate (df, diag);
10199 }
10200
10201 //_____________________________________________________________________________________________
10202
WishartDeviate(_Matrix & df,_Matrix & decomp)10203 HBLObjectRef _Matrix::WishartDeviate (_Matrix & df, _Matrix & decomp) {
10204 /* ---------------------------------------------------
10205 WishartDeviate()
10206 Generates a random matrix following the Wishart
10207 distribution with this matrix supplying the
10208 covariance matrix parameter.
10209
10210 First argument: degrees of freedom vector.
10211 Second argument (optional):
10212 Diagonal of Cholesky decomposition of
10213 covariance matrix, overrides this matrix.
10214 --------------------------------------------------- */
10215
10216
10217 try {
10218
10219 long n = GetHDim();
10220
10221 _Matrix rdeviates (n, n, false, true),
10222 rd_transpose;
10223
10224
10225 if (!(df.is_row () || df.is_column())) {
10226 throw _String("Expecting row vector for degrees of freedom argument.");
10227 } else if (df.is_column()) {
10228 df.Transpose(); // convert column vector to row vector
10229 }
10230
10231 if (decomp.is_empty()) { // no second argument, perform Cholesky decomposition
10232 if (!is_square_numeric()) {
10233 throw _String("Expecting square numeric matrix.");
10234 } else {
10235 _Matrix * cholesky = (_Matrix *) CholeskyDecompose();
10236
10237 if (cholesky->GetHDim() > 0) {
10238 decomp = *cholesky;
10239 DeleteObject (cholesky);
10240 } else {
10241 return cholesky; // empty _Matrix from error in CholeskyDecompose()
10242 }
10243 }
10244 }
10245
10246
10247 // populate diagonal with square root of i.i.d. chi-square random deviates
10248 for (unsigned long i = 0UL; i < n; i++) {
10249 rdeviates.Store (i, i, sqrt(chisqDeviate(df(0,i)-i+1)) );
10250
10251 // populate upper triagonal with i.i.d. standard normal N(0,1) deviates
10252 for (unsigned long j = i+1UL; j < n; j++) {
10253 rdeviates.Store (i, j, gaussDeviate());
10254 }
10255 }
10256
10257
10258 // result is obtained from D^T B D, where B = A^T A, ^T is matrix transpose
10259 rd_transpose = rdeviates;
10260 rd_transpose.Transpose();
10261 rd_transpose *= rdeviates; // A^T A
10262 rd_transpose *= decomp; // A^T A D
10263
10264 decomp.Transpose();
10265 decomp *= rd_transpose; // D^T A^T A D
10266
10267 return (HBLObjectRef) decomp.makeDynamic();
10268 } catch (const _String& err) {
10269 HandleApplicationError(err);
10270 }
10271 return new _Matrix;
10272 }
10273
10274 //-----------------------------------------------------------------------------------------------------------------
10275
_returnMatrixOrUseCache(long nrow,long ncol,long type,bool is_sparse,HBLObjectRef cache)10276 HBLObjectRef _returnMatrixOrUseCache (long nrow, long ncol, long type, bool is_sparse, HBLObjectRef cache) {
10277 if (cache && cache->ObjectClass() == MATRIX) {
10278 _Matrix *cached_mx = (_Matrix*)cache;
10279 if (cached_mx->check_dimension(nrow, ncol) && cached_mx->has_type (type) && cached_mx->is_dense() == !is_sparse) {
10280 cached_mx->Clear(false);
10281 } else {
10282 cached_mx->Clear();
10283 _Matrix::CreateMatrix (cached_mx, nrow, ncol, is_sparse, type == _NUMERICAL_TYPE ? true : false);
10284 }
10285 //cached_mx->AddAReference();
10286 return cached_mx;
10287 }
10288 return new _Matrix (nrow, ncol, is_sparse, type == _NUMERICAL_TYPE ? true : false);
10289 }
10290
10291