1 /* $Id: ClpCholeskyDense.cpp 2385 2019-01-06 19:43:06Z unxusr $ */
2 /*
3 Copyright (C) 2003, International Business Machines Corporation
4 and others. All Rights Reserved.
5
6 This code is licensed under the terms of the Eclipse Public License (EPL).
7 */
8 #include "CoinPragma.hpp"
9 #include "CoinHelperFunctions.hpp"
10 #include "ClpConfig.h"
11 #include "ClpHelperFunctions.hpp"
12 #include "ClpInterior.hpp"
13 #include "ClpCholeskyDense.hpp"
14 #include "ClpMessage.hpp"
15 #include "ClpQuadraticObjective.hpp"
16 #if CLP_HAS_ABC
17 #include "CoinAbcCommon.hpp"
18 #endif
19 #if CLP_HAS_ABC < 3
20 #undef cilk_for
21 #undef cilk_spawn
22 #undef cilk_sync
23 #define cilk_for for
24 #define cilk_spawn
25 #define cilk_sync
26 #endif
27
28 /*#############################################################################*/
29 /* Constructors / Destructor / Assignment*/
30 /*#############################################################################*/
31
32 /*-------------------------------------------------------------------*/
33 /* Default Constructor */
34 /*-------------------------------------------------------------------*/
ClpCholeskyDense()35 ClpCholeskyDense::ClpCholeskyDense()
36 : ClpCholeskyBase()
37 , borrowSpace_(false)
38 {
39 type_ = 11;
40 ;
41 }
42
43 /*-------------------------------------------------------------------*/
44 /* Copy constructor */
45 /*-------------------------------------------------------------------*/
ClpCholeskyDense(const ClpCholeskyDense & rhs)46 ClpCholeskyDense::ClpCholeskyDense(const ClpCholeskyDense &rhs)
47 : ClpCholeskyBase(rhs)
48 , borrowSpace_(rhs.borrowSpace_)
49 {
50 assert(!rhs.borrowSpace_ || !rhs.sizeFactor_); /* can't do if borrowing space*/
51 }
52
53 /*-------------------------------------------------------------------*/
54 /* Destructor */
55 /*-------------------------------------------------------------------*/
~ClpCholeskyDense()56 ClpCholeskyDense::~ClpCholeskyDense()
57 {
58 if (borrowSpace_) {
59 /* set NULL*/
60 sparseFactor_ = NULL;
61 workDouble_ = NULL;
62 diagonal_ = NULL;
63 }
64 }
65
66 /*----------------------------------------------------------------*/
67 /* Assignment operator */
68 /*-------------------------------------------------------------------*/
69 ClpCholeskyDense &
operator =(const ClpCholeskyDense & rhs)70 ClpCholeskyDense::operator=(const ClpCholeskyDense &rhs)
71 {
72 if (this != &rhs) {
73 assert(!rhs.borrowSpace_ || !rhs.sizeFactor_); /* can't do if borrowing space*/
74 ClpCholeskyBase::operator=(rhs);
75 borrowSpace_ = rhs.borrowSpace_;
76 }
77 return *this;
78 }
79 /*-------------------------------------------------------------------*/
80 /* Clone*/
81 /*-------------------------------------------------------------------*/
clone() const82 ClpCholeskyBase *ClpCholeskyDense::clone() const
83 {
84 return new ClpCholeskyDense(*this);
85 }
86 /* If not power of 2 then need to redo a bit*/
87 #define BLOCK 16
88 #define BLOCKSHIFT 4
89 /* Block unroll if power of 2 and at least 8*/
90 #define BLOCKUNROLL
91
92 #define BLOCKSQ (BLOCK * BLOCK)
93 #define BLOCKSQSHIFT (BLOCKSHIFT + BLOCKSHIFT)
94 #define number_blocks(x) (((x) + BLOCK - 1) >> BLOCKSHIFT)
95 #define number_rows(x) ((x) << BLOCKSHIFT)
96 #define number_entries(x) ((x) << BLOCKSQSHIFT)
97 /* Gets space */
reserveSpace(const ClpCholeskyBase * factor,int numberRows)98 int ClpCholeskyDense::reserveSpace(const ClpCholeskyBase *factor, int numberRows)
99 {
100 numberRows_ = numberRows;
101 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
102 /* allow one stripe extra*/
103 numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2);
104 sizeFactor_ = numberBlocks * BLOCKSQ;
105 /*#define CHOL_COMPARE*/
106 #ifdef CHOL_COMPARE
107 sizeFactor_ += 95000;
108 #endif
109 if (!factor) {
110 sparseFactor_ = new longDouble[sizeFactor_];
111 rowsDropped_ = new char[numberRows_];
112 memset(rowsDropped_, 0, numberRows_);
113 workDouble_ = new longDouble[numberRows_];
114 diagonal_ = new longDouble[numberRows_];
115 } else {
116 borrowSpace_ = true;
117 int numberFull = factor->numberRows();
118 sparseFactor_ = factor->sparseFactor() + (factor->size() - sizeFactor_);
119 workDouble_ = factor->workDouble() + (numberFull - numberRows_);
120 diagonal_ = factor->diagonal() + (numberFull - numberRows_);
121 }
122 numberRowsDropped_ = 0;
123 return 0;
124 }
125 /* Returns space needed */
space(int numberRows) const126 int ClpCholeskyDense::space(int numberRows) const
127 {
128 int numberBlocks = (numberRows + BLOCK - 1) >> BLOCKSHIFT;
129 /* allow one stripe extra*/
130 numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2);
131 int sizeFactor = numberBlocks * BLOCKSQ;
132 #ifdef CHOL_COMPARE
133 sizeFactor += 95000;
134 #endif
135 return sizeFactor;
136 }
137 /* Orders rows and saves pointer to matrix.and model */
order(ClpInterior * model)138 int ClpCholeskyDense::order(ClpInterior *model)
139 {
140 model_ = model;
141 int numberRows;
142 int numberRowsModel = model_->numberRows();
143 int numberColumns = model_->numberColumns();
144 if (!doKKT_) {
145 numberRows = numberRowsModel;
146 } else {
147 numberRows = 2 * numberRowsModel + numberColumns;
148 }
149 reserveSpace(NULL, numberRows);
150 rowCopy_ = model->clpMatrix()->reverseOrderedCopy();
151 return 0;
152 }
153 /* Does Symbolic factorization given permutation.
154 This is called immediately after order. If user provides this then
155 user must provide factorize and solve. Otherwise the default factorization is used
156 returns non-zero if not enough memory */
symbolic()157 int ClpCholeskyDense::symbolic()
158 {
159 return 0;
160 }
161 /* Factorize - filling in rowsDropped and returning number dropped */
factorize(const CoinWorkDouble * diagonal,int * rowsDropped)162 int ClpCholeskyDense::factorize(const CoinWorkDouble *diagonal, int *rowsDropped)
163 {
164 assert(!borrowSpace_);
165 const CoinBigIndex *columnStart = model_->clpMatrix()->getVectorStarts();
166 const int *columnLength = model_->clpMatrix()->getVectorLengths();
167 const int *row = model_->clpMatrix()->getIndices();
168 const double *element = model_->clpMatrix()->getElements();
169 const CoinBigIndex *rowStart = rowCopy_->getVectorStarts();
170 const int *rowLength = rowCopy_->getVectorLengths();
171 const int *column = rowCopy_->getIndices();
172 const double *elementByRow = rowCopy_->getElements();
173 int numberColumns = model_->clpMatrix()->getNumCols();
174 CoinZeroN(sparseFactor_, sizeFactor_);
175 /*perturbation*/
176 CoinWorkDouble perturbation = model_->diagonalPerturbation() * model_->diagonalNorm();
177 perturbation = perturbation * perturbation;
178 if (perturbation > 1.0) {
179 #ifdef COIN_DEVELOP
180 /*if (model_->model()->logLevel()&4) */
181 std::cout << "large perturbation " << perturbation << std::endl;
182 #endif
183 perturbation = CoinSqrt(perturbation);
184 ;
185 perturbation = 1.0;
186 }
187 int iRow;
188 int newDropped = 0;
189 CoinWorkDouble largest = 1.0;
190 CoinWorkDouble smallest = COIN_DBL_MAX;
191 CoinWorkDouble delta2 = model_->delta(); /* add delta*delta to diagonal*/
192 delta2 *= delta2;
193 if (!doKKT_) {
194 longDouble *work = sparseFactor_;
195 work--; /* skip diagonal*/
196 int addOffset = numberRows_ - 1;
197 const CoinWorkDouble *diagonalSlack = diagonal + numberColumns;
198 /* largest in initial matrix*/
199 CoinWorkDouble largest2 = 1.0e-20;
200 for (iRow = 0; iRow < numberRows_; iRow++) {
201 if (!rowsDropped_[iRow]) {
202 CoinBigIndex startRow = rowStart[iRow];
203 CoinBigIndex endRow = rowStart[iRow] + rowLength[iRow];
204 CoinWorkDouble diagonalValue = diagonalSlack[iRow] + delta2;
205 for (CoinBigIndex k = startRow; k < endRow; k++) {
206 int iColumn = column[k];
207 CoinBigIndex start = columnStart[iColumn];
208 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
209 CoinWorkDouble multiplier = diagonal[iColumn] * elementByRow[k];
210 for (CoinBigIndex j = start; j < end; j++) {
211 int jRow = row[j];
212 if (!rowsDropped_[jRow]) {
213 if (jRow > iRow) {
214 CoinWorkDouble value = element[j] * multiplier;
215 work[jRow] += value;
216 } else if (jRow == iRow) {
217 CoinWorkDouble value = element[j] * multiplier;
218 diagonalValue += value;
219 }
220 }
221 }
222 }
223 for (int j = iRow + 1; j < numberRows_; j++)
224 largest2 = CoinMax(largest2, CoinAbs(work[j]));
225 diagonal_[iRow] = diagonalValue;
226 largest2 = CoinMax(largest2, CoinAbs(diagonalValue));
227 } else {
228 /* dropped*/
229 diagonal_[iRow] = 1.0;
230 }
231 addOffset--;
232 work += addOffset;
233 }
234 /*check sizes*/
235 largest2 *= 1.0e-20;
236 largest = CoinMin(largest2, CHOL_SMALL_VALUE);
237 int numberDroppedBefore = 0;
238 for (iRow = 0; iRow < numberRows_; iRow++) {
239 int dropped = rowsDropped_[iRow];
240 /* Move to int array*/
241 rowsDropped[iRow] = dropped;
242 if (!dropped) {
243 CoinWorkDouble diagonal = diagonal_[iRow];
244 if (diagonal > largest2) {
245 diagonal_[iRow] = diagonal + perturbation;
246 } else {
247 diagonal_[iRow] = diagonal + perturbation;
248 rowsDropped[iRow] = 2;
249 numberDroppedBefore++;
250 }
251 }
252 }
253 doubleParameters_[10] = CoinMax(1.0e-20, largest);
254 integerParameters_[20] = 0;
255 doubleParameters_[3] = 0.0;
256 doubleParameters_[4] = COIN_DBL_MAX;
257 integerParameters_[34] = 0; /* say all must be positive*/
258 #ifdef CHOL_COMPARE
259 if (numberRows_ < 200)
260 factorizePart3(rowsDropped);
261 #endif
262 factorizePart2(rowsDropped);
263 newDropped = integerParameters_[20] + numberDroppedBefore;
264 largest = doubleParameters_[3];
265 smallest = doubleParameters_[4];
266 if (model_->messageHandler()->logLevel() > 1)
267 std::cout << "Cholesky - largest " << largest << " smallest " << smallest << std::endl;
268 choleskyCondition_ = largest / smallest;
269 /*drop fresh makes some formADAT easier*/
270 if (newDropped || numberRowsDropped_) {
271 newDropped = 0;
272 for (int i = 0; i < numberRows_; i++) {
273 char dropped = static_cast< char >(rowsDropped[i]);
274 rowsDropped_[i] = dropped;
275 if (dropped == 2) {
276 /*dropped this time*/
277 rowsDropped[newDropped++] = i;
278 rowsDropped_[i] = 0;
279 }
280 }
281 numberRowsDropped_ = newDropped;
282 newDropped = -(2 + newDropped);
283 }
284 } else {
285 /* KKT*/
286 CoinPackedMatrix *quadratic = NULL;
287 ClpQuadraticObjective *quadraticObj = (dynamic_cast< ClpQuadraticObjective * >(model_->objectiveAsObject()));
288 if (quadraticObj)
289 quadratic = quadraticObj->quadraticObjective();
290 /* matrix*/
291 int numberRowsModel = model_->numberRows();
292 int numberColumns = model_->numberColumns();
293 int numberTotal = numberColumns + numberRowsModel;
294 longDouble *work = sparseFactor_;
295 work--; /* skip diagonal*/
296 int addOffset = numberRows_ - 1;
297 int iColumn;
298 if (!quadratic) {
299 for (iColumn = 0; iColumn < numberColumns; iColumn++) {
300 CoinWorkDouble value = diagonal[iColumn];
301 if (CoinAbs(value) > 1.0e-100) {
302 value = 1.0 / value;
303 largest = CoinMax(largest, CoinAbs(value));
304 diagonal_[iColumn] = -value;
305 CoinBigIndex start = columnStart[iColumn];
306 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
307 for (CoinBigIndex j = start; j < end; j++) {
308 /*choleskyRow_[numberElements]=row[j]+numberTotal;*/
309 /*sparseFactor_[numberElements++]=element[j];*/
310 work[row[j] + numberTotal] = element[j];
311 largest = CoinMax(largest, CoinAbs(element[j]));
312 }
313 } else {
314 diagonal_[iColumn] = -value;
315 }
316 addOffset--;
317 work += addOffset;
318 }
319 } else {
320 /* Quadratic*/
321 const int *columnQuadratic = quadratic->getIndices();
322 const CoinBigIndex *columnQuadraticStart = quadratic->getVectorStarts();
323 const int *columnQuadraticLength = quadratic->getVectorLengths();
324 const double *quadraticElement = quadratic->getElements();
325 for (iColumn = 0; iColumn < numberColumns; iColumn++) {
326 CoinWorkDouble value = diagonal[iColumn];
327 CoinBigIndex j;
328 if (CoinAbs(value) > 1.0e-100) {
329 value = 1.0 / value;
330 for (j = columnQuadraticStart[iColumn];
331 j < columnQuadraticStart[iColumn] + columnQuadraticLength[iColumn]; j++) {
332 int jColumn = columnQuadratic[j];
333 if (jColumn > iColumn) {
334 work[jColumn] = -quadraticElement[j];
335 } else if (iColumn == jColumn) {
336 value += quadraticElement[j];
337 }
338 }
339 largest = CoinMax(largest, CoinAbs(value));
340 diagonal_[iColumn] = -value;
341 CoinBigIndex start = columnStart[iColumn];
342 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
343 for (j = start; j < end; j++) {
344 work[row[j] + numberTotal] = element[j];
345 largest = CoinMax(largest, CoinAbs(element[j]));
346 }
347 } else {
348 value = 1.0e100;
349 diagonal_[iColumn] = -value;
350 }
351 addOffset--;
352 work += addOffset;
353 }
354 }
355 /* slacks*/
356 for (iColumn = numberColumns; iColumn < numberTotal; iColumn++) {
357 CoinWorkDouble value = diagonal[iColumn];
358 if (CoinAbs(value) > 1.0e-100) {
359 value = 1.0 / value;
360 largest = CoinMax(largest, CoinAbs(value));
361 } else {
362 value = 1.0e100;
363 }
364 diagonal_[iColumn] = -value;
365 work[iColumn - numberColumns + numberTotal] = -1.0;
366 addOffset--;
367 work += addOffset;
368 }
369 /* Finish diagonal*/
370 for (iRow = 0; iRow < numberRowsModel; iRow++) {
371 diagonal_[iRow + numberTotal] = delta2;
372 }
373 /*check sizes*/
374 largest *= 1.0e-20;
375 largest = CoinMin(largest, CHOL_SMALL_VALUE);
376 doubleParameters_[10] = CoinMax(1.0e-20, largest);
377 integerParameters_[20] = 0;
378 doubleParameters_[3] = 0.0;
379 doubleParameters_[4] = COIN_DBL_MAX;
380 /* Set up LDL cutoff*/
381 integerParameters_[34] = numberTotal;
382 /* KKT*/
383 int *rowsDropped2 = new int[numberRows_];
384 CoinZeroN(rowsDropped2, numberRows_);
385 #ifdef CHOL_COMPARE
386 if (numberRows_ < 200)
387 factorizePart3(rowsDropped2);
388 #endif
389 factorizePart2(rowsDropped2);
390 newDropped = integerParameters_[20];
391 largest = doubleParameters_[3];
392 smallest = doubleParameters_[4];
393 if (model_->messageHandler()->logLevel() > 1)
394 COIN_DETAIL_PRINT(std::cout << "Cholesky - largest " << largest << " smallest " << smallest << std::endl);
395 choleskyCondition_ = largest / smallest;
396 /* Should save adjustments in ..R_*/
397 int n1 = 0, n2 = 0;
398 CoinWorkDouble *primalR = model_->primalR();
399 CoinWorkDouble *dualR = model_->dualR();
400 for (iRow = 0; iRow < numberTotal; iRow++) {
401 if (rowsDropped2[iRow]) {
402 n1++;
403 /*printf("row region1 %d dropped\n",iRow);*/
404 /*rowsDropped_[iRow]=1;*/
405 rowsDropped_[iRow] = 0;
406 primalR[iRow] = doubleParameters_[20];
407 } else {
408 rowsDropped_[iRow] = 0;
409 primalR[iRow] = 0.0;
410 }
411 }
412 for (; iRow < numberRows_; iRow++) {
413 if (rowsDropped2[iRow]) {
414 n2++;
415 /*printf("row region2 %d dropped\n",iRow);*/
416 /*rowsDropped_[iRow]=1;*/
417 rowsDropped_[iRow] = 0;
418 dualR[iRow - numberTotal] = doubleParameters_[34];
419 } else {
420 rowsDropped_[iRow] = 0;
421 dualR[iRow - numberTotal] = 0.0;
422 }
423 }
424 }
425 return 0;
426 }
427 /* Factorize - filling in rowsDropped and returning number dropped */
factorizePart3(int * rowsDropped)428 void ClpCholeskyDense::factorizePart3(int *rowsDropped)
429 {
430 int iColumn;
431 longDouble *xx = sparseFactor_;
432 longDouble *yy = diagonal_;
433 diagonal_ = sparseFactor_ + 40000;
434 sparseFactor_ = diagonal_ + numberRows_;
435 /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/
436 CoinMemcpyN(xx, 40000, sparseFactor_);
437 CoinMemcpyN(yy, numberRows_, diagonal_);
438 int numberDropped = 0;
439 CoinWorkDouble largest = 0.0;
440 CoinWorkDouble smallest = COIN_DBL_MAX;
441 double dropValue = doubleParameters_[10];
442 int firstPositive = integerParameters_[34];
443 longDouble *work = sparseFactor_;
444 /* Allow for triangular*/
445 int addOffset = numberRows_ - 1;
446 work--;
447 for (iColumn = 0; iColumn < numberRows_; iColumn++) {
448 int iRow;
449 int addOffsetNow = numberRows_ - 1;
450 ;
451 longDouble *workNow = sparseFactor_ - 1 + iColumn;
452 CoinWorkDouble diagonalValue = diagonal_[iColumn];
453 for (iRow = 0; iRow < iColumn; iRow++) {
454 double aj = *workNow;
455 addOffsetNow--;
456 workNow += addOffsetNow;
457 diagonalValue -= aj * aj * workDouble_[iRow];
458 }
459 bool dropColumn = false;
460 if (iColumn < firstPositive) {
461 /* must be negative*/
462 if (diagonalValue <= -dropValue) {
463 smallest = CoinMin(smallest, -diagonalValue);
464 largest = CoinMax(largest, -diagonalValue);
465 workDouble_[iColumn] = diagonalValue;
466 diagonalValue = 1.0 / diagonalValue;
467 } else {
468 dropColumn = true;
469 workDouble_[iColumn] = -1.0e100;
470 diagonalValue = 0.0;
471 integerParameters_[20]++;
472 }
473 } else {
474 /* must be positive*/
475 if (diagonalValue >= dropValue) {
476 smallest = CoinMin(smallest, diagonalValue);
477 largest = CoinMax(largest, diagonalValue);
478 workDouble_[iColumn] = diagonalValue;
479 diagonalValue = 1.0 / diagonalValue;
480 } else {
481 dropColumn = true;
482 workDouble_[iColumn] = 1.0e100;
483 diagonalValue = 0.0;
484 integerParameters_[20]++;
485 }
486 }
487 if (!dropColumn) {
488 diagonal_[iColumn] = diagonalValue;
489 for (iRow = iColumn + 1; iRow < numberRows_; iRow++) {
490 double value = work[iRow];
491 workNow = sparseFactor_ - 1;
492 int addOffsetNow = numberRows_ - 1;
493 ;
494 for (int jColumn = 0; jColumn < iColumn; jColumn++) {
495 double aj = workNow[iColumn];
496 double multiplier = workDouble_[jColumn];
497 double ai = workNow[iRow];
498 addOffsetNow--;
499 workNow += addOffsetNow;
500 value -= aj * ai * multiplier;
501 }
502 work[iRow] = value * diagonalValue;
503 }
504 } else {
505 /* drop column*/
506 rowsDropped[iColumn] = 2;
507 numberDropped++;
508 diagonal_[iColumn] = 0.0;
509 for (iRow = iColumn + 1; iRow < numberRows_; iRow++) {
510 work[iRow] = 0.0;
511 }
512 }
513 addOffset--;
514 work += addOffset;
515 }
516 doubleParameters_[3] = largest;
517 doubleParameters_[4] = smallest;
518 integerParameters_[20] = numberDropped;
519 sparseFactor_ = xx;
520 diagonal_ = yy;
521 }
522 /*#define POS_DEBUG*/
523 #ifdef POS_DEBUG
524 static int counter = 0;
bNumber(const longDouble * array,int & iRow,int & iCol)525 int ClpCholeskyDense::bNumber(const longDouble *array, int &iRow, int &iCol)
526 {
527 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
528 longDouble *a = sparseFactor_ + BLOCKSQ * numberBlocks;
529 int k = array - a;
530 assert((k % BLOCKSQ) == 0);
531 iCol = 0;
532 int kk = k >> BLOCKSQSHIFT;
533 /*printf("%d %d %d %d\n",k,kk,BLOCKSQ,BLOCKSQSHIFT);*/
534 k = kk;
535 while (k >= numberBlocks) {
536 iCol++;
537 k -= numberBlocks;
538 numberBlocks--;
539 }
540 iRow = iCol + k;
541 counter++;
542 if (counter > 100000)
543 exit(77);
544 return kk;
545 }
546 #endif
547 /* Factorize - filling in rowsDropped and returning number dropped */
factorizePart2(int * rowsDropped)548 void ClpCholeskyDense::factorizePart2(int *rowsDropped)
549 {
550 int iColumn;
551 /*longDouble * xx = sparseFactor_;*/
552 /*longDouble * yy = diagonal_;*/
553 /*diagonal_ = sparseFactor_ + 40000;*/
554 /*sparseFactor_=diagonal_ + numberRows_;*/
555 /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/
556 /*memcpy(sparseFactor_,xx,40000*sizeof(double));*/
557 /*memcpy(diagonal_,yy,numberRows_*sizeof(double));*/
558 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
559 /* later align on boundary*/
560 longDouble *a = sparseFactor_ + BLOCKSQ * numberBlocks;
561 int n = numberRows_;
562 int nRound = numberRows_ & (~(BLOCK - 1));
563 /* adjust if exact*/
564 if (nRound == n)
565 nRound -= BLOCK;
566 int sizeLastBlock = n - nRound;
567 int get = n * (n - 1) / 2; /* ? as no diagonal*/
568 int block = numberBlocks * (numberBlocks + 1) / 2;
569 int ifOdd;
570 int rowLast;
571 if (sizeLastBlock != BLOCK) {
572 longDouble *aa = &a[(block - 1) * BLOCKSQ];
573 rowLast = nRound - 1;
574 ifOdd = 1;
575 int put = BLOCKSQ;
576 /* do last separately*/
577 put -= (BLOCK - sizeLastBlock) * (BLOCK + 1);
578 for (iColumn = numberRows_ - 1; iColumn >= nRound; iColumn--) {
579 int put2 = put;
580 put -= BLOCK;
581 for (int iRow = numberRows_ - 1; iRow > iColumn; iRow--) {
582 aa[--put2] = sparseFactor_[--get];
583 assert(aa + put2 >= sparseFactor_ + get);
584 }
585 /* save diagonal as well*/
586 aa[--put2] = diagonal_[iColumn];
587 }
588 n = nRound;
589 block--;
590 } else {
591 /* exact fit*/
592 rowLast = numberRows_ - 1;
593 ifOdd = 0;
594 }
595 /* Now main loop*/
596 int nBlock = 0;
597 for (; n > 0; n -= BLOCK) {
598 longDouble *aa = &a[(block - 1) * BLOCKSQ];
599 longDouble *aaLast = NULL;
600 int put = BLOCKSQ;
601 int putLast = 0;
602 /* see if we have small block*/
603 if (ifOdd) {
604 aaLast = &a[(block - 1) * BLOCKSQ];
605 aa = aaLast - BLOCKSQ;
606 putLast = BLOCKSQ - BLOCK + sizeLastBlock;
607 }
608 for (iColumn = n - 1; iColumn >= n - BLOCK; iColumn--) {
609 if (aaLast) {
610 /* last bit*/
611 for (int iRow = numberRows_ - 1; iRow > rowLast; iRow--) {
612 aaLast[--putLast] = sparseFactor_[--get];
613 assert(aaLast + putLast >= sparseFactor_ + get);
614 }
615 putLast -= BLOCK - sizeLastBlock;
616 }
617 longDouble *aPut = aa;
618 int j = rowLast;
619 for (int jBlock = 0; jBlock <= nBlock; jBlock++) {
620 int put2 = put;
621 int last = CoinMax(j - BLOCK, iColumn);
622 for (int iRow = j; iRow > last; iRow--) {
623 aPut[--put2] = sparseFactor_[--get];
624 assert(aPut + put2 >= sparseFactor_ + get);
625 }
626 if (j - BLOCK < iColumn) {
627 /* save diagonal as well*/
628 aPut[--put2] = diagonal_[iColumn];
629 }
630 j -= BLOCK;
631 aPut -= BLOCKSQ;
632 }
633 put -= BLOCK;
634 }
635 nBlock++;
636 block -= nBlock + ifOdd;
637 }
638 ClpCholeskyDenseC info;
639 info.diagonal_ = diagonal_;
640 info.doubleParameters_[0] = doubleParameters_[10];
641 info.integerParameters_[0] = integerParameters_[34];
642 #ifndef CLP_CILK
643 ClpCholeskyCfactor(&info, a, numberRows_, numberBlocks,
644 diagonal_, workDouble_, rowsDropped);
645 #else
646 info.a = a;
647 info.n = numberRows_;
648 info.numberBlocks = numberBlocks;
649 info.work = workDouble_;
650 info.rowsDropped = rowsDropped;
651 info.integerParameters_[1] = model_->numberThreads();
652 ClpCholeskySpawn(&info);
653 #endif
654 double largest = 0.0;
655 double smallest = COIN_DBL_MAX;
656 int numberDropped = 0;
657 for (int i = 0; i < numberRows_; i++) {
658 if (diagonal_[i]) {
659 largest = CoinMax(largest, CoinAbs(diagonal_[i]));
660 smallest = CoinMin(smallest, CoinAbs(diagonal_[i]));
661 } else {
662 numberDropped++;
663 }
664 }
665 doubleParameters_[3] = CoinMax(doubleParameters_[3], 1.0 / smallest);
666 doubleParameters_[4] = CoinMin(doubleParameters_[4], 1.0 / largest);
667 integerParameters_[20] += numberDropped;
668 }
669 /* Non leaf recursive factor*/
ClpCholeskyCfactor(ClpCholeskyDenseC * thisStruct,longDouble * a,int n,int numberBlocks,longDouble * diagonal,longDouble * work,int * rowsDropped)670 void ClpCholeskyCfactor(ClpCholeskyDenseC *thisStruct, longDouble *a, int n, int numberBlocks,
671 longDouble *diagonal, longDouble *work, int *rowsDropped)
672 {
673 if (n <= BLOCK) {
674 ClpCholeskyCfactorLeaf(thisStruct, a, n, diagonal, work, rowsDropped);
675 } else {
676 int nb = number_blocks((n + 1) >> 1);
677 int nThis = number_rows(nb);
678 longDouble *aother;
679 int nLeft = n - nThis;
680 int nintri = (nb * (nb + 1)) >> 1;
681 int nbelow = (numberBlocks - nb) * nb;
682 ClpCholeskyCfactor(thisStruct, a, nThis, numberBlocks, diagonal, work, rowsDropped);
683 ClpCholeskyCtriRec(thisStruct, a, nThis, a + number_entries(nb), diagonal, work, nLeft, nb, 0, numberBlocks);
684 aother = a + number_entries(nintri + nbelow);
685 ClpCholeskyCrecTri(thisStruct, a + number_entries(nb), nLeft, nThis, nb, 0, aother, diagonal, work, numberBlocks);
686 ClpCholeskyCfactor(thisStruct, aother, nLeft,
687 numberBlocks - nb, diagonal + nThis, work + nThis, rowsDropped);
688 }
689 }
690 /* Non leaf recursive triangle rectangle update*/
ClpCholeskyCtriRec(ClpCholeskyDenseC * thisStruct,longDouble * aTri,int nThis,longDouble * aUnder,longDouble * diagonal,longDouble * work,int nLeft,int iBlock,int jBlock,int numberBlocks)691 void ClpCholeskyCtriRec(ClpCholeskyDenseC *thisStruct, longDouble *aTri, int nThis, longDouble *aUnder,
692 longDouble *diagonal, longDouble *work,
693 int nLeft, int iBlock, int jBlock,
694 int numberBlocks)
695 {
696 if (nThis <= BLOCK && nLeft <= BLOCK) {
697 ClpCholeskyCtriRecLeaf(/*thisStruct,*/ aTri, aUnder, diagonal, work, nLeft);
698 } else if (nThis < nLeft) {
699 int nb = number_blocks((nLeft + 1) >> 1);
700 int nLeft2 = number_rows(nb);
701 cilk_spawn ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder, diagonal, work, nLeft2, iBlock, jBlock, numberBlocks);
702 ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder + number_entries(nb), diagonal, work, nLeft - nLeft2,
703 iBlock + nb, jBlock, numberBlocks);
704 cilk_sync;
705 } else {
706 int nb = number_blocks((nThis + 1) >> 1);
707 int nThis2 = number_rows(nb);
708 longDouble *aother;
709 int kBlock = jBlock + nb;
710 int i;
711 int nintri = (nb * (nb + 1)) >> 1;
712 int nbelow = (numberBlocks - nb) * nb;
713 ClpCholeskyCtriRec(thisStruct, aTri, nThis2, aUnder, diagonal, work, nLeft, iBlock, jBlock, numberBlocks);
714 /* and rectangular update */
715 i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) - (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
716 aother = aUnder + number_entries(i);
717 ClpCholeskyCrecRec(thisStruct, aTri + number_entries(nb), nThis - nThis2, nLeft, nThis2, aUnder, aother,
718 work, kBlock, jBlock, numberBlocks);
719 ClpCholeskyCtriRec(thisStruct, aTri + number_entries(nintri + nbelow), nThis - nThis2, aother, diagonal + nThis2,
720 work + nThis2, nLeft,
721 iBlock - nb, kBlock - nb, numberBlocks - nb);
722 }
723 }
724 /* Non leaf recursive rectangle triangle update*/
ClpCholeskyCrecTri(ClpCholeskyDenseC * thisStruct,longDouble * aUnder,int nTri,int nDo,int iBlock,int jBlock,longDouble * aTri,longDouble * diagonal,longDouble * work,int numberBlocks)725 void ClpCholeskyCrecTri(ClpCholeskyDenseC *thisStruct, longDouble *aUnder, int nTri, int nDo,
726 int iBlock, int jBlock, longDouble *aTri,
727 longDouble *diagonal, longDouble *work,
728 int numberBlocks)
729 {
730 if (nTri <= BLOCK && nDo <= BLOCK) {
731 ClpCholeskyCrecTriLeaf(/*thisStruct,*/ aUnder, aTri, /*diagonal,*/ work, nTri);
732 } else if (nTri < nDo) {
733 int nb = number_blocks((nDo + 1) >> 1);
734 int nDo2 = number_rows(nb);
735 longDouble *aother;
736 int i;
737 ClpCholeskyCrecTri(thisStruct, aUnder, nTri, nDo2, iBlock, jBlock, aTri, diagonal, work, numberBlocks);
738 i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) - (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
739 aother = aUnder + number_entries(i);
740 ClpCholeskyCrecTri(thisStruct, aother, nTri, nDo - nDo2, iBlock - nb, jBlock, aTri, diagonal + nDo2,
741 work + nDo2, numberBlocks - nb);
742 } else {
743 int nb = number_blocks((nTri + 1) >> 1);
744 int nTri2 = number_rows(nb);
745 longDouble *aother;
746 int i;
747 cilk_spawn ClpCholeskyCrecTri(thisStruct, aUnder, nTri2, nDo, iBlock, jBlock, aTri, diagonal, work, numberBlocks);
748 /* and rectangular update */
749 i = ((numberBlocks - iBlock) * (numberBlocks - iBlock + 1) - (numberBlocks - iBlock - nb) * (numberBlocks - iBlock - nb + 1)) >> 1;
750 aother = aTri + number_entries(nb);
751 ClpCholeskyCrecRec(thisStruct, aUnder, nTri2, nTri - nTri2, nDo, aUnder + number_entries(nb), aother,
752 work, iBlock, jBlock, numberBlocks);
753 ClpCholeskyCrecTri(thisStruct, aUnder + number_entries(nb), nTri - nTri2, nDo, iBlock + nb, jBlock,
754 aTri + number_entries(i), diagonal, work, numberBlocks);
755 cilk_sync;
756 }
757 }
758 /* Non leaf recursive rectangle rectangle update,
759 nUnder is number of rows in iBlock,
760 nUnderK is number of rows in kBlock
761 */
ClpCholeskyCrecRec(ClpCholeskyDenseC * thisStruct,longDouble * above,int nUnder,int nUnderK,int nDo,longDouble * aUnder,longDouble * aOther,longDouble * work,int iBlock,int jBlock,int numberBlocks)762 void ClpCholeskyCrecRec(ClpCholeskyDenseC *thisStruct, longDouble *above, int nUnder, int nUnderK,
763 int nDo, longDouble *aUnder, longDouble *aOther,
764 longDouble *work,
765 int iBlock, int jBlock,
766 int numberBlocks)
767 {
768 if (nDo <= BLOCK && nUnder <= BLOCK && nUnderK <= BLOCK) {
769 assert(nDo == BLOCK && nUnder == BLOCK);
770 ClpCholeskyCrecRecLeaf(/*thisStruct,*/ above, aUnder, aOther, work, nUnderK);
771 } else if (nDo <= nUnderK && nUnder <= nUnderK) {
772 int nb = number_blocks((nUnderK + 1) >> 1);
773 int nUnder2 = number_rows(nb);
774 cilk_spawn ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnder2, nDo, aUnder, aOther, work,
775 iBlock, jBlock, numberBlocks);
776 ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK - nUnder2, nDo, aUnder + number_entries(nb),
777 aOther + number_entries(nb), work, iBlock, jBlock, numberBlocks);
778 cilk_sync;
779 } else if (nUnderK <= nDo && nUnder <= nDo) {
780 int nb = number_blocks((nDo + 1) >> 1);
781 int nDo2 = number_rows(nb);
782 int i;
783 ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK, nDo2, aUnder, aOther, work,
784 iBlock, jBlock, numberBlocks);
785 i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) - (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
786 ClpCholeskyCrecRec(thisStruct, above + number_entries(i), nUnder, nUnderK, nDo - nDo2,
787 aUnder + number_entries(i),
788 aOther, work + nDo2,
789 iBlock - nb, jBlock, numberBlocks - nb);
790 } else {
791 int nb = number_blocks((nUnder + 1) >> 1);
792 int nUnder2 = number_rows(nb);
793 int i;
794 cilk_spawn ClpCholeskyCrecRec(thisStruct, above, nUnder2, nUnderK, nDo, aUnder, aOther, work,
795 iBlock, jBlock, numberBlocks);
796 i = ((numberBlocks - iBlock) * (numberBlocks - iBlock - 1) - (numberBlocks - iBlock - nb) * (numberBlocks - iBlock - nb - 1)) >> 1;
797 ClpCholeskyCrecRec(thisStruct, above + number_entries(nb), nUnder - nUnder2, nUnderK, nDo, aUnder,
798 aOther + number_entries(i), work, iBlock + nb, jBlock, numberBlocks);
799 cilk_sync;
800 }
801 }
802 /* Leaf recursive factor*/
ClpCholeskyCfactorLeaf(ClpCholeskyDenseC * thisStruct,longDouble * a,int n,longDouble * diagonal,longDouble * work,int * rowsDropped)803 void ClpCholeskyCfactorLeaf(ClpCholeskyDenseC *thisStruct, longDouble *a, int n,
804 longDouble *diagonal, longDouble *work, int *rowsDropped)
805 {
806 double dropValue = thisStruct->doubleParameters_[0];
807 int firstPositive = thisStruct->integerParameters_[0];
808 int rowOffset = static_cast< int >(diagonal - thisStruct->diagonal_);
809 int i, j, k;
810 CoinWorkDouble t00, temp1;
811 longDouble *aa;
812 aa = a - BLOCK;
813 for (j = 0; j < n; j++) {
814 bool dropColumn;
815 CoinWorkDouble useT00;
816 aa += BLOCK;
817 t00 = aa[j];
818 for (k = 0; k < j; ++k) {
819 CoinWorkDouble multiplier = work[k];
820 t00 -= a[j + k * BLOCK] * a[j + k * BLOCK] * multiplier;
821 }
822 dropColumn = false;
823 useT00 = t00;
824 if (j + rowOffset < firstPositive) {
825 /* must be negative*/
826 if (t00 <= -dropValue) {
827 /*aa[j]=t00;*/
828 t00 = 1.0 / t00;
829 } else {
830 dropColumn = true;
831 /*aa[j]=-1.0e100;*/
832 useT00 = -1.0e-100;
833 t00 = 0.0;
834 }
835 } else {
836 /* must be positive*/
837 if (t00 >= dropValue) {
838 /*aa[j]=t00;*/
839 t00 = 1.0 / t00;
840 } else {
841 dropColumn = true;
842 /*aa[j]=1.0e100;*/
843 useT00 = 1.0e-100;
844 t00 = 0.0;
845 }
846 }
847 if (!dropColumn) {
848 diagonal[j] = t00;
849 work[j] = useT00;
850 temp1 = t00;
851 for (i = j + 1; i < n; i++) {
852 t00 = aa[i];
853 for (k = 0; k < j; ++k) {
854 CoinWorkDouble multiplier = work[k];
855 t00 -= a[i + k * BLOCK] * a[j + k * BLOCK] * multiplier;
856 }
857 aa[i] = t00 * temp1;
858 }
859 } else {
860 /* drop column*/
861 rowsDropped[j + rowOffset] = 2;
862 diagonal[j] = 0.0;
863 /*aa[j]=1.0e100;*/
864 work[j] = 1.0e100;
865 for (i = j + 1; i < n; i++) {
866 aa[i] = 0.0;
867 }
868 }
869 }
870 }
871 /* Leaf recursive triangle rectangle update*/
ClpCholeskyCtriRecLeaf(longDouble * aTri,longDouble * aUnder,longDouble * diagonal,longDouble * work,int nUnder)872 void ClpCholeskyCtriRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble *aTri, longDouble *aUnder, longDouble *diagonal, longDouble *work,
873 int nUnder)
874 {
875 #ifdef POS_DEBUG
876 int iru, icu;
877 int iu = bNumber(aUnder, iru, icu);
878 int irt, ict;
879 int it = bNumber(aTri, irt, ict);
880 /*printf("%d %d\n",iu,it);*/
881 printf("trirecleaf under (%d,%d), tri (%d,%d)\n",
882 iru, icu, irt, ict);
883 assert(diagonal == thisStruct->diagonal_ + ict * BLOCK);
884 #endif
885 int j;
886 longDouble *aa;
887 #ifdef BLOCKUNROLL
888 if (nUnder == BLOCK) {
889 aa = aTri - 2 * BLOCK;
890 for (j = 0; j < BLOCK; j += 2) {
891 int i;
892 CoinWorkDouble temp0 = diagonal[j];
893 CoinWorkDouble temp1 = diagonal[j + 1];
894 aa += 2 * BLOCK;
895 for (i = 0; i < BLOCK; i += 2) {
896 CoinWorkDouble at1;
897 CoinWorkDouble t00 = aUnder[i + j * BLOCK];
898 CoinWorkDouble t10 = aUnder[i + BLOCK + j * BLOCK];
899 CoinWorkDouble t01 = aUnder[i + 1 + j * BLOCK];
900 CoinWorkDouble t11 = aUnder[i + 1 + BLOCK + j * BLOCK];
901 int k;
902 for (k = 0; k < j; ++k) {
903 CoinWorkDouble multiplier = work[k];
904 CoinWorkDouble au0 = aUnder[i + k * BLOCK] * multiplier;
905 CoinWorkDouble au1 = aUnder[i + 1 + k * BLOCK] * multiplier;
906 CoinWorkDouble at0 = aTri[j + k * BLOCK];
907 CoinWorkDouble at1 = aTri[j + 1 + k * BLOCK];
908 t00 -= au0 * at0;
909 t10 -= au0 * at1;
910 t01 -= au1 * at0;
911 t11 -= au1 * at1;
912 }
913 t00 *= temp0;
914 at1 = aTri[j + 1 + j * BLOCK] * work[j];
915 t10 -= t00 * at1;
916 t01 *= temp0;
917 t11 -= t01 * at1;
918 aUnder[i + j * BLOCK] = t00;
919 aUnder[i + 1 + j * BLOCK] = t01;
920 aUnder[i + BLOCK + j * BLOCK] = t10 * temp1;
921 aUnder[i + 1 + BLOCK + j * BLOCK] = t11 * temp1;
922 }
923 }
924 } else {
925 #endif
926 aa = aTri - BLOCK;
927 for (j = 0; j < BLOCK; j++) {
928 int i;
929 CoinWorkDouble temp1 = diagonal[j];
930 aa += BLOCK;
931 for (i = 0; i < nUnder; i++) {
932 int k;
933 CoinWorkDouble t00 = aUnder[i + j * BLOCK];
934 for (k = 0; k < j; ++k) {
935 CoinWorkDouble multiplier = work[k];
936 t00 -= aUnder[i + k * BLOCK] * aTri[j + k * BLOCK] * multiplier;
937 }
938 aUnder[i + j * BLOCK] = t00 * temp1;
939 }
940 }
941 #ifdef BLOCKUNROLL
942 }
943 #endif
944 }
945 /* Leaf recursive rectangle triangle update*/
ClpCholeskyCrecTriLeaf(longDouble * aUnder,longDouble * aTri,longDouble * work,int nUnder)946 void ClpCholeskyCrecTriLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble *aUnder, longDouble *aTri,
947 /*longDouble * diagonal,*/ longDouble *work, int nUnder)
948 {
949 #ifdef POS_DEBUG
950 int iru, icu;
951 int iu = bNumber(aUnder, iru, icu);
952 int irt, ict;
953 int it = bNumber(aTri, irt, ict);
954 /*printf("%d %d\n",iu,it);*/
955 printf("rectrileaf under (%d,%d), tri (%d,%d)\n",
956 iru, icu, irt, ict);
957 assert(diagonal == thisStruct->diagonal_ + icu * BLOCK);
958 #endif
959 int i, j, k;
960 CoinWorkDouble t00;
961 longDouble *aa;
962 #ifdef BLOCKUNROLL
963 if (nUnder == BLOCK) {
964 longDouble *aUnder2 = aUnder - 2;
965 aa = aTri - 2 * BLOCK;
966 for (j = 0; j < BLOCK; j += 2) {
967 CoinWorkDouble t00, t01, t10, t11;
968 aa += 2 * BLOCK;
969 aUnder2 += 2;
970 t00 = aa[j];
971 t01 = aa[j + 1];
972 t10 = aa[j + 1 + BLOCK];
973 for (k = 0; k < BLOCK; ++k) {
974 CoinWorkDouble multiplier = work[k];
975 CoinWorkDouble a0 = aUnder2[k * BLOCK];
976 CoinWorkDouble a1 = aUnder2[1 + k * BLOCK];
977 CoinWorkDouble x0 = a0 * multiplier;
978 CoinWorkDouble x1 = a1 * multiplier;
979 t00 -= a0 * x0;
980 t01 -= a1 * x0;
981 t10 -= a1 * x1;
982 }
983 aa[j] = t00;
984 aa[j + 1] = t01;
985 aa[j + 1 + BLOCK] = t10;
986 for (i = j + 2; i < BLOCK; i += 2) {
987 t00 = aa[i];
988 t01 = aa[i + BLOCK];
989 t10 = aa[i + 1];
990 t11 = aa[i + 1 + BLOCK];
991 for (k = 0; k < BLOCK; ++k) {
992 CoinWorkDouble multiplier = work[k];
993 CoinWorkDouble a0 = aUnder2[k * BLOCK] * multiplier;
994 CoinWorkDouble a1 = aUnder2[1 + k * BLOCK] * multiplier;
995 t00 -= aUnder[i + k * BLOCK] * a0;
996 t01 -= aUnder[i + k * BLOCK] * a1;
997 t10 -= aUnder[i + 1 + k * BLOCK] * a0;
998 t11 -= aUnder[i + 1 + k * BLOCK] * a1;
999 }
1000 aa[i] = t00;
1001 aa[i + BLOCK] = t01;
1002 aa[i + 1] = t10;
1003 aa[i + 1 + BLOCK] = t11;
1004 }
1005 }
1006 } else {
1007 #endif
1008 aa = aTri - BLOCK;
1009 for (j = 0; j < nUnder; j++) {
1010 aa += BLOCK;
1011 for (i = j; i < nUnder; i++) {
1012 t00 = aa[i];
1013 for (k = 0; k < BLOCK; ++k) {
1014 CoinWorkDouble multiplier = work[k];
1015 t00 -= aUnder[i + k * BLOCK] * aUnder[j + k * BLOCK] * multiplier;
1016 }
1017 aa[i] = t00;
1018 }
1019 }
1020 #ifdef BLOCKUNROLL
1021 }
1022 #endif
1023 }
1024 /* Leaf recursive rectangle rectangle update,
1025 nUnder is number of rows in iBlock,
1026 nUnderK is number of rows in kBlock
1027 */
ClpCholeskyCrecRecLeaf(const longDouble * COIN_RESTRICT above,const longDouble * COIN_RESTRICT aUnder,longDouble * COIN_RESTRICT aOther,const longDouble * COIN_RESTRICT work,int nUnder)1028 void ClpCholeskyCrecRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/
1029 const longDouble *COIN_RESTRICT above,
1030 const longDouble *COIN_RESTRICT aUnder,
1031 longDouble *COIN_RESTRICT aOther,
1032 const longDouble *COIN_RESTRICT work,
1033 int nUnder)
1034 {
1035 #ifdef POS_DEBUG
1036 int ira, ica;
1037 int ia = bNumber(above, ira, ica);
1038 int iru, icu;
1039 int iu = bNumber(aUnder, iru, icu);
1040 int iro, ico;
1041 int io = bNumber(aOther, iro, ico);
1042 /*printf("%d %d %d\n",ia,iu,io);*/
1043 printf("recrecleaf above (%d,%d), under (%d,%d), other (%d,%d)\n",
1044 ira, ica, iru, icu, iro, ico);
1045 #endif
1046 int i, j, k;
1047 longDouble *aa;
1048 #ifdef BLOCKUNROLL
1049 aa = aOther - 4 * BLOCK;
1050 if (nUnder == BLOCK) {
1051 /*#define INTEL*/
1052 #ifdef INTEL
1053 aa += 2 * BLOCK;
1054 for (j = 0; j < BLOCK; j += 2) {
1055 aa += 2 * BLOCK;
1056 for (i = 0; i < BLOCK; i += 2) {
1057 CoinWorkDouble t00 = aa[i + 0 * BLOCK];
1058 CoinWorkDouble t10 = aa[i + 1 * BLOCK];
1059 CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK];
1060 CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK];
1061 for (k = 0; k < BLOCK; k++) {
1062 CoinWorkDouble multiplier = work[k];
1063 CoinWorkDouble a00 = aUnder[i + k * BLOCK] * multiplier;
1064 CoinWorkDouble a01 = aUnder[i + 1 + k * BLOCK] * multiplier;
1065 t00 -= a00 * above[j + 0 + k * BLOCK];
1066 t10 -= a00 * above[j + 1 + k * BLOCK];
1067 t01 -= a01 * above[j + 0 + k * BLOCK];
1068 t11 -= a01 * above[j + 1 + k * BLOCK];
1069 }
1070 aa[i + 0 * BLOCK] = t00;
1071 aa[i + 1 * BLOCK] = t10;
1072 aa[i + 1 + 0 * BLOCK] = t01;
1073 aa[i + 1 + 1 * BLOCK] = t11;
1074 }
1075 }
1076 #else
1077 for (j = 0; j < BLOCK; j += 4) {
1078 aa += 4 * BLOCK;
1079 for (i = 0; i < BLOCK; i += 4) {
1080 CoinWorkDouble t00 = aa[i + 0 + 0 * BLOCK];
1081 CoinWorkDouble t10 = aa[i + 0 + 1 * BLOCK];
1082 CoinWorkDouble t20 = aa[i + 0 + 2 * BLOCK];
1083 CoinWorkDouble t30 = aa[i + 0 + 3 * BLOCK];
1084 CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK];
1085 CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK];
1086 CoinWorkDouble t21 = aa[i + 1 + 2 * BLOCK];
1087 CoinWorkDouble t31 = aa[i + 1 + 3 * BLOCK];
1088 CoinWorkDouble t02 = aa[i + 2 + 0 * BLOCK];
1089 CoinWorkDouble t12 = aa[i + 2 + 1 * BLOCK];
1090 CoinWorkDouble t22 = aa[i + 2 + 2 * BLOCK];
1091 CoinWorkDouble t32 = aa[i + 2 + 3 * BLOCK];
1092 CoinWorkDouble t03 = aa[i + 3 + 0 * BLOCK];
1093 CoinWorkDouble t13 = aa[i + 3 + 1 * BLOCK];
1094 CoinWorkDouble t23 = aa[i + 3 + 2 * BLOCK];
1095 CoinWorkDouble t33 = aa[i + 3 + 3 * BLOCK];
1096 const longDouble *COIN_RESTRICT aUnderNow = aUnder + i;
1097 const longDouble *COIN_RESTRICT aboveNow = above + j;
1098 for (k = 0; k < BLOCK; k++) {
1099 CoinWorkDouble multiplier = work[k];
1100 CoinWorkDouble a00 = aUnderNow[0] * multiplier;
1101 CoinWorkDouble a01 = aUnderNow[1] * multiplier;
1102 CoinWorkDouble a02 = aUnderNow[2] * multiplier;
1103 CoinWorkDouble a03 = aUnderNow[3] * multiplier;
1104 t00 -= a00 * aboveNow[0];
1105 t10 -= a00 * aboveNow[1];
1106 t20 -= a00 * aboveNow[2];
1107 t30 -= a00 * aboveNow[3];
1108 t01 -= a01 * aboveNow[0];
1109 t11 -= a01 * aboveNow[1];
1110 t21 -= a01 * aboveNow[2];
1111 t31 -= a01 * aboveNow[3];
1112 t02 -= a02 * aboveNow[0];
1113 t12 -= a02 * aboveNow[1];
1114 t22 -= a02 * aboveNow[2];
1115 t32 -= a02 * aboveNow[3];
1116 t03 -= a03 * aboveNow[0];
1117 t13 -= a03 * aboveNow[1];
1118 t23 -= a03 * aboveNow[2];
1119 t33 -= a03 * aboveNow[3];
1120 aUnderNow += BLOCK;
1121 aboveNow += BLOCK;
1122 }
1123 aa[i + 0 + 0 * BLOCK] = t00;
1124 aa[i + 0 + 1 * BLOCK] = t10;
1125 aa[i + 0 + 2 * BLOCK] = t20;
1126 aa[i + 0 + 3 * BLOCK] = t30;
1127 aa[i + 1 + 0 * BLOCK] = t01;
1128 aa[i + 1 + 1 * BLOCK] = t11;
1129 aa[i + 1 + 2 * BLOCK] = t21;
1130 aa[i + 1 + 3 * BLOCK] = t31;
1131 aa[i + 2 + 0 * BLOCK] = t02;
1132 aa[i + 2 + 1 * BLOCK] = t12;
1133 aa[i + 2 + 2 * BLOCK] = t22;
1134 aa[i + 2 + 3 * BLOCK] = t32;
1135 aa[i + 3 + 0 * BLOCK] = t03;
1136 aa[i + 3 + 1 * BLOCK] = t13;
1137 aa[i + 3 + 2 * BLOCK] = t23;
1138 aa[i + 3 + 3 * BLOCK] = t33;
1139 }
1140 }
1141 #endif
1142 } else {
1143 int odd = nUnder & 1;
1144 int n = nUnder - odd;
1145 for (j = 0; j < BLOCK; j += 4) {
1146 aa += 4 * BLOCK;
1147 for (i = 0; i < n; i += 2) {
1148 CoinWorkDouble t00 = aa[i + 0 * BLOCK];
1149 CoinWorkDouble t10 = aa[i + 1 * BLOCK];
1150 CoinWorkDouble t20 = aa[i + 2 * BLOCK];
1151 CoinWorkDouble t30 = aa[i + 3 * BLOCK];
1152 CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK];
1153 CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK];
1154 CoinWorkDouble t21 = aa[i + 1 + 2 * BLOCK];
1155 CoinWorkDouble t31 = aa[i + 1 + 3 * BLOCK];
1156 const longDouble *COIN_RESTRICT aUnderNow = aUnder + i;
1157 const longDouble *COIN_RESTRICT aboveNow = above + j;
1158 for (k = 0; k < BLOCK; k++) {
1159 CoinWorkDouble multiplier = work[k];
1160 CoinWorkDouble a00 = aUnderNow[0] * multiplier;
1161 CoinWorkDouble a01 = aUnderNow[1] * multiplier;
1162 t00 -= a00 * aboveNow[0];
1163 t10 -= a00 * aboveNow[1];
1164 t20 -= a00 * aboveNow[2];
1165 t30 -= a00 * aboveNow[3];
1166 t01 -= a01 * aboveNow[0];
1167 t11 -= a01 * aboveNow[1];
1168 t21 -= a01 * aboveNow[2];
1169 t31 -= a01 * aboveNow[3];
1170 aUnderNow += BLOCK;
1171 aboveNow += BLOCK;
1172 }
1173 aa[i + 0 * BLOCK] = t00;
1174 aa[i + 1 * BLOCK] = t10;
1175 aa[i + 2 * BLOCK] = t20;
1176 aa[i + 3 * BLOCK] = t30;
1177 aa[i + 1 + 0 * BLOCK] = t01;
1178 aa[i + 1 + 1 * BLOCK] = t11;
1179 aa[i + 1 + 2 * BLOCK] = t21;
1180 aa[i + 1 + 3 * BLOCK] = t31;
1181 }
1182 if (odd) {
1183 CoinWorkDouble t0 = aa[n + 0 * BLOCK];
1184 CoinWorkDouble t1 = aa[n + 1 * BLOCK];
1185 CoinWorkDouble t2 = aa[n + 2 * BLOCK];
1186 CoinWorkDouble t3 = aa[n + 3 * BLOCK];
1187 CoinWorkDouble a0;
1188 for (k = 0; k < BLOCK; k++) {
1189 a0 = aUnder[n + k * BLOCK] * work[k];
1190 t0 -= a0 * above[j + 0 + k * BLOCK];
1191 t1 -= a0 * above[j + 1 + k * BLOCK];
1192 t2 -= a0 * above[j + 2 + k * BLOCK];
1193 t3 -= a0 * above[j + 3 + k * BLOCK];
1194 }
1195 aa[n + 0 * BLOCK] = t0;
1196 aa[n + 1 * BLOCK] = t1;
1197 aa[n + 2 * BLOCK] = t2;
1198 aa[n + 3 * BLOCK] = t3;
1199 }
1200 }
1201 }
1202 #else
1203 aa = aOther - BLOCK;
1204 for (j = 0; j < BLOCK; j++) {
1205 aa += BLOCK;
1206 for (i = 0; i < nUnder; i++) {
1207 CoinWorkDouble t00 = aa[i + 0 * BLOCK];
1208 for (k = 0; k < BLOCK; k++) {
1209 CoinWorkDouble a00 = aUnder[i + k * BLOCK] * work[k];
1210 t00 -= a00 * above[j + k * BLOCK];
1211 }
1212 aa[i] = t00;
1213 }
1214 }
1215 #endif
1216 }
1217 /* Uses factorization to solve. */
solve(CoinWorkDouble * region)1218 void ClpCholeskyDense::solve(CoinWorkDouble *region)
1219 {
1220 #ifdef CHOL_COMPARE
1221 double *region2 = NULL;
1222 if (numberRows_ < 200) {
1223 longDouble *xx = sparseFactor_;
1224 longDouble *yy = diagonal_;
1225 diagonal_ = sparseFactor_ + 40000;
1226 sparseFactor_ = diagonal_ + numberRows_;
1227 region2 = ClpCopyOfArray(region, numberRows_);
1228 int iRow, iColumn;
1229 int addOffset = numberRows_ - 1;
1230 longDouble *work = sparseFactor_ - 1;
1231 for (iColumn = 0; iColumn < numberRows_; iColumn++) {
1232 double value = region2[iColumn];
1233 for (iRow = iColumn + 1; iRow < numberRows_; iRow++)
1234 region2[iRow] -= value * work[iRow];
1235 addOffset--;
1236 work += addOffset;
1237 }
1238 for (iColumn = numberRows_ - 1; iColumn >= 0; iColumn--) {
1239 double value = region2[iColumn] * diagonal_[iColumn];
1240 work -= addOffset;
1241 addOffset++;
1242 for (iRow = iColumn + 1; iRow < numberRows_; iRow++)
1243 value -= region2[iRow] * work[iRow];
1244 region2[iColumn] = value;
1245 }
1246 sparseFactor_ = xx;
1247 diagonal_ = yy;
1248 }
1249 #endif
1250 /*longDouble * xx = sparseFactor_;*/
1251 /*longDouble * yy = diagonal_;*/
1252 /*diagonal_ = sparseFactor_ + 40000;*/
1253 /*sparseFactor_=diagonal_ + numberRows_;*/
1254 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
1255 /* later align on boundary*/
1256 longDouble *a = sparseFactor_ + BLOCKSQ * numberBlocks;
1257 int iBlock;
1258 longDouble *aa = a;
1259 int iColumn;
1260 for (iBlock = 0; iBlock < numberBlocks; iBlock++) {
1261 int nChunk;
1262 int jBlock;
1263 int iDo = iBlock * BLOCK;
1264 int base = iDo;
1265 if (iDo + BLOCK > numberRows_) {
1266 nChunk = numberRows_ - iDo;
1267 } else {
1268 nChunk = BLOCK;
1269 }
1270 solveF1(aa, nChunk, region + iDo);
1271 for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) {
1272 base += BLOCK;
1273 aa += BLOCKSQ;
1274 if (base + BLOCK > numberRows_) {
1275 nChunk = numberRows_ - base;
1276 } else {
1277 nChunk = BLOCK;
1278 }
1279 solveF2(aa, nChunk, region + iDo, region + base);
1280 }
1281 aa += BLOCKSQ;
1282 }
1283 /* do diagonal outside*/
1284 for (iColumn = 0; iColumn < numberRows_; iColumn++)
1285 region[iColumn] *= diagonal_[iColumn];
1286 int offset = ((numberBlocks * (numberBlocks + 1)) >> 1);
1287 aa = a + number_entries(offset - 1);
1288 int lBase = (numberBlocks - 1) * BLOCK;
1289 for (iBlock = numberBlocks - 1; iBlock >= 0; iBlock--) {
1290 int nChunk;
1291 int jBlock;
1292 int triBase = iBlock * BLOCK;
1293 int iBase = lBase;
1294 for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) {
1295 if (iBase + BLOCK > numberRows_) {
1296 nChunk = numberRows_ - iBase;
1297 } else {
1298 nChunk = BLOCK;
1299 }
1300 solveB2(aa, nChunk, region + triBase, region + iBase);
1301 iBase -= BLOCK;
1302 aa -= BLOCKSQ;
1303 }
1304 if (triBase + BLOCK > numberRows_) {
1305 nChunk = numberRows_ - triBase;
1306 } else {
1307 nChunk = BLOCK;
1308 }
1309 solveB1(aa, nChunk, region + triBase);
1310 aa -= BLOCKSQ;
1311 }
1312 #ifdef CHOL_COMPARE
1313 if (numberRows_ < 200) {
1314 for (int i = 0; i < numberRows_; i++) {
1315 assert(CoinAbs(region[i] - region2[i]) < 1.0e-3);
1316 }
1317 delete[] region2;
1318 }
1319 #endif
1320 }
1321 /* Forward part of solve 1*/
solveF1(longDouble * a,int n,CoinWorkDouble * region)1322 void ClpCholeskyDense::solveF1(longDouble *a, int n, CoinWorkDouble *region)
1323 {
1324 int j, k;
1325 CoinWorkDouble t00;
1326 for (j = 0; j < n; j++) {
1327 t00 = region[j];
1328 for (k = 0; k < j; ++k) {
1329 t00 -= region[k] * a[j + k * BLOCK];
1330 }
1331 /*t00*=a[j + j * BLOCK];*/
1332 region[j] = t00;
1333 }
1334 }
1335 /* Forward part of solve 2*/
solveF2(longDouble * a,int n,CoinWorkDouble * region,CoinWorkDouble * region2)1336 void ClpCholeskyDense::solveF2(longDouble *a, int n, CoinWorkDouble *region, CoinWorkDouble *region2)
1337 {
1338 int j, k;
1339 #ifdef BLOCKUNROLL
1340 if (n == BLOCK) {
1341 for (k = 0; k < BLOCK; k += 4) {
1342 CoinWorkDouble t0 = region2[0];
1343 CoinWorkDouble t1 = region2[1];
1344 CoinWorkDouble t2 = region2[2];
1345 CoinWorkDouble t3 = region2[3];
1346 t0 -= region[0] * a[0 + 0 * BLOCK];
1347 t1 -= region[0] * a[1 + 0 * BLOCK];
1348 t2 -= region[0] * a[2 + 0 * BLOCK];
1349 t3 -= region[0] * a[3 + 0 * BLOCK];
1350
1351 t0 -= region[1] * a[0 + 1 * BLOCK];
1352 t1 -= region[1] * a[1 + 1 * BLOCK];
1353 t2 -= region[1] * a[2 + 1 * BLOCK];
1354 t3 -= region[1] * a[3 + 1 * BLOCK];
1355
1356 t0 -= region[2] * a[0 + 2 * BLOCK];
1357 t1 -= region[2] * a[1 + 2 * BLOCK];
1358 t2 -= region[2] * a[2 + 2 * BLOCK];
1359 t3 -= region[2] * a[3 + 2 * BLOCK];
1360
1361 t0 -= region[3] * a[0 + 3 * BLOCK];
1362 t1 -= region[3] * a[1 + 3 * BLOCK];
1363 t2 -= region[3] * a[2 + 3 * BLOCK];
1364 t3 -= region[3] * a[3 + 3 * BLOCK];
1365
1366 t0 -= region[4] * a[0 + 4 * BLOCK];
1367 t1 -= region[4] * a[1 + 4 * BLOCK];
1368 t2 -= region[4] * a[2 + 4 * BLOCK];
1369 t3 -= region[4] * a[3 + 4 * BLOCK];
1370
1371 t0 -= region[5] * a[0 + 5 * BLOCK];
1372 t1 -= region[5] * a[1 + 5 * BLOCK];
1373 t2 -= region[5] * a[2 + 5 * BLOCK];
1374 t3 -= region[5] * a[3 + 5 * BLOCK];
1375
1376 t0 -= region[6] * a[0 + 6 * BLOCK];
1377 t1 -= region[6] * a[1 + 6 * BLOCK];
1378 t2 -= region[6] * a[2 + 6 * BLOCK];
1379 t3 -= region[6] * a[3 + 6 * BLOCK];
1380
1381 t0 -= region[7] * a[0 + 7 * BLOCK];
1382 t1 -= region[7] * a[1 + 7 * BLOCK];
1383 t2 -= region[7] * a[2 + 7 * BLOCK];
1384 t3 -= region[7] * a[3 + 7 * BLOCK];
1385 #if BLOCK > 8
1386 t0 -= region[8] * a[0 + 8 * BLOCK];
1387 t1 -= region[8] * a[1 + 8 * BLOCK];
1388 t2 -= region[8] * a[2 + 8 * BLOCK];
1389 t3 -= region[8] * a[3 + 8 * BLOCK];
1390
1391 t0 -= region[9] * a[0 + 9 * BLOCK];
1392 t1 -= region[9] * a[1 + 9 * BLOCK];
1393 t2 -= region[9] * a[2 + 9 * BLOCK];
1394 t3 -= region[9] * a[3 + 9 * BLOCK];
1395
1396 t0 -= region[10] * a[0 + 10 * BLOCK];
1397 t1 -= region[10] * a[1 + 10 * BLOCK];
1398 t2 -= region[10] * a[2 + 10 * BLOCK];
1399 t3 -= region[10] * a[3 + 10 * BLOCK];
1400
1401 t0 -= region[11] * a[0 + 11 * BLOCK];
1402 t1 -= region[11] * a[1 + 11 * BLOCK];
1403 t2 -= region[11] * a[2 + 11 * BLOCK];
1404 t3 -= region[11] * a[3 + 11 * BLOCK];
1405
1406 t0 -= region[12] * a[0 + 12 * BLOCK];
1407 t1 -= region[12] * a[1 + 12 * BLOCK];
1408 t2 -= region[12] * a[2 + 12 * BLOCK];
1409 t3 -= region[12] * a[3 + 12 * BLOCK];
1410
1411 t0 -= region[13] * a[0 + 13 * BLOCK];
1412 t1 -= region[13] * a[1 + 13 * BLOCK];
1413 t2 -= region[13] * a[2 + 13 * BLOCK];
1414 t3 -= region[13] * a[3 + 13 * BLOCK];
1415
1416 t0 -= region[14] * a[0 + 14 * BLOCK];
1417 t1 -= region[14] * a[1 + 14 * BLOCK];
1418 t2 -= region[14] * a[2 + 14 * BLOCK];
1419 t3 -= region[14] * a[3 + 14 * BLOCK];
1420
1421 t0 -= region[15] * a[0 + 15 * BLOCK];
1422 t1 -= region[15] * a[1 + 15 * BLOCK];
1423 t2 -= region[15] * a[2 + 15 * BLOCK];
1424 t3 -= region[15] * a[3 + 15 * BLOCK];
1425 #endif
1426 region2[0] = t0;
1427 region2[1] = t1;
1428 region2[2] = t2;
1429 region2[3] = t3;
1430 region2 += 4;
1431 a += 4;
1432 }
1433 } else {
1434 #endif
1435 for (k = 0; k < n; ++k) {
1436 CoinWorkDouble t00 = region2[k];
1437 for (j = 0; j < BLOCK; j++) {
1438 t00 -= region[j] * a[k + j * BLOCK];
1439 }
1440 region2[k] = t00;
1441 }
1442 #ifdef BLOCKUNROLL
1443 }
1444 #endif
1445 }
1446 /* Backward part of solve 1*/
solveB1(longDouble * a,int n,CoinWorkDouble * region)1447 void ClpCholeskyDense::solveB1(longDouble *a, int n, CoinWorkDouble *region)
1448 {
1449 int j, k;
1450 CoinWorkDouble t00;
1451 for (j = n - 1; j >= 0; j--) {
1452 t00 = region[j];
1453 for (k = j + 1; k < n; ++k) {
1454 t00 -= region[k] * a[k + j * BLOCK];
1455 }
1456 /*t00*=a[j + j * BLOCK];*/
1457 region[j] = t00;
1458 }
1459 }
1460 /* Backward part of solve 2*/
solveB2(longDouble * a,int n,CoinWorkDouble * region,CoinWorkDouble * region2)1461 void ClpCholeskyDense::solveB2(longDouble *a, int n, CoinWorkDouble *region, CoinWorkDouble *region2)
1462 {
1463 int j, k;
1464 #ifdef BLOCKUNROLL
1465 if (n == BLOCK) {
1466 for (j = 0; j < BLOCK; j += 4) {
1467 CoinWorkDouble t0 = region[0];
1468 CoinWorkDouble t1 = region[1];
1469 CoinWorkDouble t2 = region[2];
1470 CoinWorkDouble t3 = region[3];
1471 t0 -= region2[0] * a[0 + 0 * BLOCK];
1472 t1 -= region2[0] * a[0 + 1 * BLOCK];
1473 t2 -= region2[0] * a[0 + 2 * BLOCK];
1474 t3 -= region2[0] * a[0 + 3 * BLOCK];
1475
1476 t0 -= region2[1] * a[1 + 0 * BLOCK];
1477 t1 -= region2[1] * a[1 + 1 * BLOCK];
1478 t2 -= region2[1] * a[1 + 2 * BLOCK];
1479 t3 -= region2[1] * a[1 + 3 * BLOCK];
1480
1481 t0 -= region2[2] * a[2 + 0 * BLOCK];
1482 t1 -= region2[2] * a[2 + 1 * BLOCK];
1483 t2 -= region2[2] * a[2 + 2 * BLOCK];
1484 t3 -= region2[2] * a[2 + 3 * BLOCK];
1485
1486 t0 -= region2[3] * a[3 + 0 * BLOCK];
1487 t1 -= region2[3] * a[3 + 1 * BLOCK];
1488 t2 -= region2[3] * a[3 + 2 * BLOCK];
1489 t3 -= region2[3] * a[3 + 3 * BLOCK];
1490
1491 t0 -= region2[4] * a[4 + 0 * BLOCK];
1492 t1 -= region2[4] * a[4 + 1 * BLOCK];
1493 t2 -= region2[4] * a[4 + 2 * BLOCK];
1494 t3 -= region2[4] * a[4 + 3 * BLOCK];
1495
1496 t0 -= region2[5] * a[5 + 0 * BLOCK];
1497 t1 -= region2[5] * a[5 + 1 * BLOCK];
1498 t2 -= region2[5] * a[5 + 2 * BLOCK];
1499 t3 -= region2[5] * a[5 + 3 * BLOCK];
1500
1501 t0 -= region2[6] * a[6 + 0 * BLOCK];
1502 t1 -= region2[6] * a[6 + 1 * BLOCK];
1503 t2 -= region2[6] * a[6 + 2 * BLOCK];
1504 t3 -= region2[6] * a[6 + 3 * BLOCK];
1505
1506 t0 -= region2[7] * a[7 + 0 * BLOCK];
1507 t1 -= region2[7] * a[7 + 1 * BLOCK];
1508 t2 -= region2[7] * a[7 + 2 * BLOCK];
1509 t3 -= region2[7] * a[7 + 3 * BLOCK];
1510 #if BLOCK > 8
1511
1512 t0 -= region2[8] * a[8 + 0 * BLOCK];
1513 t1 -= region2[8] * a[8 + 1 * BLOCK];
1514 t2 -= region2[8] * a[8 + 2 * BLOCK];
1515 t3 -= region2[8] * a[8 + 3 * BLOCK];
1516
1517 t0 -= region2[9] * a[9 + 0 * BLOCK];
1518 t1 -= region2[9] * a[9 + 1 * BLOCK];
1519 t2 -= region2[9] * a[9 + 2 * BLOCK];
1520 t3 -= region2[9] * a[9 + 3 * BLOCK];
1521
1522 t0 -= region2[10] * a[10 + 0 * BLOCK];
1523 t1 -= region2[10] * a[10 + 1 * BLOCK];
1524 t2 -= region2[10] * a[10 + 2 * BLOCK];
1525 t3 -= region2[10] * a[10 + 3 * BLOCK];
1526
1527 t0 -= region2[11] * a[11 + 0 * BLOCK];
1528 t1 -= region2[11] * a[11 + 1 * BLOCK];
1529 t2 -= region2[11] * a[11 + 2 * BLOCK];
1530 t3 -= region2[11] * a[11 + 3 * BLOCK];
1531
1532 t0 -= region2[12] * a[12 + 0 * BLOCK];
1533 t1 -= region2[12] * a[12 + 1 * BLOCK];
1534 t2 -= region2[12] * a[12 + 2 * BLOCK];
1535 t3 -= region2[12] * a[12 + 3 * BLOCK];
1536
1537 t0 -= region2[13] * a[13 + 0 * BLOCK];
1538 t1 -= region2[13] * a[13 + 1 * BLOCK];
1539 t2 -= region2[13] * a[13 + 2 * BLOCK];
1540 t3 -= region2[13] * a[13 + 3 * BLOCK];
1541
1542 t0 -= region2[14] * a[14 + 0 * BLOCK];
1543 t1 -= region2[14] * a[14 + 1 * BLOCK];
1544 t2 -= region2[14] * a[14 + 2 * BLOCK];
1545 t3 -= region2[14] * a[14 + 3 * BLOCK];
1546
1547 t0 -= region2[15] * a[15 + 0 * BLOCK];
1548 t1 -= region2[15] * a[15 + 1 * BLOCK];
1549 t2 -= region2[15] * a[15 + 2 * BLOCK];
1550 t3 -= region2[15] * a[15 + 3 * BLOCK];
1551 #endif
1552 region[0] = t0;
1553 region[1] = t1;
1554 region[2] = t2;
1555 region[3] = t3;
1556 a += 4 * BLOCK;
1557 region += 4;
1558 }
1559 } else {
1560 #endif
1561 for (j = 0; j < BLOCK; j++) {
1562 CoinWorkDouble t00 = region[j];
1563 for (k = 0; k < n; ++k) {
1564 t00 -= region2[k] * a[k + j * BLOCK];
1565 }
1566 region[j] = t00;
1567 }
1568 #ifdef BLOCKUNROLL
1569 }
1570 #endif
1571 }
1572
1573 /* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
1574 */
1575