1 /* $Id: ClpCholeskyDense.cpp 1753 2011-06-19 16:27:26Z stefan $ */
2 /*
3   Copyright (C) 2003, International Business Machines Corporation
4   and others.  All Rights Reserved.
5 
6   This code is licensed under the terms of the Eclipse Public License (EPL).
7 */
8 #include "CoinPragma.hpp"
9 #include "CoinHelperFunctions.hpp"
10 #include "ClpHelperFunctions.hpp"
11 
12 #include "ClpInterior.hpp"
13 #include "ClpCholeskyDense.hpp"
14 #include "ClpMessage.hpp"
15 #include "ClpQuadraticObjective.hpp"
16 
17 /*#############################################################################*/
18 /* Constructors / Destructor / Assignment*/
19 /*#############################################################################*/
20 
21 /*-------------------------------------------------------------------*/
22 /* Default Constructor */
23 /*-------------------------------------------------------------------*/
ClpCholeskyDense()24 ClpCholeskyDense::ClpCholeskyDense ()
25      : ClpCholeskyBase(),
26        borrowSpace_(false)
27 {
28      type_ = 11;
29 }
30 
31 /*-------------------------------------------------------------------*/
32 /* Copy constructor */
33 /*-------------------------------------------------------------------*/
ClpCholeskyDense(const ClpCholeskyDense & rhs)34 ClpCholeskyDense::ClpCholeskyDense (const ClpCholeskyDense & rhs)
35      : ClpCholeskyBase(rhs),
36        borrowSpace_(rhs.borrowSpace_)
37 {
38      assert(!rhs.borrowSpace_ || !rhs.sizeFactor_); /* can't do if borrowing space*/
39 }
40 
41 
42 /*-------------------------------------------------------------------*/
43 /* Destructor */
44 /*-------------------------------------------------------------------*/
~ClpCholeskyDense()45 ClpCholeskyDense::~ClpCholeskyDense ()
46 {
47      if (borrowSpace_) {
48           /* set NULL*/
49           sparseFactor_ = NULL;
50           workDouble_ = NULL;
51           diagonal_ = NULL;
52      }
53 }
54 
55 /*----------------------------------------------------------------*/
56 /* Assignment operator */
57 /*-------------------------------------------------------------------*/
58 ClpCholeskyDense &
operator =(const ClpCholeskyDense & rhs)59 ClpCholeskyDense::operator=(const ClpCholeskyDense& rhs)
60 {
61      if (this != &rhs) {
62           assert(!rhs.borrowSpace_ || !rhs.sizeFactor_); /* can't do if borrowing space*/
63           ClpCholeskyBase::operator=(rhs);
64           borrowSpace_ = rhs.borrowSpace_;
65      }
66      return *this;
67 }
68 /*-------------------------------------------------------------------*/
69 /* Clone*/
70 /*-------------------------------------------------------------------*/
clone() const71 ClpCholeskyBase * ClpCholeskyDense::clone() const
72 {
73      return new ClpCholeskyDense(*this);
74 }
75 /* If not power of 2 then need to redo a bit*/
76 #define BLOCK 16
77 #define BLOCKSHIFT 4
78 /* Block unroll if power of 2 and at least 8*/
79 #define BLOCKUNROLL
80 
81 #define BLOCKSQ ( BLOCK*BLOCK )
82 #define BLOCKSQSHIFT ( BLOCKSHIFT+BLOCKSHIFT )
83 #define number_blocks(x) (((x)+BLOCK-1)>>BLOCKSHIFT)
84 #define number_rows(x) ((x)<<BLOCKSHIFT)
85 #define number_entries(x) ((x)<<BLOCKSQSHIFT)
86 /* Gets space */
87 int
reserveSpace(const ClpCholeskyBase * factor,int numberRows)88 ClpCholeskyDense::reserveSpace(const ClpCholeskyBase * factor, int numberRows)
89 {
90      numberRows_ = numberRows;
91      int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
92      /* allow one stripe extra*/
93      numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2);
94      sizeFactor_ = numberBlocks * BLOCKSQ;
95      /*#define CHOL_COMPARE*/
96 #ifdef CHOL_COMPARE
97      sizeFactor_ += 95000;
98 #endif
99      if (!factor) {
100           sparseFactor_ = new longDouble [sizeFactor_];
101           rowsDropped_ = new char [numberRows_];
102           memset(rowsDropped_, 0, numberRows_);
103           workDouble_ = new longDouble[numberRows_];
104           diagonal_ = new longDouble[numberRows_];
105      } else {
106           borrowSpace_ = true;
107           int numberFull = factor->numberRows();
108           sparseFactor_ = factor->sparseFactor() + (factor->size() - sizeFactor_);
109           workDouble_ = factor->workDouble() + (numberFull - numberRows_);
110           diagonal_ = factor->diagonal() + (numberFull - numberRows_);
111      }
112      numberRowsDropped_ = 0;
113      return 0;
114 }
115 /* Returns space needed */
116 CoinBigIndex
space(int numberRows) const117 ClpCholeskyDense::space( int numberRows) const
118 {
119      int numberBlocks = (numberRows + BLOCK - 1) >> BLOCKSHIFT;
120      /* allow one stripe extra*/
121      numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2);
122      CoinBigIndex sizeFactor = numberBlocks * BLOCKSQ;
123 #ifdef CHOL_COMPARE
124      sizeFactor += 95000;
125 #endif
126      return sizeFactor;
127 }
128 /* Orders rows and saves pointer to matrix.and model */
129 int
order(ClpInterior * model)130 ClpCholeskyDense::order(ClpInterior * model)
131 {
132      model_ = model;
133      int numberRows;
134      int numberRowsModel = model_->numberRows();
135      int numberColumns = model_->numberColumns();
136      if (!doKKT_) {
137           numberRows = numberRowsModel;
138      } else {
139           numberRows = 2 * numberRowsModel + numberColumns;
140      }
141      reserveSpace(NULL, numberRows);
142      rowCopy_ = model->clpMatrix()->reverseOrderedCopy();
143      return 0;
144 }
145 /* Does Symbolic factorization given permutation.
146    This is called immediately after order.  If user provides this then
147    user must provide factorize and solve.  Otherwise the default factorization is used
148    returns non-zero if not enough memory */
149 int
symbolic()150 ClpCholeskyDense::symbolic()
151 {
152      return 0;
153 }
154 /* Factorize - filling in rowsDropped and returning number dropped */
155 int
factorize(const CoinWorkDouble * diagonal,int * rowsDropped)156 ClpCholeskyDense::factorize(const CoinWorkDouble * diagonal, int * rowsDropped)
157 {
158      assert (!borrowSpace_);
159      const CoinBigIndex * columnStart = model_->clpMatrix()->getVectorStarts();
160      const int * columnLength = model_->clpMatrix()->getVectorLengths();
161      const int * row = model_->clpMatrix()->getIndices();
162      const double * element = model_->clpMatrix()->getElements();
163      const CoinBigIndex * rowStart = rowCopy_->getVectorStarts();
164      const int * rowLength = rowCopy_->getVectorLengths();
165      const int * column = rowCopy_->getIndices();
166      const double * elementByRow = rowCopy_->getElements();
167      int numberColumns = model_->clpMatrix()->getNumCols();
168      CoinZeroN(sparseFactor_, sizeFactor_);
169      /*perturbation*/
170      CoinWorkDouble perturbation = model_->diagonalPerturbation() * model_->diagonalNorm();
171      perturbation = perturbation * perturbation;
172      if (perturbation > 1.0) {
173 #ifdef COIN_DEVELOP
174           /*if (model_->model()->logLevel()&4) */
175           std::cout << "large perturbation " << perturbation << std::endl;
176 #endif
177           perturbation = CoinSqrt(perturbation);
178           perturbation = 1.0;
179      }
180      int iRow;
181      int newDropped = 0;
182      CoinWorkDouble largest = 1.0;
183      CoinWorkDouble smallest = COIN_DBL_MAX;
184      CoinWorkDouble delta2 = model_->delta(); /* add delta*delta to diagonal*/
185      delta2 *= delta2;
186      if (!doKKT_) {
187           longDouble * work = sparseFactor_;
188           work--; /* skip diagonal*/
189           int addOffset = numberRows_ - 1;
190           const CoinWorkDouble * diagonalSlack = diagonal + numberColumns;
191           /* largest in initial matrix*/
192           CoinWorkDouble largest2 = 1.0e-20;
193           for (iRow = 0; iRow < numberRows_; iRow++) {
194                if (!rowsDropped_[iRow]) {
195                     CoinBigIndex startRow = rowStart[iRow];
196                     CoinBigIndex endRow = rowStart[iRow] + rowLength[iRow];
197                     CoinWorkDouble diagonalValue = diagonalSlack[iRow] + delta2;
198                     for (CoinBigIndex k = startRow; k < endRow; k++) {
199                          int iColumn = column[k];
200                          CoinBigIndex start = columnStart[iColumn];
201                          CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
202                          CoinWorkDouble multiplier = diagonal[iColumn] * elementByRow[k];
203                          for (CoinBigIndex j = start; j < end; j++) {
204                               int jRow = row[j];
205                               if (!rowsDropped_[jRow]) {
206                                    if (jRow > iRow) {
207                                         CoinWorkDouble value = element[j] * multiplier;
208                                         work[jRow] += value;
209                                    } else if (jRow == iRow) {
210                                         CoinWorkDouble value = element[j] * multiplier;
211                                         diagonalValue += value;
212                                    }
213                               }
214                          }
215                     }
216                     for (int j = iRow + 1; j < numberRows_; j++)
217                          largest2 = CoinMax(largest2, CoinAbs(work[j]));
218                     diagonal_[iRow] = diagonalValue;
219                     largest2 = CoinMax(largest2, CoinAbs(diagonalValue));
220                } else {
221                     /* dropped*/
222                     diagonal_[iRow] = 1.0;
223                }
224                addOffset--;
225                work += addOffset;
226           }
227           /*check sizes*/
228           largest2 *= 1.0e-20;
229           largest = CoinMin(largest2, CHOL_SMALL_VALUE);
230           int numberDroppedBefore = 0;
231           for (iRow = 0; iRow < numberRows_; iRow++) {
232                int dropped = rowsDropped_[iRow];
233                /* Move to int array*/
234                rowsDropped[iRow] = dropped;
235                if (!dropped) {
236                     CoinWorkDouble diagonal = diagonal_[iRow];
237                     if (diagonal > largest2) {
238                          diagonal_[iRow] = diagonal + perturbation;
239                     } else {
240                          diagonal_[iRow] = diagonal + perturbation;
241                          rowsDropped[iRow] = 2;
242                          numberDroppedBefore++;
243                     }
244                }
245           }
246           doubleParameters_[10] = CoinMax(1.0e-20, largest);
247           integerParameters_[20] = 0;
248           doubleParameters_[3] = 0.0;
249           doubleParameters_[4] = COIN_DBL_MAX;
250           integerParameters_[34] = 0; /* say all must be positive*/
251 #ifdef CHOL_COMPARE
252           if (numberRows_ < 200)
253                factorizePart3(rowsDropped);
254 #endif
255           factorizePart2(rowsDropped);
256           newDropped = integerParameters_[20] + numberDroppedBefore;
257           largest = doubleParameters_[3];
258           smallest = doubleParameters_[4];
259           if (model_->messageHandler()->logLevel() > 1)
260                std::cout << "Cholesky - largest " << largest << " smallest " << smallest << std::endl;
261           choleskyCondition_ = largest / smallest;
262           /*drop fresh makes some formADAT easier*/
263           if (newDropped || numberRowsDropped_) {
264                newDropped = 0;
265                for (int i = 0; i < numberRows_; i++) {
266                     char dropped = static_cast<char>(rowsDropped[i]);
267                     rowsDropped_[i] = dropped;
268                     if (dropped == 2) {
269                          /*dropped this time*/
270                          rowsDropped[newDropped++] = i;
271                          rowsDropped_[i] = 0;
272                     }
273                }
274                numberRowsDropped_ = newDropped;
275                newDropped = -(2 + newDropped);
276           }
277      } else {
278           /* KKT*/
279           CoinPackedMatrix * quadratic = NULL;
280           ClpQuadraticObjective * quadraticObj =
281                (dynamic_cast< ClpQuadraticObjective*>(model_->objectiveAsObject()));
282           if (quadraticObj)
283                quadratic = quadraticObj->quadraticObjective();
284           /* matrix*/
285           int numberRowsModel = model_->numberRows();
286           int numberColumns = model_->numberColumns();
287           int numberTotal = numberColumns + numberRowsModel;
288           longDouble * work = sparseFactor_;
289           work--; /* skip diagonal*/
290           int addOffset = numberRows_ - 1;
291           int iColumn;
292           if (!quadratic) {
293                for (iColumn = 0; iColumn < numberColumns; iColumn++) {
294                     CoinWorkDouble value = diagonal[iColumn];
295                     if (CoinAbs(value) > 1.0e-100) {
296                          value = 1.0 / value;
297                          largest = CoinMax(largest, CoinAbs(value));
298                          diagonal_[iColumn] = -value;
299                          CoinBigIndex start = columnStart[iColumn];
300                          CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
301                          for (CoinBigIndex j = start; j < end; j++) {
302                               /*choleskyRow_[numberElements]=row[j]+numberTotal;*/
303                               /*sparseFactor_[numberElements++]=element[j];*/
304                               work[row[j] + numberTotal] = element[j];
305                               largest = CoinMax(largest, CoinAbs(element[j]));
306                          }
307                     } else {
308                          diagonal_[iColumn] = -value;
309                     }
310                     addOffset--;
311                     work += addOffset;
312                }
313           } else {
314                /* Quadratic*/
315                const int * columnQuadratic = quadratic->getIndices();
316                const CoinBigIndex * columnQuadraticStart = quadratic->getVectorStarts();
317                const int * columnQuadraticLength = quadratic->getVectorLengths();
318                const double * quadraticElement = quadratic->getElements();
319                for (iColumn = 0; iColumn < numberColumns; iColumn++) {
320                     CoinWorkDouble value = diagonal[iColumn];
321                     CoinBigIndex j;
322                     if (CoinAbs(value) > 1.0e-100) {
323                          value = 1.0 / value;
324                          for (j = columnQuadraticStart[iColumn];
325                                    j < columnQuadraticStart[iColumn] + columnQuadraticLength[iColumn]; j++) {
326                               int jColumn = columnQuadratic[j];
327                               if (jColumn > iColumn) {
328                                    work[jColumn] = -quadraticElement[j];
329                               } else if (iColumn == jColumn) {
330                                    value += quadraticElement[j];
331                               }
332                          }
333                          largest = CoinMax(largest, CoinAbs(value));
334                          diagonal_[iColumn] = -value;
335                          CoinBigIndex start = columnStart[iColumn];
336                          CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
337                          for (j = start; j < end; j++) {
338                               work[row[j] + numberTotal] = element[j];
339                               largest = CoinMax(largest, CoinAbs(element[j]));
340                          }
341                     } else {
342                          value = 1.0e100;
343                          diagonal_[iColumn] = -value;
344                     }
345                     addOffset--;
346                     work += addOffset;
347                }
348           }
349           /* slacks*/
350           for (iColumn = numberColumns; iColumn < numberTotal; iColumn++) {
351                CoinWorkDouble value = diagonal[iColumn];
352                if (CoinAbs(value) > 1.0e-100) {
353                     value = 1.0 / value;
354                     largest = CoinMax(largest, CoinAbs(value));
355                } else {
356                     value = 1.0e100;
357                }
358                diagonal_[iColumn] = -value;
359                work[iColumn-numberColumns+numberTotal] = -1.0;
360                addOffset--;
361                work += addOffset;
362           }
363           /* Finish diagonal*/
364           for (iRow = 0; iRow < numberRowsModel; iRow++) {
365                diagonal_[iRow+numberTotal] = delta2;
366           }
367           /*check sizes*/
368           largest *= 1.0e-20;
369           largest = CoinMin(largest, CHOL_SMALL_VALUE);
370           doubleParameters_[10] = CoinMax(1.0e-20, largest);
371           integerParameters_[20] = 0;
372           doubleParameters_[3] = 0.0;
373           doubleParameters_[4] = COIN_DBL_MAX;
374           /* Set up LDL cutoff*/
375           integerParameters_[34] = numberTotal;
376           /* KKT*/
377           int * rowsDropped2 = new int[numberRows_];
378           CoinZeroN(rowsDropped2, numberRows_);
379 #ifdef CHOL_COMPARE
380           if (numberRows_ < 200)
381                factorizePart3(rowsDropped2);
382 #endif
383           factorizePart2(rowsDropped2);
384           newDropped = integerParameters_[20];
385           largest = doubleParameters_[3];
386           smallest = doubleParameters_[4];
387           if (model_->messageHandler()->logLevel() > 1)
388 	    COIN_DETAIL_PRINT(std::cout << "Cholesky - largest " << largest << " smallest " << smallest << std::endl);
389           choleskyCondition_ = largest / smallest;
390           /* Should save adjustments in ..R_*/
391           int n1 = 0, n2 = 0;
392           CoinWorkDouble * primalR = model_->primalR();
393           CoinWorkDouble * dualR = model_->dualR();
394           for (iRow = 0; iRow < numberTotal; iRow++) {
395                if (rowsDropped2[iRow]) {
396                     n1++;
397                     /*printf("row region1 %d dropped\n",iRow);*/
398                     /*rowsDropped_[iRow]=1;*/
399                     rowsDropped_[iRow] = 0;
400                     primalR[iRow] = doubleParameters_[20];
401                } else {
402                     rowsDropped_[iRow] = 0;
403                     primalR[iRow] = 0.0;
404                }
405           }
406           for (; iRow < numberRows_; iRow++) {
407                if (rowsDropped2[iRow]) {
408                     n2++;
409                     /*printf("row region2 %d dropped\n",iRow);*/
410                     /*rowsDropped_[iRow]=1;*/
411                     rowsDropped_[iRow] = 0;
412                     dualR[iRow-numberTotal] = doubleParameters_[34];
413                } else {
414                     rowsDropped_[iRow] = 0;
415                     dualR[iRow-numberTotal] = 0.0;
416                }
417           }
418      }
419      return 0;
420 }
421 /* Factorize - filling in rowsDropped and returning number dropped */
422 void
factorizePart3(int * rowsDropped)423 ClpCholeskyDense::factorizePart3( int * rowsDropped)
424 {
425      int iColumn;
426      longDouble * xx = sparseFactor_;
427      longDouble * yy = diagonal_;
428      diagonal_ = sparseFactor_ + 40000;
429      sparseFactor_ = diagonal_ + numberRows_;
430      /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/
431      CoinMemcpyN(xx, 40000, sparseFactor_);
432      CoinMemcpyN(yy, numberRows_, diagonal_);
433      int numberDropped = 0;
434      CoinWorkDouble largest = 0.0;
435      CoinWorkDouble smallest = COIN_DBL_MAX;
436      double dropValue = doubleParameters_[10];
437      int firstPositive = integerParameters_[34];
438      longDouble * work = sparseFactor_;
439      /* Allow for triangular*/
440      int addOffset = numberRows_ - 1;
441      work--;
442      for (iColumn = 0; iColumn < numberRows_; iColumn++) {
443           int iRow;
444           int addOffsetNow = numberRows_ - 1;
445           longDouble * workNow = sparseFactor_ - 1 + iColumn;
446           CoinWorkDouble diagonalValue = diagonal_[iColumn];
447           for (iRow = 0; iRow < iColumn; iRow++) {
448                double aj = *workNow;
449                addOffsetNow--;
450                workNow += addOffsetNow;
451                diagonalValue -= aj * aj * workDouble_[iRow];
452           }
453           bool dropColumn = false;
454           if (iColumn < firstPositive) {
455                /* must be negative*/
456                if (diagonalValue <= -dropValue) {
457                     smallest = CoinMin(smallest, -diagonalValue);
458                     largest = CoinMax(largest, -diagonalValue);
459                     workDouble_[iColumn] = diagonalValue;
460                     diagonalValue = 1.0 / diagonalValue;
461                } else {
462                     dropColumn = true;
463                     workDouble_[iColumn] = -1.0e100;
464                     diagonalValue = 0.0;
465                     integerParameters_[20]++;
466                }
467           } else {
468                /* must be positive*/
469                if (diagonalValue >= dropValue) {
470                     smallest = CoinMin(smallest, diagonalValue);
471                     largest = CoinMax(largest, diagonalValue);
472                     workDouble_[iColumn] = diagonalValue;
473                     diagonalValue = 1.0 / diagonalValue;
474                } else {
475                     dropColumn = true;
476                     workDouble_[iColumn] = 1.0e100;
477                     diagonalValue = 0.0;
478                     integerParameters_[20]++;
479                }
480           }
481           if (!dropColumn) {
482                diagonal_[iColumn] = diagonalValue;
483                for (iRow = iColumn + 1; iRow < numberRows_; iRow++) {
484                     double value = work[iRow];
485                     workNow = sparseFactor_ - 1;
486                     int addOffsetNow = numberRows_ - 1;
487                     for (int jColumn = 0; jColumn < iColumn; jColumn++) {
488                          double aj = workNow[iColumn];
489                          double multiplier = workDouble_[jColumn];
490                          double ai = workNow[iRow];
491                          addOffsetNow--;
492                          workNow += addOffsetNow;
493                          value -= aj * ai * multiplier;
494                     }
495                     work[iRow] = value * diagonalValue;
496                }
497           } else {
498                /* drop column*/
499                rowsDropped[iColumn] = 2;
500                numberDropped++;
501                diagonal_[iColumn] = 0.0;
502                for (iRow = iColumn + 1; iRow < numberRows_; iRow++) {
503                     work[iRow] = 0.0;
504                }
505           }
506           addOffset--;
507           work += addOffset;
508      }
509      doubleParameters_[3] = largest;
510      doubleParameters_[4] = smallest;
511      integerParameters_[20] = numberDropped;
512      sparseFactor_ = xx;
513      diagonal_ = yy;
514 }
515 /*#define POS_DEBUG*/
516 #ifdef POS_DEBUG
517 static int counter = 0;
bNumber(const longDouble * array,int & iRow,int & iCol)518 int ClpCholeskyDense::bNumber(const longDouble * array, int &iRow, int &iCol)
519 {
520      int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
521      longDouble * a = sparseFactor_ + BLOCKSQ * numberBlocks;
522      int k = array - a;
523      assert ((k % BLOCKSQ) == 0);
524      iCol = 0;
525      int kk = k >> BLOCKSQSHIFT;
526      /*printf("%d %d %d %d\n",k,kk,BLOCKSQ,BLOCKSQSHIFT);*/
527      k = kk;
528      while (k >= numberBlocks) {
529           iCol++;
530           k -= numberBlocks;
531           numberBlocks--;
532      }
533      iRow = iCol + k;
534      counter++;
535      if(counter > 100000)
536           exit(77);
537      return kk;
538 }
539 #endif
540 /* Factorize - filling in rowsDropped and returning number dropped */
541 void
factorizePart2(int * rowsDropped)542 ClpCholeskyDense::factorizePart2( int * rowsDropped)
543 {
544      int iColumn;
545      /*longDouble * xx = sparseFactor_;*/
546      /*longDouble * yy = diagonal_;*/
547      /*diagonal_ = sparseFactor_ + 40000;*/
548      /*sparseFactor_=diagonal_ + numberRows_;*/
549      /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/
550      /*memcpy(sparseFactor_,xx,40000*sizeof(double));*/
551      /*memcpy(diagonal_,yy,numberRows_*sizeof(double));*/
552      int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
553      /* later align on boundary*/
554      longDouble * a = sparseFactor_ + BLOCKSQ * numberBlocks;
555      int n = numberRows_;
556      int nRound = numberRows_ & (~(BLOCK - 1));
557      /* adjust if exact*/
558      if (nRound == n)
559           nRound -= BLOCK;
560      int sizeLastBlock = n - nRound;
561      int get = n * (n - 1) / 2; /* ? as no diagonal*/
562      int block = numberBlocks * (numberBlocks + 1) / 2;
563      int ifOdd;
564      int rowLast;
565      if (sizeLastBlock != BLOCK) {
566           longDouble * aa = &a[(block-1)*BLOCKSQ];
567           rowLast = nRound - 1;
568           ifOdd = 1;
569           int put = BLOCKSQ;
570           /* do last separately*/
571           put -= (BLOCK - sizeLastBlock) * (BLOCK + 1);
572           for (iColumn = numberRows_ - 1; iColumn >= nRound; iColumn--) {
573                int put2 = put;
574                put -= BLOCK;
575                for (int iRow = numberRows_ - 1; iRow > iColumn; iRow--) {
576                     aa[--put2] = sparseFactor_[--get];
577                     assert (aa + put2 >= sparseFactor_ + get);
578                }
579                /* save diagonal as well*/
580                aa[--put2] = diagonal_[iColumn];
581           }
582           n = nRound;
583           block--;
584      } else {
585           /* exact fit*/
586           rowLast = numberRows_ - 1;
587           ifOdd = 0;
588      }
589      /* Now main loop*/
590      int nBlock = 0;
591      for (; n > 0; n -= BLOCK) {
592           longDouble * aa = &a[(block-1)*BLOCKSQ];
593           longDouble * aaLast = NULL;
594           int put = BLOCKSQ;
595           int putLast = 0;
596           /* see if we have small block*/
597           if (ifOdd) {
598                aaLast = &a[(block-1)*BLOCKSQ];
599                aa = aaLast - BLOCKSQ;
600                putLast = BLOCKSQ - BLOCK + sizeLastBlock;
601           }
602           for (iColumn = n - 1; iColumn >= n - BLOCK; iColumn--) {
603                if (aaLast) {
604                     /* last bit*/
605                     for (int iRow = numberRows_ - 1; iRow > rowLast; iRow--) {
606                          aaLast[--putLast] = sparseFactor_[--get];
607                          assert (aaLast + putLast >= sparseFactor_ + get);
608                     }
609                     putLast -= BLOCK - sizeLastBlock;
610                }
611                longDouble * aPut = aa;
612                int j = rowLast;
613                for (int jBlock = 0; jBlock <= nBlock; jBlock++) {
614                     int put2 = put;
615                     int last = CoinMax(j - BLOCK, iColumn);
616                     for (int iRow = j; iRow > last; iRow--) {
617                          aPut[--put2] = sparseFactor_[--get];
618                          assert (aPut + put2 >= sparseFactor_ + get);
619                     }
620                     if (j - BLOCK < iColumn) {
621                          /* save diagonal as well*/
622                          aPut[--put2] = diagonal_[iColumn];
623                     }
624                     j -= BLOCK;
625                     aPut -= BLOCKSQ;
626                }
627                put -= BLOCK;
628           }
629           nBlock++;
630           block -= nBlock + ifOdd;
631      }
632      ClpCholeskyDenseC  info;
633      info.diagonal_ = diagonal_;
634      info.doubleParameters_[0] = doubleParameters_[10];
635      info.integerParameters_[0] = integerParameters_[34];
636 #ifndef CLP_CILK
637      ClpCholeskyCfactor(&info, a, numberRows_, numberBlocks,
638                         diagonal_, workDouble_, rowsDropped);
639 #else
640      info.a = a;
641      info.n = numberRows_;
642      info.numberBlocks = numberBlocks;
643      info.work = workDouble_;
644      info.rowsDropped = rowsDropped;
645      info.integerParameters_[1] = model_->numberThreads();
646      ClpCholeskySpawn(&info);
647 #endif
648      double largest = 0.0;
649      double smallest = COIN_DBL_MAX;
650      int numberDropped = 0;
651      for (int i = 0; i < numberRows_; i++) {
652           if (diagonal_[i]) {
653                largest = CoinMax(largest, CoinAbs(diagonal_[i]));
654                smallest = CoinMin(smallest, CoinAbs(diagonal_[i]));
655           } else {
656                numberDropped++;
657           }
658      }
659      doubleParameters_[3] = CoinMax(doubleParameters_[3], 1.0 / smallest);
660      doubleParameters_[4] = CoinMin(doubleParameters_[4], 1.0 / largest);
661      integerParameters_[20] += numberDropped;
662 }
663 /* Non leaf recursive factor*/
664 void
ClpCholeskyCfactor(ClpCholeskyDenseC * thisStruct,longDouble * a,int n,int numberBlocks,longDouble * diagonal,longDouble * work,int * rowsDropped)665 ClpCholeskyCfactor(ClpCholeskyDenseC * thisStruct, longDouble * a, int n, int numberBlocks,
666                    longDouble * diagonal, longDouble * work, int * rowsDropped)
667 {
668      if (n <= BLOCK) {
669           ClpCholeskyCfactorLeaf(thisStruct, a, n, diagonal, work, rowsDropped);
670      } else {
671           int nb = number_blocks((n + 1) >> 1);
672           int nThis = number_rows(nb);
673           longDouble * aother;
674           int nLeft = n - nThis;
675           int nintri = (nb * (nb + 1)) >> 1;
676           int nbelow = (numberBlocks - nb) * nb;
677           ClpCholeskyCfactor(thisStruct, a, nThis, numberBlocks, diagonal, work, rowsDropped);
678           ClpCholeskyCtriRec(thisStruct, a, nThis, a + number_entries(nb), diagonal, work, nLeft, nb, 0, numberBlocks);
679           aother = a + number_entries(nintri + nbelow);
680           ClpCholeskyCrecTri(thisStruct, a + number_entries(nb), nLeft, nThis, nb, 0, aother, diagonal, work, numberBlocks);
681           ClpCholeskyCfactor(thisStruct, aother, nLeft,
682                              numberBlocks - nb, diagonal + nThis, work + nThis, rowsDropped);
683      }
684 }
685 /* Non leaf recursive triangle rectangle update*/
686 void
ClpCholeskyCtriRec(ClpCholeskyDenseC * thisStruct,longDouble * aTri,int nThis,longDouble * aUnder,longDouble * diagonal,longDouble * work,int nLeft,int iBlock,int jBlock,int numberBlocks)687 ClpCholeskyCtriRec(ClpCholeskyDenseC * thisStruct, longDouble * aTri, int nThis, longDouble * aUnder,
688                    longDouble * diagonal, longDouble * work,
689                    int nLeft, int iBlock, int jBlock,
690                    int numberBlocks)
691 {
692      if (nThis <= BLOCK && nLeft <= BLOCK) {
693           ClpCholeskyCtriRecLeaf(/*thisStruct,*/ aTri, aUnder, diagonal, work, nLeft);
694      } else if (nThis < nLeft) {
695           int nb = number_blocks((nLeft + 1) >> 1);
696           int nLeft2 = number_rows(nb);
697           ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder, diagonal, work, nLeft2, iBlock, jBlock, numberBlocks);
698           ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder + number_entries(nb), diagonal, work, nLeft - nLeft2,
699                              iBlock + nb, jBlock, numberBlocks);
700      } else {
701           int nb = number_blocks((nThis + 1) >> 1);
702           int nThis2 = number_rows(nb);
703           longDouble * aother;
704           int kBlock = jBlock + nb;
705           int i;
706           int nintri = (nb * (nb + 1)) >> 1;
707           int nbelow = (numberBlocks - nb) * nb;
708           ClpCholeskyCtriRec(thisStruct, aTri, nThis2, aUnder, diagonal, work, nLeft, iBlock, jBlock, numberBlocks);
709           /* and rectangular update */
710           i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) -
711                (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
712           aother = aUnder + number_entries(i);
713           ClpCholeskyCrecRec(thisStruct, aTri + number_entries(nb), nThis - nThis2, nLeft, nThis2, aUnder, aother,
714                              work, kBlock, jBlock, numberBlocks);
715           ClpCholeskyCtriRec(thisStruct, aTri + number_entries(nintri + nbelow), nThis - nThis2, aother, diagonal + nThis2,
716                              work + nThis2, nLeft,
717                              iBlock - nb, kBlock - nb, numberBlocks - nb);
718      }
719 }
720 /* Non leaf recursive rectangle triangle update*/
721 void
ClpCholeskyCrecTri(ClpCholeskyDenseC * thisStruct,longDouble * aUnder,int nTri,int nDo,int iBlock,int jBlock,longDouble * aTri,longDouble * diagonal,longDouble * work,int numberBlocks)722 ClpCholeskyCrecTri(ClpCholeskyDenseC * thisStruct, longDouble * aUnder, int nTri, int nDo,
723                    int iBlock, int jBlock, longDouble * aTri,
724                    longDouble * diagonal, longDouble * work,
725                    int numberBlocks)
726 {
727      if (nTri <= BLOCK && nDo <= BLOCK) {
728           ClpCholeskyCrecTriLeaf(/*thisStruct,*/ aUnder, aTri,/*diagonal,*/work, nTri);
729      } else if (nTri < nDo) {
730           int nb = number_blocks((nDo + 1) >> 1);
731           int nDo2 = number_rows(nb);
732           longDouble * aother;
733           int i;
734           ClpCholeskyCrecTri(thisStruct, aUnder, nTri, nDo2, iBlock, jBlock, aTri, diagonal, work, numberBlocks);
735           i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) -
736                (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
737           aother = aUnder + number_entries(i);
738           ClpCholeskyCrecTri(thisStruct, aother, nTri, nDo - nDo2, iBlock - nb, jBlock, aTri, diagonal + nDo2,
739                              work + nDo2, numberBlocks - nb);
740      } else {
741           int nb = number_blocks((nTri + 1) >> 1);
742           int nTri2 = number_rows(nb);
743           longDouble * aother;
744           int i;
745           ClpCholeskyCrecTri(thisStruct, aUnder, nTri2, nDo, iBlock, jBlock, aTri, diagonal, work, numberBlocks);
746           /* and rectangular update */
747           i = ((numberBlocks - iBlock) * (numberBlocks - iBlock + 1) -
748                (numberBlocks - iBlock - nb) * (numberBlocks - iBlock - nb + 1)) >> 1;
749           aother = aTri + number_entries(nb);
750           ClpCholeskyCrecRec(thisStruct, aUnder, nTri2, nTri - nTri2, nDo, aUnder + number_entries(nb), aother,
751                              work, iBlock, jBlock, numberBlocks);
752           ClpCholeskyCrecTri(thisStruct, aUnder + number_entries(nb), nTri - nTri2, nDo, iBlock + nb, jBlock,
753                              aTri + number_entries(i), diagonal, work, numberBlocks);
754      }
755 }
756 /* Non leaf recursive rectangle rectangle update,
757    nUnder is number of rows in iBlock,
758    nUnderK is number of rows in kBlock
759 */
760 void
ClpCholeskyCrecRec(ClpCholeskyDenseC * thisStruct,longDouble * above,int nUnder,int nUnderK,int nDo,longDouble * aUnder,longDouble * aOther,longDouble * work,int iBlock,int jBlock,int numberBlocks)761 ClpCholeskyCrecRec(ClpCholeskyDenseC * thisStruct, longDouble * above, int nUnder, int nUnderK,
762                    int nDo, longDouble * aUnder, longDouble *aOther,
763                    longDouble * work,
764                    int iBlock, int jBlock,
765                    int numberBlocks)
766 {
767      if (nDo <= BLOCK && nUnder <= BLOCK && nUnderK <= BLOCK) {
768           assert (nDo == BLOCK && nUnder == BLOCK);
769           ClpCholeskyCrecRecLeaf(/*thisStruct,*/ above , aUnder ,  aOther, work, nUnderK);
770      } else if (nDo <= nUnderK && nUnder <= nUnderK) {
771           int nb = number_blocks((nUnderK + 1) >> 1);
772           int nUnder2 = number_rows(nb);
773           ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnder2, nDo, aUnder, aOther, work,
774                              iBlock, jBlock, numberBlocks);
775           ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK - nUnder2, nDo, aUnder + number_entries(nb),
776                              aOther + number_entries(nb), work, iBlock, jBlock, numberBlocks);
777      } else if (nUnderK <= nDo && nUnder <= nDo) {
778           int nb = number_blocks((nDo + 1) >> 1);
779           int nDo2 = number_rows(nb);
780           int i;
781           ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK, nDo2, aUnder, aOther, work,
782                              iBlock, jBlock, numberBlocks);
783           i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) -
784                (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
785           ClpCholeskyCrecRec(thisStruct, above + number_entries(i), nUnder, nUnderK, nDo - nDo2,
786                              aUnder + number_entries(i),
787                              aOther, work + nDo2,
788                              iBlock - nb, jBlock, numberBlocks - nb);
789      } else {
790           int nb = number_blocks((nUnder + 1) >> 1);
791           int nUnder2 = number_rows(nb);
792           int i;
793           ClpCholeskyCrecRec(thisStruct, above, nUnder2, nUnderK, nDo, aUnder, aOther, work,
794                              iBlock, jBlock, numberBlocks);
795           i = ((numberBlocks - iBlock) * (numberBlocks - iBlock - 1) -
796                (numberBlocks - iBlock - nb) * (numberBlocks - iBlock - nb - 1)) >> 1;
797           ClpCholeskyCrecRec(thisStruct, above + number_entries(nb), nUnder - nUnder2, nUnderK, nDo, aUnder,
798                              aOther + number_entries(i), work, iBlock + nb, jBlock, numberBlocks);
799      }
800 }
801 /* Leaf recursive factor*/
802 void
ClpCholeskyCfactorLeaf(ClpCholeskyDenseC * thisStruct,longDouble * a,int n,longDouble * diagonal,longDouble * work,int * rowsDropped)803 ClpCholeskyCfactorLeaf(ClpCholeskyDenseC * thisStruct, longDouble * a, int n,
804                        longDouble * diagonal, longDouble * work, int * rowsDropped)
805 {
806      double dropValue = thisStruct->doubleParameters_[0];
807      int firstPositive = thisStruct->integerParameters_[0];
808      int rowOffset = static_cast<int>(diagonal - thisStruct->diagonal_);
809      int i, j, k;
810      CoinWorkDouble t00, temp1;
811      longDouble * aa;
812      aa = a - BLOCK;
813      for (j = 0; j < n; j ++) {
814           bool dropColumn;
815           CoinWorkDouble useT00;
816           aa += BLOCK;
817           t00 = aa[j];
818           for (k = 0; k < j; ++k) {
819                CoinWorkDouble multiplier = work[k];
820                t00 -= a[j + k * BLOCK] * a[j + k * BLOCK] * multiplier;
821           }
822           dropColumn = false;
823           useT00 = t00;
824           if (j + rowOffset < firstPositive) {
825                /* must be negative*/
826                if (t00 <= -dropValue) {
827                     /*aa[j]=t00;*/
828                     t00 = 1.0 / t00;
829                } else {
830                     dropColumn = true;
831                     /*aa[j]=-1.0e100;*/
832                     useT00 = -1.0e-100;
833                     t00 = 0.0;
834                }
835           } else {
836                /* must be positive*/
837                if (t00 >= dropValue) {
838                     /*aa[j]=t00;*/
839                     t00 = 1.0 / t00;
840                } else {
841                     dropColumn = true;
842                     /*aa[j]=1.0e100;*/
843                     useT00 = 1.0e-100;
844                     t00 = 0.0;
845                }
846           }
847           if (!dropColumn) {
848                diagonal[j] = t00;
849                work[j] = useT00;
850                temp1 = t00;
851                for (i = j + 1; i < n; i++) {
852                     t00 = aa[i];
853                     for (k = 0; k < j; ++k) {
854                          CoinWorkDouble multiplier = work[k];
855                          t00 -= a[i + k * BLOCK] * a[j + k * BLOCK] * multiplier;
856                     }
857                     aa[i] = t00 * temp1;
858                }
859           } else {
860                /* drop column*/
861                rowsDropped[j+rowOffset] = 2;
862                diagonal[j] = 0.0;
863                /*aa[j]=1.0e100;*/
864                work[j] = 1.0e100;
865                for (i = j + 1; i < n; i++) {
866                     aa[i] = 0.0;
867                }
868           }
869      }
870 }
871 /* Leaf recursive triangle rectangle update*/
872 void
ClpCholeskyCtriRecLeaf(longDouble * aTri,longDouble * aUnder,longDouble * diagonal,longDouble * work,int nUnder)873 ClpCholeskyCtriRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble * aTri, longDouble * aUnder, longDouble * diagonal, longDouble * work,
874           int nUnder)
875 {
876 #ifdef POS_DEBUG
877      int iru, icu;
878      int iu = bNumber(aUnder, iru, icu);
879      int irt, ict;
880      int it = bNumber(aTri, irt, ict);
881      /*printf("%d %d\n",iu,it);*/
882      printf("trirecleaf  under (%d,%d), tri (%d,%d)\n",
883             iru, icu, irt, ict);
884      assert (diagonal == thisStruct->diagonal_ + ict * BLOCK);
885 #endif
886      int j;
887      longDouble * aa;
888 #ifdef BLOCKUNROLL
889      if (nUnder == BLOCK) {
890           aa = aTri - 2 * BLOCK;
891           for (j = 0; j < BLOCK; j += 2) {
892                int i;
893                CoinWorkDouble temp0 = diagonal[j];
894                CoinWorkDouble temp1 = diagonal[j+1];
895                aa += 2 * BLOCK;
896                for ( i = 0; i < BLOCK; i += 2) {
897                     CoinWorkDouble at1;
898                     CoinWorkDouble t00 = aUnder[i+j*BLOCK];
899                     CoinWorkDouble t10 = aUnder[i+ BLOCK + j*BLOCK];
900                     CoinWorkDouble t01 = aUnder[i+1+j*BLOCK];
901                     CoinWorkDouble t11 = aUnder[i+1+ BLOCK + j*BLOCK];
902                     int k;
903                     for (k = 0; k < j; ++k) {
904                          CoinWorkDouble multiplier = work[k];
905                          CoinWorkDouble au0 = aUnder[i + k * BLOCK] * multiplier;
906                          CoinWorkDouble au1 = aUnder[i + 1 + k * BLOCK] * multiplier;
907                          CoinWorkDouble at0 = aTri[j + k * BLOCK];
908                          CoinWorkDouble at1 = aTri[j + 1 + k * BLOCK];
909                          t00 -= au0 * at0;
910                          t10 -= au0 * at1;
911                          t01 -= au1 * at0;
912                          t11 -= au1 * at1;
913                     }
914                     t00 *= temp0;
915                     at1 = aTri[j + 1 + j*BLOCK] * work[j];
916                     t10 -= t00 * at1;
917                     t01 *= temp0;
918                     t11 -= t01 * at1;
919                     aUnder[i+j*BLOCK] = t00;
920                     aUnder[i+1+j*BLOCK] = t01;
921                     aUnder[i+ BLOCK + j*BLOCK] = t10 * temp1;
922                     aUnder[i+1+ BLOCK + j*BLOCK] = t11 * temp1;
923                }
924           }
925      } else {
926 #endif
927           aa = aTri - BLOCK;
928           for (j = 0; j < BLOCK; j ++) {
929                int i;
930                CoinWorkDouble temp1 = diagonal[j];
931                aa += BLOCK;
932                for (i = 0; i < nUnder; i++) {
933                     int k;
934                     CoinWorkDouble t00 = aUnder[i+j*BLOCK];
935                     for ( k = 0; k < j; ++k) {
936                          CoinWorkDouble multiplier = work[k];
937                          t00 -= aUnder[i + k * BLOCK] * aTri[j + k * BLOCK] * multiplier;
938                     }
939                     aUnder[i+j*BLOCK] = t00 * temp1;
940                }
941           }
942 #ifdef BLOCKUNROLL
943      }
944 #endif
945 }
946 /* Leaf recursive rectangle triangle update*/
ClpCholeskyCrecTriLeaf(longDouble * aUnder,longDouble * aTri,longDouble * work,int nUnder)947 void ClpCholeskyCrecTriLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble * aUnder, longDouble * aTri,
948           /*longDouble * diagonal,*/ longDouble * work, int nUnder)
949 {
950 #ifdef POS_DEBUG
951      int iru, icu;
952      int iu = bNumber(aUnder, iru, icu);
953      int irt, ict;
954      int it = bNumber(aTri, irt, ict);
955      /*printf("%d %d\n",iu,it);*/
956      printf("rectrileaf  under (%d,%d), tri (%d,%d)\n",
957             iru, icu, irt, ict);
958      assert (diagonal == thisStruct->diagonal_ + icu * BLOCK);
959 #endif
960      int i, j, k;
961      CoinWorkDouble t00;
962      longDouble * aa;
963 #ifdef BLOCKUNROLL
964      if (nUnder == BLOCK) {
965           longDouble * aUnder2 = aUnder - 2;
966           aa = aTri - 2 * BLOCK;
967           for (j = 0; j < BLOCK; j += 2) {
968                CoinWorkDouble t00, t01, t10, t11;
969                aa += 2 * BLOCK;
970                aUnder2 += 2;
971                t00 = aa[j];
972                t01 = aa[j+1];
973                t10 = aa[j+1+BLOCK];
974                for (k = 0; k < BLOCK; ++k) {
975                     CoinWorkDouble multiplier = work[k];
976                     CoinWorkDouble a0 = aUnder2[k * BLOCK];
977                     CoinWorkDouble a1 = aUnder2[1 + k * BLOCK];
978                     CoinWorkDouble x0 = a0 * multiplier;
979                     CoinWorkDouble x1 = a1 * multiplier;
980                     t00 -= a0 * x0;
981                     t01 -= a1 * x0;
982                     t10 -= a1 * x1;
983                }
984                aa[j] = t00;
985                aa[j+1] = t01;
986                aa[j+1+BLOCK] = t10;
987                for (i = j + 2; i < BLOCK; i += 2) {
988                     t00 = aa[i];
989                     t01 = aa[i+BLOCK];
990                     t10 = aa[i+1];
991                     t11 = aa[i+1+BLOCK];
992                     for (k = 0; k < BLOCK; ++k) {
993                          CoinWorkDouble multiplier = work[k];
994                          CoinWorkDouble a0 = aUnder2[k * BLOCK] * multiplier;
995                          CoinWorkDouble a1 = aUnder2[1 + k * BLOCK] * multiplier;
996                          t00 -= aUnder[i + k * BLOCK] * a0;
997                          t01 -= aUnder[i + k * BLOCK] * a1;
998                          t10 -= aUnder[i + 1 + k * BLOCK] * a0;
999                          t11 -= aUnder[i + 1 + k * BLOCK] * a1;
1000                     }
1001                     aa[i] = t00;
1002                     aa[i+BLOCK] = t01;
1003                     aa[i+1] = t10;
1004                     aa[i+1+BLOCK] = t11;
1005                }
1006           }
1007      } else {
1008 #endif
1009           aa = aTri - BLOCK;
1010           for (j = 0; j < nUnder; j ++) {
1011                aa += BLOCK;
1012                for (i = j; i < nUnder; i++) {
1013                     t00 = aa[i];
1014                     for (k = 0; k < BLOCK; ++k) {
1015                          CoinWorkDouble multiplier = work[k];
1016                          t00 -= aUnder[i + k * BLOCK] * aUnder[j + k * BLOCK] * multiplier;
1017                     }
1018                     aa[i] = t00;
1019                }
1020           }
1021 #ifdef BLOCKUNROLL
1022      }
1023 #endif
1024 }
1025 /* Leaf recursive rectangle rectangle update,
1026    nUnder is number of rows in iBlock,
1027    nUnderK is number of rows in kBlock
1028 */
1029 void
ClpCholeskyCrecRecLeaf(const longDouble * COIN_RESTRICT above,const longDouble * COIN_RESTRICT aUnder,longDouble * COIN_RESTRICT aOther,const longDouble * COIN_RESTRICT work,int nUnder)1030 ClpCholeskyCrecRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/
1031      const longDouble * COIN_RESTRICT above,
1032      const longDouble * COIN_RESTRICT aUnder,
1033      longDouble * COIN_RESTRICT aOther,
1034      const longDouble * COIN_RESTRICT work,
1035      int nUnder)
1036 {
1037 #ifdef POS_DEBUG
1038      int ira, ica;
1039      int ia = bNumber(above, ira, ica);
1040      int iru, icu;
1041      int iu = bNumber(aUnder, iru, icu);
1042      int iro, ico;
1043      int io = bNumber(aOther, iro, ico);
1044      /*printf("%d %d %d\n",ia,iu,io);*/
1045      printf("recrecleaf above (%d,%d), under (%d,%d), other (%d,%d)\n",
1046             ira, ica, iru, icu, iro, ico);
1047 #endif
1048      int i, j, k;
1049      longDouble * aa;
1050 #ifdef BLOCKUNROLL
1051      aa = aOther - 4 * BLOCK;
1052      if (nUnder == BLOCK) {
1053           /*#define INTEL*/
1054 #ifdef INTEL
1055           aa += 2 * BLOCK;
1056           for (j = 0; j < BLOCK; j += 2) {
1057                aa += 2 * BLOCK;
1058                for (i = 0; i < BLOCK; i += 2) {
1059                     CoinWorkDouble t00 = aa[i+0*BLOCK];
1060                     CoinWorkDouble t10 = aa[i+1*BLOCK];
1061                     CoinWorkDouble t01 = aa[i+1+0*BLOCK];
1062                     CoinWorkDouble t11 = aa[i+1+1*BLOCK];
1063                     for (k = 0; k < BLOCK; k++) {
1064                          CoinWorkDouble multiplier = work[k];
1065                          CoinWorkDouble a00 = aUnder[i+k*BLOCK] * multiplier;
1066                          CoinWorkDouble a01 = aUnder[i+1+k*BLOCK] * multiplier;
1067                          t00 -= a00 * above[j + 0 + k * BLOCK];
1068                          t10 -= a00 * above[j + 1 + k * BLOCK];
1069                          t01 -= a01 * above[j + 0 + k * BLOCK];
1070                          t11 -= a01 * above[j + 1 + k * BLOCK];
1071                     }
1072                     aa[i+0*BLOCK] = t00;
1073                     aa[i+1*BLOCK] = t10;
1074                     aa[i+1+0*BLOCK] = t01;
1075                     aa[i+1+1*BLOCK] = t11;
1076                }
1077           }
1078 #else
1079           for (j = 0; j < BLOCK; j += 4) {
1080                aa += 4 * BLOCK;
1081                for (i = 0; i < BLOCK; i += 4) {
1082                     CoinWorkDouble t00 = aa[i+0+0*BLOCK];
1083                     CoinWorkDouble t10 = aa[i+0+1*BLOCK];
1084                     CoinWorkDouble t20 = aa[i+0+2*BLOCK];
1085                     CoinWorkDouble t30 = aa[i+0+3*BLOCK];
1086                     CoinWorkDouble t01 = aa[i+1+0*BLOCK];
1087                     CoinWorkDouble t11 = aa[i+1+1*BLOCK];
1088                     CoinWorkDouble t21 = aa[i+1+2*BLOCK];
1089                     CoinWorkDouble t31 = aa[i+1+3*BLOCK];
1090                     CoinWorkDouble t02 = aa[i+2+0*BLOCK];
1091                     CoinWorkDouble t12 = aa[i+2+1*BLOCK];
1092                     CoinWorkDouble t22 = aa[i+2+2*BLOCK];
1093                     CoinWorkDouble t32 = aa[i+2+3*BLOCK];
1094                     CoinWorkDouble t03 = aa[i+3+0*BLOCK];
1095                     CoinWorkDouble t13 = aa[i+3+1*BLOCK];
1096                     CoinWorkDouble t23 = aa[i+3+2*BLOCK];
1097                     CoinWorkDouble t33 = aa[i+3+3*BLOCK];
1098                     const longDouble * COIN_RESTRICT aUnderNow = aUnder + i;
1099                     const longDouble * COIN_RESTRICT aboveNow = above + j;
1100                     for (k = 0; k < BLOCK; k++) {
1101                          CoinWorkDouble multiplier = work[k];
1102                          CoinWorkDouble a00 = aUnderNow[0] * multiplier;
1103                          CoinWorkDouble a01 = aUnderNow[1] * multiplier;
1104                          CoinWorkDouble a02 = aUnderNow[2] * multiplier;
1105                          CoinWorkDouble a03 = aUnderNow[3] * multiplier;
1106                          t00 -= a00 * aboveNow[0];
1107                          t10 -= a00 * aboveNow[1];
1108                          t20 -= a00 * aboveNow[2];
1109                          t30 -= a00 * aboveNow[3];
1110                          t01 -= a01 * aboveNow[0];
1111                          t11 -= a01 * aboveNow[1];
1112                          t21 -= a01 * aboveNow[2];
1113                          t31 -= a01 * aboveNow[3];
1114                          t02 -= a02 * aboveNow[0];
1115                          t12 -= a02 * aboveNow[1];
1116                          t22 -= a02 * aboveNow[2];
1117                          t32 -= a02 * aboveNow[3];
1118                          t03 -= a03 * aboveNow[0];
1119                          t13 -= a03 * aboveNow[1];
1120                          t23 -= a03 * aboveNow[2];
1121                          t33 -= a03 * aboveNow[3];
1122                          aUnderNow += BLOCK;
1123                          aboveNow += BLOCK;
1124                     }
1125                     aa[i+0+0*BLOCK] = t00;
1126                     aa[i+0+1*BLOCK] = t10;
1127                     aa[i+0+2*BLOCK] = t20;
1128                     aa[i+0+3*BLOCK] = t30;
1129                     aa[i+1+0*BLOCK] = t01;
1130                     aa[i+1+1*BLOCK] = t11;
1131                     aa[i+1+2*BLOCK] = t21;
1132                     aa[i+1+3*BLOCK] = t31;
1133                     aa[i+2+0*BLOCK] = t02;
1134                     aa[i+2+1*BLOCK] = t12;
1135                     aa[i+2+2*BLOCK] = t22;
1136                     aa[i+2+3*BLOCK] = t32;
1137                     aa[i+3+0*BLOCK] = t03;
1138                     aa[i+3+1*BLOCK] = t13;
1139                     aa[i+3+2*BLOCK] = t23;
1140                     aa[i+3+3*BLOCK] = t33;
1141                }
1142           }
1143 #endif
1144      } else {
1145           int odd = nUnder & 1;
1146           int n = nUnder - odd;
1147           for (j = 0; j < BLOCK; j += 4) {
1148                aa += 4 * BLOCK;
1149                for (i = 0; i < n; i += 2) {
1150                     CoinWorkDouble t00 = aa[i+0*BLOCK];
1151                     CoinWorkDouble t10 = aa[i+1*BLOCK];
1152                     CoinWorkDouble t20 = aa[i+2*BLOCK];
1153                     CoinWorkDouble t30 = aa[i+3*BLOCK];
1154                     CoinWorkDouble t01 = aa[i+1+0*BLOCK];
1155                     CoinWorkDouble t11 = aa[i+1+1*BLOCK];
1156                     CoinWorkDouble t21 = aa[i+1+2*BLOCK];
1157                     CoinWorkDouble t31 = aa[i+1+3*BLOCK];
1158                     const longDouble * COIN_RESTRICT aUnderNow = aUnder + i;
1159                     const longDouble * COIN_RESTRICT aboveNow = above + j;
1160                     for (k = 0; k < BLOCK; k++) {
1161                          CoinWorkDouble multiplier = work[k];
1162                          CoinWorkDouble a00 = aUnderNow[0] * multiplier;
1163                          CoinWorkDouble a01 = aUnderNow[1] * multiplier;
1164                          t00 -= a00 * aboveNow[0];
1165                          t10 -= a00 * aboveNow[1];
1166                          t20 -= a00 * aboveNow[2];
1167                          t30 -= a00 * aboveNow[3];
1168                          t01 -= a01 * aboveNow[0];
1169                          t11 -= a01 * aboveNow[1];
1170                          t21 -= a01 * aboveNow[2];
1171                          t31 -= a01 * aboveNow[3];
1172                          aUnderNow += BLOCK;
1173                          aboveNow += BLOCK;
1174                     }
1175                     aa[i+0*BLOCK] = t00;
1176                     aa[i+1*BLOCK] = t10;
1177                     aa[i+2*BLOCK] = t20;
1178                     aa[i+3*BLOCK] = t30;
1179                     aa[i+1+0*BLOCK] = t01;
1180                     aa[i+1+1*BLOCK] = t11;
1181                     aa[i+1+2*BLOCK] = t21;
1182                     aa[i+1+3*BLOCK] = t31;
1183                }
1184                if (odd) {
1185                     CoinWorkDouble t0 = aa[n+0*BLOCK];
1186                     CoinWorkDouble t1 = aa[n+1*BLOCK];
1187                     CoinWorkDouble t2 = aa[n+2*BLOCK];
1188                     CoinWorkDouble t3 = aa[n+3*BLOCK];
1189                     CoinWorkDouble a0;
1190                     for (k = 0; k < BLOCK; k++) {
1191                          a0 = aUnder[n+k*BLOCK] * work[k];
1192                          t0 -= a0 * above[j + 0 + k * BLOCK];
1193                          t1 -= a0 * above[j + 1 + k * BLOCK];
1194                          t2 -= a0 * above[j + 2 + k * BLOCK];
1195                          t3 -= a0 * above[j + 3 + k * BLOCK];
1196                     }
1197                     aa[n+0*BLOCK] = t0;
1198                     aa[n+1*BLOCK] = t1;
1199                     aa[n+2*BLOCK] = t2;
1200                     aa[n+3*BLOCK] = t3;
1201                }
1202           }
1203      }
1204 #else
1205      aa = aOther - BLOCK;
1206      for (j = 0; j < BLOCK; j ++) {
1207           aa += BLOCK;
1208           for (i = 0; i < nUnder; i++) {
1209                CoinWorkDouble t00 = aa[i+0*BLOCK];
1210                for (k = 0; k < BLOCK; k++) {
1211                     CoinWorkDouble a00 = aUnder[i+k*BLOCK] * work[k];
1212                     t00 -= a00 * above[j + k * BLOCK];
1213                }
1214                aa[i] = t00;
1215           }
1216      }
1217 #endif
1218 }
1219 /* Uses factorization to solve. */
1220 void
solve(CoinWorkDouble * region)1221 ClpCholeskyDense::solve (CoinWorkDouble * region)
1222 {
1223 #ifdef CHOL_COMPARE
1224      double * region2 = NULL;
1225      if (numberRows_ < 200) {
1226           longDouble * xx = sparseFactor_;
1227           longDouble * yy = diagonal_;
1228           diagonal_ = sparseFactor_ + 40000;
1229           sparseFactor_ = diagonal_ + numberRows_;
1230           region2 = ClpCopyOfArray(region, numberRows_);
1231           int iRow, iColumn;
1232           int addOffset = numberRows_ - 1;
1233           longDouble * work = sparseFactor_ - 1;
1234           for (iColumn = 0; iColumn < numberRows_; iColumn++) {
1235                double value = region2[iColumn];
1236                for (iRow = iColumn + 1; iRow < numberRows_; iRow++)
1237                     region2[iRow] -= value * work[iRow];
1238                addOffset--;
1239                work += addOffset;
1240           }
1241           for (iColumn = numberRows_ - 1; iColumn >= 0; iColumn--) {
1242                double value = region2[iColumn] * diagonal_[iColumn];
1243                work -= addOffset;
1244                addOffset++;
1245                for (iRow = iColumn + 1; iRow < numberRows_; iRow++)
1246                     value -= region2[iRow] * work[iRow];
1247                region2[iColumn] = value;
1248           }
1249           sparseFactor_ = xx;
1250           diagonal_ = yy;
1251      }
1252 #endif
1253      /*longDouble * xx = sparseFactor_;*/
1254      /*longDouble * yy = diagonal_;*/
1255      /*diagonal_ = sparseFactor_ + 40000;*/
1256      /*sparseFactor_=diagonal_ + numberRows_;*/
1257      int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
1258      /* later align on boundary*/
1259      longDouble * a = sparseFactor_ + BLOCKSQ * numberBlocks;
1260      int iBlock;
1261      longDouble * aa = a;
1262      int iColumn;
1263      for (iBlock = 0; iBlock < numberBlocks; iBlock++) {
1264           int nChunk;
1265           int jBlock;
1266           int iDo = iBlock * BLOCK;
1267           int base = iDo;
1268           if (iDo + BLOCK > numberRows_) {
1269                nChunk = numberRows_ - iDo;
1270           } else {
1271                nChunk = BLOCK;
1272           }
1273           solveF1(aa, nChunk, region + iDo);
1274           for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) {
1275                base += BLOCK;
1276                aa += BLOCKSQ;
1277                if (base + BLOCK > numberRows_) {
1278                     nChunk = numberRows_ - base;
1279                } else {
1280                     nChunk = BLOCK;
1281                }
1282                solveF2(aa, nChunk, region + iDo, region + base);
1283           }
1284           aa += BLOCKSQ;
1285      }
1286      /* do diagonal outside*/
1287      for (iColumn = 0; iColumn < numberRows_; iColumn++)
1288           region[iColumn] *= diagonal_[iColumn];
1289      int offset = ((numberBlocks * (numberBlocks + 1)) >> 1);
1290      aa = a + number_entries(offset - 1);
1291      int lBase = (numberBlocks - 1) * BLOCK;
1292      for (iBlock = numberBlocks - 1; iBlock >= 0; iBlock--) {
1293           int nChunk;
1294           int jBlock;
1295           int triBase = iBlock * BLOCK;
1296           int iBase = lBase;
1297           for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) {
1298                if (iBase + BLOCK > numberRows_) {
1299                     nChunk = numberRows_ - iBase;
1300                } else {
1301                     nChunk = BLOCK;
1302                }
1303                solveB2(aa, nChunk, region + triBase, region + iBase);
1304                iBase -= BLOCK;
1305                aa -= BLOCKSQ;
1306           }
1307           if (triBase + BLOCK > numberRows_) {
1308                nChunk = numberRows_ - triBase;
1309           } else {
1310                nChunk = BLOCK;
1311           }
1312           solveB1(aa, nChunk, region + triBase);
1313           aa -= BLOCKSQ;
1314      }
1315 #ifdef CHOL_COMPARE
1316      if (numberRows_ < 200) {
1317           for (int i = 0; i < numberRows_; i++) {
1318                assert(CoinAbs(region[i] - region2[i]) < 1.0e-3);
1319           }
1320           delete [] region2;
1321      }
1322 #endif
1323 }
1324 /* Forward part of solve 1*/
1325 void
solveF1(longDouble * a,int n,CoinWorkDouble * region)1326 ClpCholeskyDense::solveF1(longDouble * a, int n, CoinWorkDouble * region)
1327 {
1328      int j, k;
1329      CoinWorkDouble t00;
1330      for (j = 0; j < n; j ++) {
1331           t00 = region[j];
1332           for (k = 0; k < j; ++k) {
1333                t00 -= region[k] * a[j + k * BLOCK];
1334           }
1335           /*t00*=a[j + j * BLOCK];*/
1336           region[j] = t00;
1337      }
1338 }
1339 /* Forward part of solve 2*/
1340 void
solveF2(longDouble * a,int n,CoinWorkDouble * region,CoinWorkDouble * region2)1341 ClpCholeskyDense::solveF2(longDouble * a, int n, CoinWorkDouble * region, CoinWorkDouble * region2)
1342 {
1343      int j, k;
1344 #ifdef BLOCKUNROLL
1345      if (n == BLOCK) {
1346           for (k = 0; k < BLOCK; k += 4) {
1347                CoinWorkDouble t0 = region2[0];
1348                CoinWorkDouble t1 = region2[1];
1349                CoinWorkDouble t2 = region2[2];
1350                CoinWorkDouble t3 = region2[3];
1351                t0 -= region[0] * a[0 + 0 * BLOCK];
1352                t1 -= region[0] * a[1 + 0 * BLOCK];
1353                t2 -= region[0] * a[2 + 0 * BLOCK];
1354                t3 -= region[0] * a[3 + 0 * BLOCK];
1355 
1356                t0 -= region[1] * a[0 + 1 * BLOCK];
1357                t1 -= region[1] * a[1 + 1 * BLOCK];
1358                t2 -= region[1] * a[2 + 1 * BLOCK];
1359                t3 -= region[1] * a[3 + 1 * BLOCK];
1360 
1361                t0 -= region[2] * a[0 + 2 * BLOCK];
1362                t1 -= region[2] * a[1 + 2 * BLOCK];
1363                t2 -= region[2] * a[2 + 2 * BLOCK];
1364                t3 -= region[2] * a[3 + 2 * BLOCK];
1365 
1366                t0 -= region[3] * a[0 + 3 * BLOCK];
1367                t1 -= region[3] * a[1 + 3 * BLOCK];
1368                t2 -= region[3] * a[2 + 3 * BLOCK];
1369                t3 -= region[3] * a[3 + 3 * BLOCK];
1370 
1371                t0 -= region[4] * a[0 + 4 * BLOCK];
1372                t1 -= region[4] * a[1 + 4 * BLOCK];
1373                t2 -= region[4] * a[2 + 4 * BLOCK];
1374                t3 -= region[4] * a[3 + 4 * BLOCK];
1375 
1376                t0 -= region[5] * a[0 + 5 * BLOCK];
1377                t1 -= region[5] * a[1 + 5 * BLOCK];
1378                t2 -= region[5] * a[2 + 5 * BLOCK];
1379                t3 -= region[5] * a[3 + 5 * BLOCK];
1380 
1381                t0 -= region[6] * a[0 + 6 * BLOCK];
1382                t1 -= region[6] * a[1 + 6 * BLOCK];
1383                t2 -= region[6] * a[2 + 6 * BLOCK];
1384                t3 -= region[6] * a[3 + 6 * BLOCK];
1385 
1386                t0 -= region[7] * a[0 + 7 * BLOCK];
1387                t1 -= region[7] * a[1 + 7 * BLOCK];
1388                t2 -= region[7] * a[2 + 7 * BLOCK];
1389                t3 -= region[7] * a[3 + 7 * BLOCK];
1390 #if BLOCK>8
1391                t0 -= region[8] * a[0 + 8 * BLOCK];
1392                t1 -= region[8] * a[1 + 8 * BLOCK];
1393                t2 -= region[8] * a[2 + 8 * BLOCK];
1394                t3 -= region[8] * a[3 + 8 * BLOCK];
1395 
1396                t0 -= region[9] * a[0 + 9 * BLOCK];
1397                t1 -= region[9] * a[1 + 9 * BLOCK];
1398                t2 -= region[9] * a[2 + 9 * BLOCK];
1399                t3 -= region[9] * a[3 + 9 * BLOCK];
1400 
1401                t0 -= region[10] * a[0 + 10 * BLOCK];
1402                t1 -= region[10] * a[1 + 10 * BLOCK];
1403                t2 -= region[10] * a[2 + 10 * BLOCK];
1404                t3 -= region[10] * a[3 + 10 * BLOCK];
1405 
1406                t0 -= region[11] * a[0 + 11 * BLOCK];
1407                t1 -= region[11] * a[1 + 11 * BLOCK];
1408                t2 -= region[11] * a[2 + 11 * BLOCK];
1409                t3 -= region[11] * a[3 + 11 * BLOCK];
1410 
1411                t0 -= region[12] * a[0 + 12 * BLOCK];
1412                t1 -= region[12] * a[1 + 12 * BLOCK];
1413                t2 -= region[12] * a[2 + 12 * BLOCK];
1414                t3 -= region[12] * a[3 + 12 * BLOCK];
1415 
1416                t0 -= region[13] * a[0 + 13 * BLOCK];
1417                t1 -= region[13] * a[1 + 13 * BLOCK];
1418                t2 -= region[13] * a[2 + 13 * BLOCK];
1419                t3 -= region[13] * a[3 + 13 * BLOCK];
1420 
1421                t0 -= region[14] * a[0 + 14 * BLOCK];
1422                t1 -= region[14] * a[1 + 14 * BLOCK];
1423                t2 -= region[14] * a[2 + 14 * BLOCK];
1424                t3 -= region[14] * a[3 + 14 * BLOCK];
1425 
1426                t0 -= region[15] * a[0 + 15 * BLOCK];
1427                t1 -= region[15] * a[1 + 15 * BLOCK];
1428                t2 -= region[15] * a[2 + 15 * BLOCK];
1429                t3 -= region[15] * a[3 + 15 * BLOCK];
1430 #endif
1431                region2[0] = t0;
1432                region2[1] = t1;
1433                region2[2] = t2;
1434                region2[3] = t3;
1435                region2 += 4;
1436                a += 4;
1437           }
1438      } else {
1439 #endif
1440           for (k = 0; k < n; ++k) {
1441                CoinWorkDouble t00 = region2[k];
1442                for (j = 0; j < BLOCK; j ++) {
1443                     t00 -= region[j] * a[k + j * BLOCK];
1444                }
1445                region2[k] = t00;
1446           }
1447 #ifdef BLOCKUNROLL
1448      }
1449 #endif
1450 }
1451 /* Backward part of solve 1*/
1452 void
solveB1(longDouble * a,int n,CoinWorkDouble * region)1453 ClpCholeskyDense::solveB1(longDouble * a, int n, CoinWorkDouble * region)
1454 {
1455      int j, k;
1456      CoinWorkDouble t00;
1457      for (j = n - 1; j >= 0; j --) {
1458           t00 = region[j];
1459           for (k = j + 1; k < n; ++k) {
1460                t00 -= region[k] * a[k + j * BLOCK];
1461           }
1462           /*t00*=a[j + j * BLOCK];*/
1463           region[j] = t00;
1464      }
1465 }
1466 /* Backward part of solve 2*/
1467 void
solveB2(longDouble * a,int n,CoinWorkDouble * region,CoinWorkDouble * region2)1468 ClpCholeskyDense::solveB2(longDouble * a, int n, CoinWorkDouble * region, CoinWorkDouble * region2)
1469 {
1470      int j, k;
1471 #ifdef BLOCKUNROLL
1472      if (n == BLOCK) {
1473           for (j = 0; j < BLOCK; j += 4) {
1474                CoinWorkDouble t0 = region[0];
1475                CoinWorkDouble t1 = region[1];
1476                CoinWorkDouble t2 = region[2];
1477                CoinWorkDouble t3 = region[3];
1478                t0 -= region2[0] * a[0 + 0*BLOCK];
1479                t1 -= region2[0] * a[0 + 1*BLOCK];
1480                t2 -= region2[0] * a[0 + 2*BLOCK];
1481                t3 -= region2[0] * a[0 + 3*BLOCK];
1482 
1483                t0 -= region2[1] * a[1 + 0*BLOCK];
1484                t1 -= region2[1] * a[1 + 1*BLOCK];
1485                t2 -= region2[1] * a[1 + 2*BLOCK];
1486                t3 -= region2[1] * a[1 + 3*BLOCK];
1487 
1488                t0 -= region2[2] * a[2 + 0*BLOCK];
1489                t1 -= region2[2] * a[2 + 1*BLOCK];
1490                t2 -= region2[2] * a[2 + 2*BLOCK];
1491                t3 -= region2[2] * a[2 + 3*BLOCK];
1492 
1493                t0 -= region2[3] * a[3 + 0*BLOCK];
1494                t1 -= region2[3] * a[3 + 1*BLOCK];
1495                t2 -= region2[3] * a[3 + 2*BLOCK];
1496                t3 -= region2[3] * a[3 + 3*BLOCK];
1497 
1498                t0 -= region2[4] * a[4 + 0*BLOCK];
1499                t1 -= region2[4] * a[4 + 1*BLOCK];
1500                t2 -= region2[4] * a[4 + 2*BLOCK];
1501                t3 -= region2[4] * a[4 + 3*BLOCK];
1502 
1503                t0 -= region2[5] * a[5 + 0*BLOCK];
1504                t1 -= region2[5] * a[5 + 1*BLOCK];
1505                t2 -= region2[5] * a[5 + 2*BLOCK];
1506                t3 -= region2[5] * a[5 + 3*BLOCK];
1507 
1508                t0 -= region2[6] * a[6 + 0*BLOCK];
1509                t1 -= region2[6] * a[6 + 1*BLOCK];
1510                t2 -= region2[6] * a[6 + 2*BLOCK];
1511                t3 -= region2[6] * a[6 + 3*BLOCK];
1512 
1513                t0 -= region2[7] * a[7 + 0*BLOCK];
1514                t1 -= region2[7] * a[7 + 1*BLOCK];
1515                t2 -= region2[7] * a[7 + 2*BLOCK];
1516                t3 -= region2[7] * a[7 + 3*BLOCK];
1517 #if BLOCK>8
1518 
1519                t0 -= region2[8] * a[8 + 0*BLOCK];
1520                t1 -= region2[8] * a[8 + 1*BLOCK];
1521                t2 -= region2[8] * a[8 + 2*BLOCK];
1522                t3 -= region2[8] * a[8 + 3*BLOCK];
1523 
1524                t0 -= region2[9] * a[9 + 0*BLOCK];
1525                t1 -= region2[9] * a[9 + 1*BLOCK];
1526                t2 -= region2[9] * a[9 + 2*BLOCK];
1527                t3 -= region2[9] * a[9 + 3*BLOCK];
1528 
1529                t0 -= region2[10] * a[10 + 0*BLOCK];
1530                t1 -= region2[10] * a[10 + 1*BLOCK];
1531                t2 -= region2[10] * a[10 + 2*BLOCK];
1532                t3 -= region2[10] * a[10 + 3*BLOCK];
1533 
1534                t0 -= region2[11] * a[11 + 0*BLOCK];
1535                t1 -= region2[11] * a[11 + 1*BLOCK];
1536                t2 -= region2[11] * a[11 + 2*BLOCK];
1537                t3 -= region2[11] * a[11 + 3*BLOCK];
1538 
1539                t0 -= region2[12] * a[12 + 0*BLOCK];
1540                t1 -= region2[12] * a[12 + 1*BLOCK];
1541                t2 -= region2[12] * a[12 + 2*BLOCK];
1542                t3 -= region2[12] * a[12 + 3*BLOCK];
1543 
1544                t0 -= region2[13] * a[13 + 0*BLOCK];
1545                t1 -= region2[13] * a[13 + 1*BLOCK];
1546                t2 -= region2[13] * a[13 + 2*BLOCK];
1547                t3 -= region2[13] * a[13 + 3*BLOCK];
1548 
1549                t0 -= region2[14] * a[14 + 0*BLOCK];
1550                t1 -= region2[14] * a[14 + 1*BLOCK];
1551                t2 -= region2[14] * a[14 + 2*BLOCK];
1552                t3 -= region2[14] * a[14 + 3*BLOCK];
1553 
1554                t0 -= region2[15] * a[15 + 0*BLOCK];
1555                t1 -= region2[15] * a[15 + 1*BLOCK];
1556                t2 -= region2[15] * a[15 + 2*BLOCK];
1557                t3 -= region2[15] * a[15 + 3*BLOCK];
1558 #endif
1559                region[0] = t0;
1560                region[1] = t1;
1561                region[2] = t2;
1562                region[3] = t3;
1563                a += 4 * BLOCK;
1564                region += 4;
1565           }
1566      } else {
1567 #endif
1568           for (j = 0; j < BLOCK; j ++) {
1569                CoinWorkDouble t00 = region[j];
1570                for (k = 0; k < n; ++k) {
1571                     t00 -= region2[k] * a[k + j * BLOCK];
1572                }
1573                region[j] = t00;
1574           }
1575 #ifdef BLOCKUNROLL
1576      }
1577 #endif
1578 }
1579