1 /* $Id: ClpCholeskyDense.cpp 2385 2019-01-06 19:43:06Z unxusr $ */
2 /*
3   Copyright (C) 2003, International Business Machines Corporation
4   and others.  All Rights Reserved.
5 
6   This code is licensed under the terms of the Eclipse Public License (EPL).
7 */
8 #include "CoinPragma.hpp"
9 #include "CoinHelperFunctions.hpp"
10 #include "ClpConfig.h"
11 #include "ClpHelperFunctions.hpp"
12 #include "ClpInterior.hpp"
13 #include "ClpCholeskyDense.hpp"
14 #include "ClpMessage.hpp"
15 #include "ClpQuadraticObjective.hpp"
16 #if CLP_HAS_ABC
17 #include "CoinAbcCommon.hpp"
18 #endif
19 #if CLP_HAS_ABC < 3
20 #undef cilk_for
21 #undef cilk_spawn
22 #undef cilk_sync
23 #define cilk_for for
24 #define cilk_spawn
25 #define cilk_sync
26 #endif
27 
28 /*#############################################################################*/
29 /* Constructors / Destructor / Assignment*/
30 /*#############################################################################*/
31 
32 /*-------------------------------------------------------------------*/
33 /* Default Constructor */
34 /*-------------------------------------------------------------------*/
ClpCholeskyDense()35 ClpCholeskyDense::ClpCholeskyDense()
36   : ClpCholeskyBase()
37   , borrowSpace_(false)
38 {
39   type_ = 11;
40   ;
41 }
42 
43 /*-------------------------------------------------------------------*/
44 /* Copy constructor */
45 /*-------------------------------------------------------------------*/
ClpCholeskyDense(const ClpCholeskyDense & rhs)46 ClpCholeskyDense::ClpCholeskyDense(const ClpCholeskyDense &rhs)
47   : ClpCholeskyBase(rhs)
48   , borrowSpace_(rhs.borrowSpace_)
49 {
50   assert(!rhs.borrowSpace_ || !rhs.sizeFactor_); /* can't do if borrowing space*/
51 }
52 
53 /*-------------------------------------------------------------------*/
54 /* Destructor */
55 /*-------------------------------------------------------------------*/
~ClpCholeskyDense()56 ClpCholeskyDense::~ClpCholeskyDense()
57 {
58   if (borrowSpace_) {
59     /* set NULL*/
60     sparseFactor_ = NULL;
61     workDouble_ = NULL;
62     diagonal_ = NULL;
63   }
64 }
65 
66 /*----------------------------------------------------------------*/
67 /* Assignment operator */
68 /*-------------------------------------------------------------------*/
69 ClpCholeskyDense &
operator =(const ClpCholeskyDense & rhs)70 ClpCholeskyDense::operator=(const ClpCholeskyDense &rhs)
71 {
72   if (this != &rhs) {
73     assert(!rhs.borrowSpace_ || !rhs.sizeFactor_); /* can't do if borrowing space*/
74     ClpCholeskyBase::operator=(rhs);
75     borrowSpace_ = rhs.borrowSpace_;
76   }
77   return *this;
78 }
79 /*-------------------------------------------------------------------*/
80 /* Clone*/
81 /*-------------------------------------------------------------------*/
clone() const82 ClpCholeskyBase *ClpCholeskyDense::clone() const
83 {
84   return new ClpCholeskyDense(*this);
85 }
86 /* If not power of 2 then need to redo a bit*/
87 #define BLOCK 16
88 #define BLOCKSHIFT 4
89 /* Block unroll if power of 2 and at least 8*/
90 #define BLOCKUNROLL
91 
92 #define BLOCKSQ (BLOCK * BLOCK)
93 #define BLOCKSQSHIFT (BLOCKSHIFT + BLOCKSHIFT)
94 #define number_blocks(x) (((x) + BLOCK - 1) >> BLOCKSHIFT)
95 #define number_rows(x) ((x) << BLOCKSHIFT)
96 #define number_entries(x) ((x) << BLOCKSQSHIFT)
97 /* Gets space */
reserveSpace(const ClpCholeskyBase * factor,int numberRows)98 int ClpCholeskyDense::reserveSpace(const ClpCholeskyBase *factor, int numberRows)
99 {
100   numberRows_ = numberRows;
101   int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
102   /* allow one stripe extra*/
103   numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2);
104   sizeFactor_ = numberBlocks * BLOCKSQ;
105   /*#define CHOL_COMPARE*/
106 #ifdef CHOL_COMPARE
107   sizeFactor_ += 95000;
108 #endif
109   if (!factor) {
110     sparseFactor_ = new longDouble[sizeFactor_];
111     rowsDropped_ = new char[numberRows_];
112     memset(rowsDropped_, 0, numberRows_);
113     workDouble_ = new longDouble[numberRows_];
114     diagonal_ = new longDouble[numberRows_];
115   } else {
116     borrowSpace_ = true;
117     int numberFull = factor->numberRows();
118     sparseFactor_ = factor->sparseFactor() + (factor->size() - sizeFactor_);
119     workDouble_ = factor->workDouble() + (numberFull - numberRows_);
120     diagonal_ = factor->diagonal() + (numberFull - numberRows_);
121   }
122   numberRowsDropped_ = 0;
123   return 0;
124 }
125 /* Returns space needed */
space(int numberRows) const126 int ClpCholeskyDense::space(int numberRows) const
127 {
128   int numberBlocks = (numberRows + BLOCK - 1) >> BLOCKSHIFT;
129   /* allow one stripe extra*/
130   numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2);
131   int sizeFactor = numberBlocks * BLOCKSQ;
132 #ifdef CHOL_COMPARE
133   sizeFactor += 95000;
134 #endif
135   return sizeFactor;
136 }
137 /* Orders rows and saves pointer to matrix.and model */
order(ClpInterior * model)138 int ClpCholeskyDense::order(ClpInterior *model)
139 {
140   model_ = model;
141   int numberRows;
142   int numberRowsModel = model_->numberRows();
143   int numberColumns = model_->numberColumns();
144   if (!doKKT_) {
145     numberRows = numberRowsModel;
146   } else {
147     numberRows = 2 * numberRowsModel + numberColumns;
148   }
149   reserveSpace(NULL, numberRows);
150   rowCopy_ = model->clpMatrix()->reverseOrderedCopy();
151   return 0;
152 }
153 /* Does Symbolic factorization given permutation.
154    This is called immediately after order.  If user provides this then
155    user must provide factorize and solve.  Otherwise the default factorization is used
156    returns non-zero if not enough memory */
symbolic()157 int ClpCholeskyDense::symbolic()
158 {
159   return 0;
160 }
161 /* Factorize - filling in rowsDropped and returning number dropped */
factorize(const CoinWorkDouble * diagonal,int * rowsDropped)162 int ClpCholeskyDense::factorize(const CoinWorkDouble *diagonal, int *rowsDropped)
163 {
164   assert(!borrowSpace_);
165   const CoinBigIndex *columnStart = model_->clpMatrix()->getVectorStarts();
166   const int *columnLength = model_->clpMatrix()->getVectorLengths();
167   const int *row = model_->clpMatrix()->getIndices();
168   const double *element = model_->clpMatrix()->getElements();
169   const CoinBigIndex *rowStart = rowCopy_->getVectorStarts();
170   const int *rowLength = rowCopy_->getVectorLengths();
171   const int *column = rowCopy_->getIndices();
172   const double *elementByRow = rowCopy_->getElements();
173   int numberColumns = model_->clpMatrix()->getNumCols();
174   CoinZeroN(sparseFactor_, sizeFactor_);
175   /*perturbation*/
176   CoinWorkDouble perturbation = model_->diagonalPerturbation() * model_->diagonalNorm();
177   perturbation = perturbation * perturbation;
178   if (perturbation > 1.0) {
179 #ifdef COIN_DEVELOP
180     /*if (model_->model()->logLevel()&4) */
181     std::cout << "large perturbation " << perturbation << std::endl;
182 #endif
183     perturbation = CoinSqrt(perturbation);
184     ;
185     perturbation = 1.0;
186   }
187   int iRow;
188   int newDropped = 0;
189   CoinWorkDouble largest = 1.0;
190   CoinWorkDouble smallest = COIN_DBL_MAX;
191   CoinWorkDouble delta2 = model_->delta(); /* add delta*delta to diagonal*/
192   delta2 *= delta2;
193   if (!doKKT_) {
194     longDouble *work = sparseFactor_;
195     work--; /* skip diagonal*/
196     int addOffset = numberRows_ - 1;
197     const CoinWorkDouble *diagonalSlack = diagonal + numberColumns;
198     /* largest in initial matrix*/
199     CoinWorkDouble largest2 = 1.0e-20;
200     for (iRow = 0; iRow < numberRows_; iRow++) {
201       if (!rowsDropped_[iRow]) {
202         CoinBigIndex startRow = rowStart[iRow];
203         CoinBigIndex endRow = rowStart[iRow] + rowLength[iRow];
204         CoinWorkDouble diagonalValue = diagonalSlack[iRow] + delta2;
205         for (CoinBigIndex k = startRow; k < endRow; k++) {
206           int iColumn = column[k];
207           CoinBigIndex start = columnStart[iColumn];
208           CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
209           CoinWorkDouble multiplier = diagonal[iColumn] * elementByRow[k];
210           for (CoinBigIndex j = start; j < end; j++) {
211             int jRow = row[j];
212             if (!rowsDropped_[jRow]) {
213               if (jRow > iRow) {
214                 CoinWorkDouble value = element[j] * multiplier;
215                 work[jRow] += value;
216               } else if (jRow == iRow) {
217                 CoinWorkDouble value = element[j] * multiplier;
218                 diagonalValue += value;
219               }
220             }
221           }
222         }
223         for (int j = iRow + 1; j < numberRows_; j++)
224           largest2 = CoinMax(largest2, CoinAbs(work[j]));
225         diagonal_[iRow] = diagonalValue;
226         largest2 = CoinMax(largest2, CoinAbs(diagonalValue));
227       } else {
228         /* dropped*/
229         diagonal_[iRow] = 1.0;
230       }
231       addOffset--;
232       work += addOffset;
233     }
234     /*check sizes*/
235     largest2 *= 1.0e-20;
236     largest = CoinMin(largest2, CHOL_SMALL_VALUE);
237     int numberDroppedBefore = 0;
238     for (iRow = 0; iRow < numberRows_; iRow++) {
239       int dropped = rowsDropped_[iRow];
240       /* Move to int array*/
241       rowsDropped[iRow] = dropped;
242       if (!dropped) {
243         CoinWorkDouble diagonal = diagonal_[iRow];
244         if (diagonal > largest2) {
245           diagonal_[iRow] = diagonal + perturbation;
246         } else {
247           diagonal_[iRow] = diagonal + perturbation;
248           rowsDropped[iRow] = 2;
249           numberDroppedBefore++;
250         }
251       }
252     }
253     doubleParameters_[10] = CoinMax(1.0e-20, largest);
254     integerParameters_[20] = 0;
255     doubleParameters_[3] = 0.0;
256     doubleParameters_[4] = COIN_DBL_MAX;
257     integerParameters_[34] = 0; /* say all must be positive*/
258 #ifdef CHOL_COMPARE
259     if (numberRows_ < 200)
260       factorizePart3(rowsDropped);
261 #endif
262     factorizePart2(rowsDropped);
263     newDropped = integerParameters_[20] + numberDroppedBefore;
264     largest = doubleParameters_[3];
265     smallest = doubleParameters_[4];
266     if (model_->messageHandler()->logLevel() > 1)
267       std::cout << "Cholesky - largest " << largest << " smallest " << smallest << std::endl;
268     choleskyCondition_ = largest / smallest;
269     /*drop fresh makes some formADAT easier*/
270     if (newDropped || numberRowsDropped_) {
271       newDropped = 0;
272       for (int i = 0; i < numberRows_; i++) {
273         char dropped = static_cast< char >(rowsDropped[i]);
274         rowsDropped_[i] = dropped;
275         if (dropped == 2) {
276           /*dropped this time*/
277           rowsDropped[newDropped++] = i;
278           rowsDropped_[i] = 0;
279         }
280       }
281       numberRowsDropped_ = newDropped;
282       newDropped = -(2 + newDropped);
283     }
284   } else {
285     /* KKT*/
286     CoinPackedMatrix *quadratic = NULL;
287     ClpQuadraticObjective *quadraticObj = (dynamic_cast< ClpQuadraticObjective * >(model_->objectiveAsObject()));
288     if (quadraticObj)
289       quadratic = quadraticObj->quadraticObjective();
290     /* matrix*/
291     int numberRowsModel = model_->numberRows();
292     int numberColumns = model_->numberColumns();
293     int numberTotal = numberColumns + numberRowsModel;
294     longDouble *work = sparseFactor_;
295     work--; /* skip diagonal*/
296     int addOffset = numberRows_ - 1;
297     int iColumn;
298     if (!quadratic) {
299       for (iColumn = 0; iColumn < numberColumns; iColumn++) {
300         CoinWorkDouble value = diagonal[iColumn];
301         if (CoinAbs(value) > 1.0e-100) {
302           value = 1.0 / value;
303           largest = CoinMax(largest, CoinAbs(value));
304           diagonal_[iColumn] = -value;
305           CoinBigIndex start = columnStart[iColumn];
306           CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
307           for (CoinBigIndex j = start; j < end; j++) {
308             /*choleskyRow_[numberElements]=row[j]+numberTotal;*/
309             /*sparseFactor_[numberElements++]=element[j];*/
310             work[row[j] + numberTotal] = element[j];
311             largest = CoinMax(largest, CoinAbs(element[j]));
312           }
313         } else {
314           diagonal_[iColumn] = -value;
315         }
316         addOffset--;
317         work += addOffset;
318       }
319     } else {
320       /* Quadratic*/
321       const int *columnQuadratic = quadratic->getIndices();
322       const CoinBigIndex *columnQuadraticStart = quadratic->getVectorStarts();
323       const int *columnQuadraticLength = quadratic->getVectorLengths();
324       const double *quadraticElement = quadratic->getElements();
325       for (iColumn = 0; iColumn < numberColumns; iColumn++) {
326         CoinWorkDouble value = diagonal[iColumn];
327         CoinBigIndex j;
328         if (CoinAbs(value) > 1.0e-100) {
329           value = 1.0 / value;
330           for (j = columnQuadraticStart[iColumn];
331                j < columnQuadraticStart[iColumn] + columnQuadraticLength[iColumn]; j++) {
332             int jColumn = columnQuadratic[j];
333             if (jColumn > iColumn) {
334               work[jColumn] = -quadraticElement[j];
335             } else if (iColumn == jColumn) {
336               value += quadraticElement[j];
337             }
338           }
339           largest = CoinMax(largest, CoinAbs(value));
340           diagonal_[iColumn] = -value;
341           CoinBigIndex start = columnStart[iColumn];
342           CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn];
343           for (j = start; j < end; j++) {
344             work[row[j] + numberTotal] = element[j];
345             largest = CoinMax(largest, CoinAbs(element[j]));
346           }
347         } else {
348           value = 1.0e100;
349           diagonal_[iColumn] = -value;
350         }
351         addOffset--;
352         work += addOffset;
353       }
354     }
355     /* slacks*/
356     for (iColumn = numberColumns; iColumn < numberTotal; iColumn++) {
357       CoinWorkDouble value = diagonal[iColumn];
358       if (CoinAbs(value) > 1.0e-100) {
359         value = 1.0 / value;
360         largest = CoinMax(largest, CoinAbs(value));
361       } else {
362         value = 1.0e100;
363       }
364       diagonal_[iColumn] = -value;
365       work[iColumn - numberColumns + numberTotal] = -1.0;
366       addOffset--;
367       work += addOffset;
368     }
369     /* Finish diagonal*/
370     for (iRow = 0; iRow < numberRowsModel; iRow++) {
371       diagonal_[iRow + numberTotal] = delta2;
372     }
373     /*check sizes*/
374     largest *= 1.0e-20;
375     largest = CoinMin(largest, CHOL_SMALL_VALUE);
376     doubleParameters_[10] = CoinMax(1.0e-20, largest);
377     integerParameters_[20] = 0;
378     doubleParameters_[3] = 0.0;
379     doubleParameters_[4] = COIN_DBL_MAX;
380     /* Set up LDL cutoff*/
381     integerParameters_[34] = numberTotal;
382     /* KKT*/
383     int *rowsDropped2 = new int[numberRows_];
384     CoinZeroN(rowsDropped2, numberRows_);
385 #ifdef CHOL_COMPARE
386     if (numberRows_ < 200)
387       factorizePart3(rowsDropped2);
388 #endif
389     factorizePart2(rowsDropped2);
390     newDropped = integerParameters_[20];
391     largest = doubleParameters_[3];
392     smallest = doubleParameters_[4];
393     if (model_->messageHandler()->logLevel() > 1)
394       COIN_DETAIL_PRINT(std::cout << "Cholesky - largest " << largest << " smallest " << smallest << std::endl);
395     choleskyCondition_ = largest / smallest;
396     /* Should save adjustments in ..R_*/
397     int n1 = 0, n2 = 0;
398     CoinWorkDouble *primalR = model_->primalR();
399     CoinWorkDouble *dualR = model_->dualR();
400     for (iRow = 0; iRow < numberTotal; iRow++) {
401       if (rowsDropped2[iRow]) {
402         n1++;
403         /*printf("row region1 %d dropped\n",iRow);*/
404         /*rowsDropped_[iRow]=1;*/
405         rowsDropped_[iRow] = 0;
406         primalR[iRow] = doubleParameters_[20];
407       } else {
408         rowsDropped_[iRow] = 0;
409         primalR[iRow] = 0.0;
410       }
411     }
412     for (; iRow < numberRows_; iRow++) {
413       if (rowsDropped2[iRow]) {
414         n2++;
415         /*printf("row region2 %d dropped\n",iRow);*/
416         /*rowsDropped_[iRow]=1;*/
417         rowsDropped_[iRow] = 0;
418         dualR[iRow - numberTotal] = doubleParameters_[34];
419       } else {
420         rowsDropped_[iRow] = 0;
421         dualR[iRow - numberTotal] = 0.0;
422       }
423     }
424   }
425   return 0;
426 }
427 /* Factorize - filling in rowsDropped and returning number dropped */
factorizePart3(int * rowsDropped)428 void ClpCholeskyDense::factorizePart3(int *rowsDropped)
429 {
430   int iColumn;
431   longDouble *xx = sparseFactor_;
432   longDouble *yy = diagonal_;
433   diagonal_ = sparseFactor_ + 40000;
434   sparseFactor_ = diagonal_ + numberRows_;
435   /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/
436   CoinMemcpyN(xx, 40000, sparseFactor_);
437   CoinMemcpyN(yy, numberRows_, diagonal_);
438   int numberDropped = 0;
439   CoinWorkDouble largest = 0.0;
440   CoinWorkDouble smallest = COIN_DBL_MAX;
441   double dropValue = doubleParameters_[10];
442   int firstPositive = integerParameters_[34];
443   longDouble *work = sparseFactor_;
444   /* Allow for triangular*/
445   int addOffset = numberRows_ - 1;
446   work--;
447   for (iColumn = 0; iColumn < numberRows_; iColumn++) {
448     int iRow;
449     int addOffsetNow = numberRows_ - 1;
450     ;
451     longDouble *workNow = sparseFactor_ - 1 + iColumn;
452     CoinWorkDouble diagonalValue = diagonal_[iColumn];
453     for (iRow = 0; iRow < iColumn; iRow++) {
454       double aj = *workNow;
455       addOffsetNow--;
456       workNow += addOffsetNow;
457       diagonalValue -= aj * aj * workDouble_[iRow];
458     }
459     bool dropColumn = false;
460     if (iColumn < firstPositive) {
461       /* must be negative*/
462       if (diagonalValue <= -dropValue) {
463         smallest = CoinMin(smallest, -diagonalValue);
464         largest = CoinMax(largest, -diagonalValue);
465         workDouble_[iColumn] = diagonalValue;
466         diagonalValue = 1.0 / diagonalValue;
467       } else {
468         dropColumn = true;
469         workDouble_[iColumn] = -1.0e100;
470         diagonalValue = 0.0;
471         integerParameters_[20]++;
472       }
473     } else {
474       /* must be positive*/
475       if (diagonalValue >= dropValue) {
476         smallest = CoinMin(smallest, diagonalValue);
477         largest = CoinMax(largest, diagonalValue);
478         workDouble_[iColumn] = diagonalValue;
479         diagonalValue = 1.0 / diagonalValue;
480       } else {
481         dropColumn = true;
482         workDouble_[iColumn] = 1.0e100;
483         diagonalValue = 0.0;
484         integerParameters_[20]++;
485       }
486     }
487     if (!dropColumn) {
488       diagonal_[iColumn] = diagonalValue;
489       for (iRow = iColumn + 1; iRow < numberRows_; iRow++) {
490         double value = work[iRow];
491         workNow = sparseFactor_ - 1;
492         int addOffsetNow = numberRows_ - 1;
493         ;
494         for (int jColumn = 0; jColumn < iColumn; jColumn++) {
495           double aj = workNow[iColumn];
496           double multiplier = workDouble_[jColumn];
497           double ai = workNow[iRow];
498           addOffsetNow--;
499           workNow += addOffsetNow;
500           value -= aj * ai * multiplier;
501         }
502         work[iRow] = value * diagonalValue;
503       }
504     } else {
505       /* drop column*/
506       rowsDropped[iColumn] = 2;
507       numberDropped++;
508       diagonal_[iColumn] = 0.0;
509       for (iRow = iColumn + 1; iRow < numberRows_; iRow++) {
510         work[iRow] = 0.0;
511       }
512     }
513     addOffset--;
514     work += addOffset;
515   }
516   doubleParameters_[3] = largest;
517   doubleParameters_[4] = smallest;
518   integerParameters_[20] = numberDropped;
519   sparseFactor_ = xx;
520   diagonal_ = yy;
521 }
522 /*#define POS_DEBUG*/
523 #ifdef POS_DEBUG
524 static int counter = 0;
bNumber(const longDouble * array,int & iRow,int & iCol)525 int ClpCholeskyDense::bNumber(const longDouble *array, int &iRow, int &iCol)
526 {
527   int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
528   longDouble *a = sparseFactor_ + BLOCKSQ * numberBlocks;
529   int k = array - a;
530   assert((k % BLOCKSQ) == 0);
531   iCol = 0;
532   int kk = k >> BLOCKSQSHIFT;
533   /*printf("%d %d %d %d\n",k,kk,BLOCKSQ,BLOCKSQSHIFT);*/
534   k = kk;
535   while (k >= numberBlocks) {
536     iCol++;
537     k -= numberBlocks;
538     numberBlocks--;
539   }
540   iRow = iCol + k;
541   counter++;
542   if (counter > 100000)
543     exit(77);
544   return kk;
545 }
546 #endif
547 /* Factorize - filling in rowsDropped and returning number dropped */
factorizePart2(int * rowsDropped)548 void ClpCholeskyDense::factorizePart2(int *rowsDropped)
549 {
550   int iColumn;
551   /*longDouble * xx = sparseFactor_;*/
552   /*longDouble * yy = diagonal_;*/
553   /*diagonal_ = sparseFactor_ + 40000;*/
554   /*sparseFactor_=diagonal_ + numberRows_;*/
555   /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/
556   /*memcpy(sparseFactor_,xx,40000*sizeof(double));*/
557   /*memcpy(diagonal_,yy,numberRows_*sizeof(double));*/
558   int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
559   /* later align on boundary*/
560   longDouble *a = sparseFactor_ + BLOCKSQ * numberBlocks;
561   int n = numberRows_;
562   int nRound = numberRows_ & (~(BLOCK - 1));
563   /* adjust if exact*/
564   if (nRound == n)
565     nRound -= BLOCK;
566   int sizeLastBlock = n - nRound;
567   int get = n * (n - 1) / 2; /* ? as no diagonal*/
568   int block = numberBlocks * (numberBlocks + 1) / 2;
569   int ifOdd;
570   int rowLast;
571   if (sizeLastBlock != BLOCK) {
572     longDouble *aa = &a[(block - 1) * BLOCKSQ];
573     rowLast = nRound - 1;
574     ifOdd = 1;
575     int put = BLOCKSQ;
576     /* do last separately*/
577     put -= (BLOCK - sizeLastBlock) * (BLOCK + 1);
578     for (iColumn = numberRows_ - 1; iColumn >= nRound; iColumn--) {
579       int put2 = put;
580       put -= BLOCK;
581       for (int iRow = numberRows_ - 1; iRow > iColumn; iRow--) {
582         aa[--put2] = sparseFactor_[--get];
583         assert(aa + put2 >= sparseFactor_ + get);
584       }
585       /* save diagonal as well*/
586       aa[--put2] = diagonal_[iColumn];
587     }
588     n = nRound;
589     block--;
590   } else {
591     /* exact fit*/
592     rowLast = numberRows_ - 1;
593     ifOdd = 0;
594   }
595   /* Now main loop*/
596   int nBlock = 0;
597   for (; n > 0; n -= BLOCK) {
598     longDouble *aa = &a[(block - 1) * BLOCKSQ];
599     longDouble *aaLast = NULL;
600     int put = BLOCKSQ;
601     int putLast = 0;
602     /* see if we have small block*/
603     if (ifOdd) {
604       aaLast = &a[(block - 1) * BLOCKSQ];
605       aa = aaLast - BLOCKSQ;
606       putLast = BLOCKSQ - BLOCK + sizeLastBlock;
607     }
608     for (iColumn = n - 1; iColumn >= n - BLOCK; iColumn--) {
609       if (aaLast) {
610         /* last bit*/
611         for (int iRow = numberRows_ - 1; iRow > rowLast; iRow--) {
612           aaLast[--putLast] = sparseFactor_[--get];
613           assert(aaLast + putLast >= sparseFactor_ + get);
614         }
615         putLast -= BLOCK - sizeLastBlock;
616       }
617       longDouble *aPut = aa;
618       int j = rowLast;
619       for (int jBlock = 0; jBlock <= nBlock; jBlock++) {
620         int put2 = put;
621         int last = CoinMax(j - BLOCK, iColumn);
622         for (int iRow = j; iRow > last; iRow--) {
623           aPut[--put2] = sparseFactor_[--get];
624           assert(aPut + put2 >= sparseFactor_ + get);
625         }
626         if (j - BLOCK < iColumn) {
627           /* save diagonal as well*/
628           aPut[--put2] = diagonal_[iColumn];
629         }
630         j -= BLOCK;
631         aPut -= BLOCKSQ;
632       }
633       put -= BLOCK;
634     }
635     nBlock++;
636     block -= nBlock + ifOdd;
637   }
638   ClpCholeskyDenseC info;
639   info.diagonal_ = diagonal_;
640   info.doubleParameters_[0] = doubleParameters_[10];
641   info.integerParameters_[0] = integerParameters_[34];
642 #ifndef CLP_CILK
643   ClpCholeskyCfactor(&info, a, numberRows_, numberBlocks,
644     diagonal_, workDouble_, rowsDropped);
645 #else
646   info.a = a;
647   info.n = numberRows_;
648   info.numberBlocks = numberBlocks;
649   info.work = workDouble_;
650   info.rowsDropped = rowsDropped;
651   info.integerParameters_[1] = model_->numberThreads();
652   ClpCholeskySpawn(&info);
653 #endif
654   double largest = 0.0;
655   double smallest = COIN_DBL_MAX;
656   int numberDropped = 0;
657   for (int i = 0; i < numberRows_; i++) {
658     if (diagonal_[i]) {
659       largest = CoinMax(largest, CoinAbs(diagonal_[i]));
660       smallest = CoinMin(smallest, CoinAbs(diagonal_[i]));
661     } else {
662       numberDropped++;
663     }
664   }
665   doubleParameters_[3] = CoinMax(doubleParameters_[3], 1.0 / smallest);
666   doubleParameters_[4] = CoinMin(doubleParameters_[4], 1.0 / largest);
667   integerParameters_[20] += numberDropped;
668 }
669 /* Non leaf recursive factor*/
ClpCholeskyCfactor(ClpCholeskyDenseC * thisStruct,longDouble * a,int n,int numberBlocks,longDouble * diagonal,longDouble * work,int * rowsDropped)670 void ClpCholeskyCfactor(ClpCholeskyDenseC *thisStruct, longDouble *a, int n, int numberBlocks,
671   longDouble *diagonal, longDouble *work, int *rowsDropped)
672 {
673   if (n <= BLOCK) {
674     ClpCholeskyCfactorLeaf(thisStruct, a, n, diagonal, work, rowsDropped);
675   } else {
676     int nb = number_blocks((n + 1) >> 1);
677     int nThis = number_rows(nb);
678     longDouble *aother;
679     int nLeft = n - nThis;
680     int nintri = (nb * (nb + 1)) >> 1;
681     int nbelow = (numberBlocks - nb) * nb;
682     ClpCholeskyCfactor(thisStruct, a, nThis, numberBlocks, diagonal, work, rowsDropped);
683     ClpCholeskyCtriRec(thisStruct, a, nThis, a + number_entries(nb), diagonal, work, nLeft, nb, 0, numberBlocks);
684     aother = a + number_entries(nintri + nbelow);
685     ClpCholeskyCrecTri(thisStruct, a + number_entries(nb), nLeft, nThis, nb, 0, aother, diagonal, work, numberBlocks);
686     ClpCholeskyCfactor(thisStruct, aother, nLeft,
687       numberBlocks - nb, diagonal + nThis, work + nThis, rowsDropped);
688   }
689 }
690 /* Non leaf recursive triangle rectangle update*/
ClpCholeskyCtriRec(ClpCholeskyDenseC * thisStruct,longDouble * aTri,int nThis,longDouble * aUnder,longDouble * diagonal,longDouble * work,int nLeft,int iBlock,int jBlock,int numberBlocks)691 void ClpCholeskyCtriRec(ClpCholeskyDenseC *thisStruct, longDouble *aTri, int nThis, longDouble *aUnder,
692   longDouble *diagonal, longDouble *work,
693   int nLeft, int iBlock, int jBlock,
694   int numberBlocks)
695 {
696   if (nThis <= BLOCK && nLeft <= BLOCK) {
697     ClpCholeskyCtriRecLeaf(/*thisStruct,*/ aTri, aUnder, diagonal, work, nLeft);
698   } else if (nThis < nLeft) {
699     int nb = number_blocks((nLeft + 1) >> 1);
700     int nLeft2 = number_rows(nb);
701     cilk_spawn ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder, diagonal, work, nLeft2, iBlock, jBlock, numberBlocks);
702     ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder + number_entries(nb), diagonal, work, nLeft - nLeft2,
703       iBlock + nb, jBlock, numberBlocks);
704     cilk_sync;
705   } else {
706     int nb = number_blocks((nThis + 1) >> 1);
707     int nThis2 = number_rows(nb);
708     longDouble *aother;
709     int kBlock = jBlock + nb;
710     int i;
711     int nintri = (nb * (nb + 1)) >> 1;
712     int nbelow = (numberBlocks - nb) * nb;
713     ClpCholeskyCtriRec(thisStruct, aTri, nThis2, aUnder, diagonal, work, nLeft, iBlock, jBlock, numberBlocks);
714     /* and rectangular update */
715     i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) - (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
716     aother = aUnder + number_entries(i);
717     ClpCholeskyCrecRec(thisStruct, aTri + number_entries(nb), nThis - nThis2, nLeft, nThis2, aUnder, aother,
718       work, kBlock, jBlock, numberBlocks);
719     ClpCholeskyCtriRec(thisStruct, aTri + number_entries(nintri + nbelow), nThis - nThis2, aother, diagonal + nThis2,
720       work + nThis2, nLeft,
721       iBlock - nb, kBlock - nb, numberBlocks - nb);
722   }
723 }
724 /* Non leaf recursive rectangle triangle update*/
ClpCholeskyCrecTri(ClpCholeskyDenseC * thisStruct,longDouble * aUnder,int nTri,int nDo,int iBlock,int jBlock,longDouble * aTri,longDouble * diagonal,longDouble * work,int numberBlocks)725 void ClpCholeskyCrecTri(ClpCholeskyDenseC *thisStruct, longDouble *aUnder, int nTri, int nDo,
726   int iBlock, int jBlock, longDouble *aTri,
727   longDouble *diagonal, longDouble *work,
728   int numberBlocks)
729 {
730   if (nTri <= BLOCK && nDo <= BLOCK) {
731     ClpCholeskyCrecTriLeaf(/*thisStruct,*/ aUnder, aTri, /*diagonal,*/ work, nTri);
732   } else if (nTri < nDo) {
733     int nb = number_blocks((nDo + 1) >> 1);
734     int nDo2 = number_rows(nb);
735     longDouble *aother;
736     int i;
737     ClpCholeskyCrecTri(thisStruct, aUnder, nTri, nDo2, iBlock, jBlock, aTri, diagonal, work, numberBlocks);
738     i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) - (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
739     aother = aUnder + number_entries(i);
740     ClpCholeskyCrecTri(thisStruct, aother, nTri, nDo - nDo2, iBlock - nb, jBlock, aTri, diagonal + nDo2,
741       work + nDo2, numberBlocks - nb);
742   } else {
743     int nb = number_blocks((nTri + 1) >> 1);
744     int nTri2 = number_rows(nb);
745     longDouble *aother;
746     int i;
747     cilk_spawn ClpCholeskyCrecTri(thisStruct, aUnder, nTri2, nDo, iBlock, jBlock, aTri, diagonal, work, numberBlocks);
748     /* and rectangular update */
749     i = ((numberBlocks - iBlock) * (numberBlocks - iBlock + 1) - (numberBlocks - iBlock - nb) * (numberBlocks - iBlock - nb + 1)) >> 1;
750     aother = aTri + number_entries(nb);
751     ClpCholeskyCrecRec(thisStruct, aUnder, nTri2, nTri - nTri2, nDo, aUnder + number_entries(nb), aother,
752       work, iBlock, jBlock, numberBlocks);
753     ClpCholeskyCrecTri(thisStruct, aUnder + number_entries(nb), nTri - nTri2, nDo, iBlock + nb, jBlock,
754       aTri + number_entries(i), diagonal, work, numberBlocks);
755     cilk_sync;
756   }
757 }
758 /* Non leaf recursive rectangle rectangle update,
759    nUnder is number of rows in iBlock,
760    nUnderK is number of rows in kBlock
761 */
ClpCholeskyCrecRec(ClpCholeskyDenseC * thisStruct,longDouble * above,int nUnder,int nUnderK,int nDo,longDouble * aUnder,longDouble * aOther,longDouble * work,int iBlock,int jBlock,int numberBlocks)762 void ClpCholeskyCrecRec(ClpCholeskyDenseC *thisStruct, longDouble *above, int nUnder, int nUnderK,
763   int nDo, longDouble *aUnder, longDouble *aOther,
764   longDouble *work,
765   int iBlock, int jBlock,
766   int numberBlocks)
767 {
768   if (nDo <= BLOCK && nUnder <= BLOCK && nUnderK <= BLOCK) {
769     assert(nDo == BLOCK && nUnder == BLOCK);
770     ClpCholeskyCrecRecLeaf(/*thisStruct,*/ above, aUnder, aOther, work, nUnderK);
771   } else if (nDo <= nUnderK && nUnder <= nUnderK) {
772     int nb = number_blocks((nUnderK + 1) >> 1);
773     int nUnder2 = number_rows(nb);
774     cilk_spawn ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnder2, nDo, aUnder, aOther, work,
775       iBlock, jBlock, numberBlocks);
776     ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK - nUnder2, nDo, aUnder + number_entries(nb),
777       aOther + number_entries(nb), work, iBlock, jBlock, numberBlocks);
778     cilk_sync;
779   } else if (nUnderK <= nDo && nUnder <= nDo) {
780     int nb = number_blocks((nDo + 1) >> 1);
781     int nDo2 = number_rows(nb);
782     int i;
783     ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK, nDo2, aUnder, aOther, work,
784       iBlock, jBlock, numberBlocks);
785     i = ((numberBlocks - jBlock) * (numberBlocks - jBlock - 1) - (numberBlocks - jBlock - nb) * (numberBlocks - jBlock - nb - 1)) >> 1;
786     ClpCholeskyCrecRec(thisStruct, above + number_entries(i), nUnder, nUnderK, nDo - nDo2,
787       aUnder + number_entries(i),
788       aOther, work + nDo2,
789       iBlock - nb, jBlock, numberBlocks - nb);
790   } else {
791     int nb = number_blocks((nUnder + 1) >> 1);
792     int nUnder2 = number_rows(nb);
793     int i;
794     cilk_spawn ClpCholeskyCrecRec(thisStruct, above, nUnder2, nUnderK, nDo, aUnder, aOther, work,
795       iBlock, jBlock, numberBlocks);
796     i = ((numberBlocks - iBlock) * (numberBlocks - iBlock - 1) - (numberBlocks - iBlock - nb) * (numberBlocks - iBlock - nb - 1)) >> 1;
797     ClpCholeskyCrecRec(thisStruct, above + number_entries(nb), nUnder - nUnder2, nUnderK, nDo, aUnder,
798       aOther + number_entries(i), work, iBlock + nb, jBlock, numberBlocks);
799     cilk_sync;
800   }
801 }
802 /* Leaf recursive factor*/
ClpCholeskyCfactorLeaf(ClpCholeskyDenseC * thisStruct,longDouble * a,int n,longDouble * diagonal,longDouble * work,int * rowsDropped)803 void ClpCholeskyCfactorLeaf(ClpCholeskyDenseC *thisStruct, longDouble *a, int n,
804   longDouble *diagonal, longDouble *work, int *rowsDropped)
805 {
806   double dropValue = thisStruct->doubleParameters_[0];
807   int firstPositive = thisStruct->integerParameters_[0];
808   int rowOffset = static_cast< int >(diagonal - thisStruct->diagonal_);
809   int i, j, k;
810   CoinWorkDouble t00, temp1;
811   longDouble *aa;
812   aa = a - BLOCK;
813   for (j = 0; j < n; j++) {
814     bool dropColumn;
815     CoinWorkDouble useT00;
816     aa += BLOCK;
817     t00 = aa[j];
818     for (k = 0; k < j; ++k) {
819       CoinWorkDouble multiplier = work[k];
820       t00 -= a[j + k * BLOCK] * a[j + k * BLOCK] * multiplier;
821     }
822     dropColumn = false;
823     useT00 = t00;
824     if (j + rowOffset < firstPositive) {
825       /* must be negative*/
826       if (t00 <= -dropValue) {
827         /*aa[j]=t00;*/
828         t00 = 1.0 / t00;
829       } else {
830         dropColumn = true;
831         /*aa[j]=-1.0e100;*/
832         useT00 = -1.0e-100;
833         t00 = 0.0;
834       }
835     } else {
836       /* must be positive*/
837       if (t00 >= dropValue) {
838         /*aa[j]=t00;*/
839         t00 = 1.0 / t00;
840       } else {
841         dropColumn = true;
842         /*aa[j]=1.0e100;*/
843         useT00 = 1.0e-100;
844         t00 = 0.0;
845       }
846     }
847     if (!dropColumn) {
848       diagonal[j] = t00;
849       work[j] = useT00;
850       temp1 = t00;
851       for (i = j + 1; i < n; i++) {
852         t00 = aa[i];
853         for (k = 0; k < j; ++k) {
854           CoinWorkDouble multiplier = work[k];
855           t00 -= a[i + k * BLOCK] * a[j + k * BLOCK] * multiplier;
856         }
857         aa[i] = t00 * temp1;
858       }
859     } else {
860       /* drop column*/
861       rowsDropped[j + rowOffset] = 2;
862       diagonal[j] = 0.0;
863       /*aa[j]=1.0e100;*/
864       work[j] = 1.0e100;
865       for (i = j + 1; i < n; i++) {
866         aa[i] = 0.0;
867       }
868     }
869   }
870 }
871 /* Leaf recursive triangle rectangle update*/
ClpCholeskyCtriRecLeaf(longDouble * aTri,longDouble * aUnder,longDouble * diagonal,longDouble * work,int nUnder)872 void ClpCholeskyCtriRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble *aTri, longDouble *aUnder, longDouble *diagonal, longDouble *work,
873   int nUnder)
874 {
875 #ifdef POS_DEBUG
876   int iru, icu;
877   int iu = bNumber(aUnder, iru, icu);
878   int irt, ict;
879   int it = bNumber(aTri, irt, ict);
880   /*printf("%d %d\n",iu,it);*/
881   printf("trirecleaf  under (%d,%d), tri (%d,%d)\n",
882     iru, icu, irt, ict);
883   assert(diagonal == thisStruct->diagonal_ + ict * BLOCK);
884 #endif
885   int j;
886   longDouble *aa;
887 #ifdef BLOCKUNROLL
888   if (nUnder == BLOCK) {
889     aa = aTri - 2 * BLOCK;
890     for (j = 0; j < BLOCK; j += 2) {
891       int i;
892       CoinWorkDouble temp0 = diagonal[j];
893       CoinWorkDouble temp1 = diagonal[j + 1];
894       aa += 2 * BLOCK;
895       for (i = 0; i < BLOCK; i += 2) {
896         CoinWorkDouble at1;
897         CoinWorkDouble t00 = aUnder[i + j * BLOCK];
898         CoinWorkDouble t10 = aUnder[i + BLOCK + j * BLOCK];
899         CoinWorkDouble t01 = aUnder[i + 1 + j * BLOCK];
900         CoinWorkDouble t11 = aUnder[i + 1 + BLOCK + j * BLOCK];
901         int k;
902         for (k = 0; k < j; ++k) {
903           CoinWorkDouble multiplier = work[k];
904           CoinWorkDouble au0 = aUnder[i + k * BLOCK] * multiplier;
905           CoinWorkDouble au1 = aUnder[i + 1 + k * BLOCK] * multiplier;
906           CoinWorkDouble at0 = aTri[j + k * BLOCK];
907           CoinWorkDouble at1 = aTri[j + 1 + k * BLOCK];
908           t00 -= au0 * at0;
909           t10 -= au0 * at1;
910           t01 -= au1 * at0;
911           t11 -= au1 * at1;
912         }
913         t00 *= temp0;
914         at1 = aTri[j + 1 + j * BLOCK] * work[j];
915         t10 -= t00 * at1;
916         t01 *= temp0;
917         t11 -= t01 * at1;
918         aUnder[i + j * BLOCK] = t00;
919         aUnder[i + 1 + j * BLOCK] = t01;
920         aUnder[i + BLOCK + j * BLOCK] = t10 * temp1;
921         aUnder[i + 1 + BLOCK + j * BLOCK] = t11 * temp1;
922       }
923     }
924   } else {
925 #endif
926     aa = aTri - BLOCK;
927     for (j = 0; j < BLOCK; j++) {
928       int i;
929       CoinWorkDouble temp1 = diagonal[j];
930       aa += BLOCK;
931       for (i = 0; i < nUnder; i++) {
932         int k;
933         CoinWorkDouble t00 = aUnder[i + j * BLOCK];
934         for (k = 0; k < j; ++k) {
935           CoinWorkDouble multiplier = work[k];
936           t00 -= aUnder[i + k * BLOCK] * aTri[j + k * BLOCK] * multiplier;
937         }
938         aUnder[i + j * BLOCK] = t00 * temp1;
939       }
940     }
941 #ifdef BLOCKUNROLL
942   }
943 #endif
944 }
945 /* Leaf recursive rectangle triangle update*/
ClpCholeskyCrecTriLeaf(longDouble * aUnder,longDouble * aTri,longDouble * work,int nUnder)946 void ClpCholeskyCrecTriLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble *aUnder, longDouble *aTri,
947   /*longDouble * diagonal,*/ longDouble *work, int nUnder)
948 {
949 #ifdef POS_DEBUG
950   int iru, icu;
951   int iu = bNumber(aUnder, iru, icu);
952   int irt, ict;
953   int it = bNumber(aTri, irt, ict);
954   /*printf("%d %d\n",iu,it);*/
955   printf("rectrileaf  under (%d,%d), tri (%d,%d)\n",
956     iru, icu, irt, ict);
957   assert(diagonal == thisStruct->diagonal_ + icu * BLOCK);
958 #endif
959   int i, j, k;
960   CoinWorkDouble t00;
961   longDouble *aa;
962 #ifdef BLOCKUNROLL
963   if (nUnder == BLOCK) {
964     longDouble *aUnder2 = aUnder - 2;
965     aa = aTri - 2 * BLOCK;
966     for (j = 0; j < BLOCK; j += 2) {
967       CoinWorkDouble t00, t01, t10, t11;
968       aa += 2 * BLOCK;
969       aUnder2 += 2;
970       t00 = aa[j];
971       t01 = aa[j + 1];
972       t10 = aa[j + 1 + BLOCK];
973       for (k = 0; k < BLOCK; ++k) {
974         CoinWorkDouble multiplier = work[k];
975         CoinWorkDouble a0 = aUnder2[k * BLOCK];
976         CoinWorkDouble a1 = aUnder2[1 + k * BLOCK];
977         CoinWorkDouble x0 = a0 * multiplier;
978         CoinWorkDouble x1 = a1 * multiplier;
979         t00 -= a0 * x0;
980         t01 -= a1 * x0;
981         t10 -= a1 * x1;
982       }
983       aa[j] = t00;
984       aa[j + 1] = t01;
985       aa[j + 1 + BLOCK] = t10;
986       for (i = j + 2; i < BLOCK; i += 2) {
987         t00 = aa[i];
988         t01 = aa[i + BLOCK];
989         t10 = aa[i + 1];
990         t11 = aa[i + 1 + BLOCK];
991         for (k = 0; k < BLOCK; ++k) {
992           CoinWorkDouble multiplier = work[k];
993           CoinWorkDouble a0 = aUnder2[k * BLOCK] * multiplier;
994           CoinWorkDouble a1 = aUnder2[1 + k * BLOCK] * multiplier;
995           t00 -= aUnder[i + k * BLOCK] * a0;
996           t01 -= aUnder[i + k * BLOCK] * a1;
997           t10 -= aUnder[i + 1 + k * BLOCK] * a0;
998           t11 -= aUnder[i + 1 + k * BLOCK] * a1;
999         }
1000         aa[i] = t00;
1001         aa[i + BLOCK] = t01;
1002         aa[i + 1] = t10;
1003         aa[i + 1 + BLOCK] = t11;
1004       }
1005     }
1006   } else {
1007 #endif
1008     aa = aTri - BLOCK;
1009     for (j = 0; j < nUnder; j++) {
1010       aa += BLOCK;
1011       for (i = j; i < nUnder; i++) {
1012         t00 = aa[i];
1013         for (k = 0; k < BLOCK; ++k) {
1014           CoinWorkDouble multiplier = work[k];
1015           t00 -= aUnder[i + k * BLOCK] * aUnder[j + k * BLOCK] * multiplier;
1016         }
1017         aa[i] = t00;
1018       }
1019     }
1020 #ifdef BLOCKUNROLL
1021   }
1022 #endif
1023 }
1024 /* Leaf recursive rectangle rectangle update,
1025    nUnder is number of rows in iBlock,
1026    nUnderK is number of rows in kBlock
1027 */
ClpCholeskyCrecRecLeaf(const longDouble * COIN_RESTRICT above,const longDouble * COIN_RESTRICT aUnder,longDouble * COIN_RESTRICT aOther,const longDouble * COIN_RESTRICT work,int nUnder)1028 void ClpCholeskyCrecRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/
1029   const longDouble *COIN_RESTRICT above,
1030   const longDouble *COIN_RESTRICT aUnder,
1031   longDouble *COIN_RESTRICT aOther,
1032   const longDouble *COIN_RESTRICT work,
1033   int nUnder)
1034 {
1035 #ifdef POS_DEBUG
1036   int ira, ica;
1037   int ia = bNumber(above, ira, ica);
1038   int iru, icu;
1039   int iu = bNumber(aUnder, iru, icu);
1040   int iro, ico;
1041   int io = bNumber(aOther, iro, ico);
1042   /*printf("%d %d %d\n",ia,iu,io);*/
1043   printf("recrecleaf above (%d,%d), under (%d,%d), other (%d,%d)\n",
1044     ira, ica, iru, icu, iro, ico);
1045 #endif
1046   int i, j, k;
1047   longDouble *aa;
1048 #ifdef BLOCKUNROLL
1049   aa = aOther - 4 * BLOCK;
1050   if (nUnder == BLOCK) {
1051     /*#define INTEL*/
1052 #ifdef INTEL
1053     aa += 2 * BLOCK;
1054     for (j = 0; j < BLOCK; j += 2) {
1055       aa += 2 * BLOCK;
1056       for (i = 0; i < BLOCK; i += 2) {
1057         CoinWorkDouble t00 = aa[i + 0 * BLOCK];
1058         CoinWorkDouble t10 = aa[i + 1 * BLOCK];
1059         CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK];
1060         CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK];
1061         for (k = 0; k < BLOCK; k++) {
1062           CoinWorkDouble multiplier = work[k];
1063           CoinWorkDouble a00 = aUnder[i + k * BLOCK] * multiplier;
1064           CoinWorkDouble a01 = aUnder[i + 1 + k * BLOCK] * multiplier;
1065           t00 -= a00 * above[j + 0 + k * BLOCK];
1066           t10 -= a00 * above[j + 1 + k * BLOCK];
1067           t01 -= a01 * above[j + 0 + k * BLOCK];
1068           t11 -= a01 * above[j + 1 + k * BLOCK];
1069         }
1070         aa[i + 0 * BLOCK] = t00;
1071         aa[i + 1 * BLOCK] = t10;
1072         aa[i + 1 + 0 * BLOCK] = t01;
1073         aa[i + 1 + 1 * BLOCK] = t11;
1074       }
1075     }
1076 #else
1077     for (j = 0; j < BLOCK; j += 4) {
1078       aa += 4 * BLOCK;
1079       for (i = 0; i < BLOCK; i += 4) {
1080         CoinWorkDouble t00 = aa[i + 0 + 0 * BLOCK];
1081         CoinWorkDouble t10 = aa[i + 0 + 1 * BLOCK];
1082         CoinWorkDouble t20 = aa[i + 0 + 2 * BLOCK];
1083         CoinWorkDouble t30 = aa[i + 0 + 3 * BLOCK];
1084         CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK];
1085         CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK];
1086         CoinWorkDouble t21 = aa[i + 1 + 2 * BLOCK];
1087         CoinWorkDouble t31 = aa[i + 1 + 3 * BLOCK];
1088         CoinWorkDouble t02 = aa[i + 2 + 0 * BLOCK];
1089         CoinWorkDouble t12 = aa[i + 2 + 1 * BLOCK];
1090         CoinWorkDouble t22 = aa[i + 2 + 2 * BLOCK];
1091         CoinWorkDouble t32 = aa[i + 2 + 3 * BLOCK];
1092         CoinWorkDouble t03 = aa[i + 3 + 0 * BLOCK];
1093         CoinWorkDouble t13 = aa[i + 3 + 1 * BLOCK];
1094         CoinWorkDouble t23 = aa[i + 3 + 2 * BLOCK];
1095         CoinWorkDouble t33 = aa[i + 3 + 3 * BLOCK];
1096         const longDouble *COIN_RESTRICT aUnderNow = aUnder + i;
1097         const longDouble *COIN_RESTRICT aboveNow = above + j;
1098         for (k = 0; k < BLOCK; k++) {
1099           CoinWorkDouble multiplier = work[k];
1100           CoinWorkDouble a00 = aUnderNow[0] * multiplier;
1101           CoinWorkDouble a01 = aUnderNow[1] * multiplier;
1102           CoinWorkDouble a02 = aUnderNow[2] * multiplier;
1103           CoinWorkDouble a03 = aUnderNow[3] * multiplier;
1104           t00 -= a00 * aboveNow[0];
1105           t10 -= a00 * aboveNow[1];
1106           t20 -= a00 * aboveNow[2];
1107           t30 -= a00 * aboveNow[3];
1108           t01 -= a01 * aboveNow[0];
1109           t11 -= a01 * aboveNow[1];
1110           t21 -= a01 * aboveNow[2];
1111           t31 -= a01 * aboveNow[3];
1112           t02 -= a02 * aboveNow[0];
1113           t12 -= a02 * aboveNow[1];
1114           t22 -= a02 * aboveNow[2];
1115           t32 -= a02 * aboveNow[3];
1116           t03 -= a03 * aboveNow[0];
1117           t13 -= a03 * aboveNow[1];
1118           t23 -= a03 * aboveNow[2];
1119           t33 -= a03 * aboveNow[3];
1120           aUnderNow += BLOCK;
1121           aboveNow += BLOCK;
1122         }
1123         aa[i + 0 + 0 * BLOCK] = t00;
1124         aa[i + 0 + 1 * BLOCK] = t10;
1125         aa[i + 0 + 2 * BLOCK] = t20;
1126         aa[i + 0 + 3 * BLOCK] = t30;
1127         aa[i + 1 + 0 * BLOCK] = t01;
1128         aa[i + 1 + 1 * BLOCK] = t11;
1129         aa[i + 1 + 2 * BLOCK] = t21;
1130         aa[i + 1 + 3 * BLOCK] = t31;
1131         aa[i + 2 + 0 * BLOCK] = t02;
1132         aa[i + 2 + 1 * BLOCK] = t12;
1133         aa[i + 2 + 2 * BLOCK] = t22;
1134         aa[i + 2 + 3 * BLOCK] = t32;
1135         aa[i + 3 + 0 * BLOCK] = t03;
1136         aa[i + 3 + 1 * BLOCK] = t13;
1137         aa[i + 3 + 2 * BLOCK] = t23;
1138         aa[i + 3 + 3 * BLOCK] = t33;
1139       }
1140     }
1141 #endif
1142   } else {
1143     int odd = nUnder & 1;
1144     int n = nUnder - odd;
1145     for (j = 0; j < BLOCK; j += 4) {
1146       aa += 4 * BLOCK;
1147       for (i = 0; i < n; i += 2) {
1148         CoinWorkDouble t00 = aa[i + 0 * BLOCK];
1149         CoinWorkDouble t10 = aa[i + 1 * BLOCK];
1150         CoinWorkDouble t20 = aa[i + 2 * BLOCK];
1151         CoinWorkDouble t30 = aa[i + 3 * BLOCK];
1152         CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK];
1153         CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK];
1154         CoinWorkDouble t21 = aa[i + 1 + 2 * BLOCK];
1155         CoinWorkDouble t31 = aa[i + 1 + 3 * BLOCK];
1156         const longDouble *COIN_RESTRICT aUnderNow = aUnder + i;
1157         const longDouble *COIN_RESTRICT aboveNow = above + j;
1158         for (k = 0; k < BLOCK; k++) {
1159           CoinWorkDouble multiplier = work[k];
1160           CoinWorkDouble a00 = aUnderNow[0] * multiplier;
1161           CoinWorkDouble a01 = aUnderNow[1] * multiplier;
1162           t00 -= a00 * aboveNow[0];
1163           t10 -= a00 * aboveNow[1];
1164           t20 -= a00 * aboveNow[2];
1165           t30 -= a00 * aboveNow[3];
1166           t01 -= a01 * aboveNow[0];
1167           t11 -= a01 * aboveNow[1];
1168           t21 -= a01 * aboveNow[2];
1169           t31 -= a01 * aboveNow[3];
1170           aUnderNow += BLOCK;
1171           aboveNow += BLOCK;
1172         }
1173         aa[i + 0 * BLOCK] = t00;
1174         aa[i + 1 * BLOCK] = t10;
1175         aa[i + 2 * BLOCK] = t20;
1176         aa[i + 3 * BLOCK] = t30;
1177         aa[i + 1 + 0 * BLOCK] = t01;
1178         aa[i + 1 + 1 * BLOCK] = t11;
1179         aa[i + 1 + 2 * BLOCK] = t21;
1180         aa[i + 1 + 3 * BLOCK] = t31;
1181       }
1182       if (odd) {
1183         CoinWorkDouble t0 = aa[n + 0 * BLOCK];
1184         CoinWorkDouble t1 = aa[n + 1 * BLOCK];
1185         CoinWorkDouble t2 = aa[n + 2 * BLOCK];
1186         CoinWorkDouble t3 = aa[n + 3 * BLOCK];
1187         CoinWorkDouble a0;
1188         for (k = 0; k < BLOCK; k++) {
1189           a0 = aUnder[n + k * BLOCK] * work[k];
1190           t0 -= a0 * above[j + 0 + k * BLOCK];
1191           t1 -= a0 * above[j + 1 + k * BLOCK];
1192           t2 -= a0 * above[j + 2 + k * BLOCK];
1193           t3 -= a0 * above[j + 3 + k * BLOCK];
1194         }
1195         aa[n + 0 * BLOCK] = t0;
1196         aa[n + 1 * BLOCK] = t1;
1197         aa[n + 2 * BLOCK] = t2;
1198         aa[n + 3 * BLOCK] = t3;
1199       }
1200     }
1201   }
1202 #else
1203   aa = aOther - BLOCK;
1204   for (j = 0; j < BLOCK; j++) {
1205     aa += BLOCK;
1206     for (i = 0; i < nUnder; i++) {
1207       CoinWorkDouble t00 = aa[i + 0 * BLOCK];
1208       for (k = 0; k < BLOCK; k++) {
1209         CoinWorkDouble a00 = aUnder[i + k * BLOCK] * work[k];
1210         t00 -= a00 * above[j + k * BLOCK];
1211       }
1212       aa[i] = t00;
1213     }
1214   }
1215 #endif
1216 }
1217 /* Uses factorization to solve. */
solve(CoinWorkDouble * region)1218 void ClpCholeskyDense::solve(CoinWorkDouble *region)
1219 {
1220 #ifdef CHOL_COMPARE
1221   double *region2 = NULL;
1222   if (numberRows_ < 200) {
1223     longDouble *xx = sparseFactor_;
1224     longDouble *yy = diagonal_;
1225     diagonal_ = sparseFactor_ + 40000;
1226     sparseFactor_ = diagonal_ + numberRows_;
1227     region2 = ClpCopyOfArray(region, numberRows_);
1228     int iRow, iColumn;
1229     int addOffset = numberRows_ - 1;
1230     longDouble *work = sparseFactor_ - 1;
1231     for (iColumn = 0; iColumn < numberRows_; iColumn++) {
1232       double value = region2[iColumn];
1233       for (iRow = iColumn + 1; iRow < numberRows_; iRow++)
1234         region2[iRow] -= value * work[iRow];
1235       addOffset--;
1236       work += addOffset;
1237     }
1238     for (iColumn = numberRows_ - 1; iColumn >= 0; iColumn--) {
1239       double value = region2[iColumn] * diagonal_[iColumn];
1240       work -= addOffset;
1241       addOffset++;
1242       for (iRow = iColumn + 1; iRow < numberRows_; iRow++)
1243         value -= region2[iRow] * work[iRow];
1244       region2[iColumn] = value;
1245     }
1246     sparseFactor_ = xx;
1247     diagonal_ = yy;
1248   }
1249 #endif
1250   /*longDouble * xx = sparseFactor_;*/
1251   /*longDouble * yy = diagonal_;*/
1252   /*diagonal_ = sparseFactor_ + 40000;*/
1253   /*sparseFactor_=diagonal_ + numberRows_;*/
1254   int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT;
1255   /* later align on boundary*/
1256   longDouble *a = sparseFactor_ + BLOCKSQ * numberBlocks;
1257   int iBlock;
1258   longDouble *aa = a;
1259   int iColumn;
1260   for (iBlock = 0; iBlock < numberBlocks; iBlock++) {
1261     int nChunk;
1262     int jBlock;
1263     int iDo = iBlock * BLOCK;
1264     int base = iDo;
1265     if (iDo + BLOCK > numberRows_) {
1266       nChunk = numberRows_ - iDo;
1267     } else {
1268       nChunk = BLOCK;
1269     }
1270     solveF1(aa, nChunk, region + iDo);
1271     for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) {
1272       base += BLOCK;
1273       aa += BLOCKSQ;
1274       if (base + BLOCK > numberRows_) {
1275         nChunk = numberRows_ - base;
1276       } else {
1277         nChunk = BLOCK;
1278       }
1279       solveF2(aa, nChunk, region + iDo, region + base);
1280     }
1281     aa += BLOCKSQ;
1282   }
1283   /* do diagonal outside*/
1284   for (iColumn = 0; iColumn < numberRows_; iColumn++)
1285     region[iColumn] *= diagonal_[iColumn];
1286   int offset = ((numberBlocks * (numberBlocks + 1)) >> 1);
1287   aa = a + number_entries(offset - 1);
1288   int lBase = (numberBlocks - 1) * BLOCK;
1289   for (iBlock = numberBlocks - 1; iBlock >= 0; iBlock--) {
1290     int nChunk;
1291     int jBlock;
1292     int triBase = iBlock * BLOCK;
1293     int iBase = lBase;
1294     for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) {
1295       if (iBase + BLOCK > numberRows_) {
1296         nChunk = numberRows_ - iBase;
1297       } else {
1298         nChunk = BLOCK;
1299       }
1300       solveB2(aa, nChunk, region + triBase, region + iBase);
1301       iBase -= BLOCK;
1302       aa -= BLOCKSQ;
1303     }
1304     if (triBase + BLOCK > numberRows_) {
1305       nChunk = numberRows_ - triBase;
1306     } else {
1307       nChunk = BLOCK;
1308     }
1309     solveB1(aa, nChunk, region + triBase);
1310     aa -= BLOCKSQ;
1311   }
1312 #ifdef CHOL_COMPARE
1313   if (numberRows_ < 200) {
1314     for (int i = 0; i < numberRows_; i++) {
1315       assert(CoinAbs(region[i] - region2[i]) < 1.0e-3);
1316     }
1317     delete[] region2;
1318   }
1319 #endif
1320 }
1321 /* Forward part of solve 1*/
solveF1(longDouble * a,int n,CoinWorkDouble * region)1322 void ClpCholeskyDense::solveF1(longDouble *a, int n, CoinWorkDouble *region)
1323 {
1324   int j, k;
1325   CoinWorkDouble t00;
1326   for (j = 0; j < n; j++) {
1327     t00 = region[j];
1328     for (k = 0; k < j; ++k) {
1329       t00 -= region[k] * a[j + k * BLOCK];
1330     }
1331     /*t00*=a[j + j * BLOCK];*/
1332     region[j] = t00;
1333   }
1334 }
1335 /* Forward part of solve 2*/
solveF2(longDouble * a,int n,CoinWorkDouble * region,CoinWorkDouble * region2)1336 void ClpCholeskyDense::solveF2(longDouble *a, int n, CoinWorkDouble *region, CoinWorkDouble *region2)
1337 {
1338   int j, k;
1339 #ifdef BLOCKUNROLL
1340   if (n == BLOCK) {
1341     for (k = 0; k < BLOCK; k += 4) {
1342       CoinWorkDouble t0 = region2[0];
1343       CoinWorkDouble t1 = region2[1];
1344       CoinWorkDouble t2 = region2[2];
1345       CoinWorkDouble t3 = region2[3];
1346       t0 -= region[0] * a[0 + 0 * BLOCK];
1347       t1 -= region[0] * a[1 + 0 * BLOCK];
1348       t2 -= region[0] * a[2 + 0 * BLOCK];
1349       t3 -= region[0] * a[3 + 0 * BLOCK];
1350 
1351       t0 -= region[1] * a[0 + 1 * BLOCK];
1352       t1 -= region[1] * a[1 + 1 * BLOCK];
1353       t2 -= region[1] * a[2 + 1 * BLOCK];
1354       t3 -= region[1] * a[3 + 1 * BLOCK];
1355 
1356       t0 -= region[2] * a[0 + 2 * BLOCK];
1357       t1 -= region[2] * a[1 + 2 * BLOCK];
1358       t2 -= region[2] * a[2 + 2 * BLOCK];
1359       t3 -= region[2] * a[3 + 2 * BLOCK];
1360 
1361       t0 -= region[3] * a[0 + 3 * BLOCK];
1362       t1 -= region[3] * a[1 + 3 * BLOCK];
1363       t2 -= region[3] * a[2 + 3 * BLOCK];
1364       t3 -= region[3] * a[3 + 3 * BLOCK];
1365 
1366       t0 -= region[4] * a[0 + 4 * BLOCK];
1367       t1 -= region[4] * a[1 + 4 * BLOCK];
1368       t2 -= region[4] * a[2 + 4 * BLOCK];
1369       t3 -= region[4] * a[3 + 4 * BLOCK];
1370 
1371       t0 -= region[5] * a[0 + 5 * BLOCK];
1372       t1 -= region[5] * a[1 + 5 * BLOCK];
1373       t2 -= region[5] * a[2 + 5 * BLOCK];
1374       t3 -= region[5] * a[3 + 5 * BLOCK];
1375 
1376       t0 -= region[6] * a[0 + 6 * BLOCK];
1377       t1 -= region[6] * a[1 + 6 * BLOCK];
1378       t2 -= region[6] * a[2 + 6 * BLOCK];
1379       t3 -= region[6] * a[3 + 6 * BLOCK];
1380 
1381       t0 -= region[7] * a[0 + 7 * BLOCK];
1382       t1 -= region[7] * a[1 + 7 * BLOCK];
1383       t2 -= region[7] * a[2 + 7 * BLOCK];
1384       t3 -= region[7] * a[3 + 7 * BLOCK];
1385 #if BLOCK > 8
1386       t0 -= region[8] * a[0 + 8 * BLOCK];
1387       t1 -= region[8] * a[1 + 8 * BLOCK];
1388       t2 -= region[8] * a[2 + 8 * BLOCK];
1389       t3 -= region[8] * a[3 + 8 * BLOCK];
1390 
1391       t0 -= region[9] * a[0 + 9 * BLOCK];
1392       t1 -= region[9] * a[1 + 9 * BLOCK];
1393       t2 -= region[9] * a[2 + 9 * BLOCK];
1394       t3 -= region[9] * a[3 + 9 * BLOCK];
1395 
1396       t0 -= region[10] * a[0 + 10 * BLOCK];
1397       t1 -= region[10] * a[1 + 10 * BLOCK];
1398       t2 -= region[10] * a[2 + 10 * BLOCK];
1399       t3 -= region[10] * a[3 + 10 * BLOCK];
1400 
1401       t0 -= region[11] * a[0 + 11 * BLOCK];
1402       t1 -= region[11] * a[1 + 11 * BLOCK];
1403       t2 -= region[11] * a[2 + 11 * BLOCK];
1404       t3 -= region[11] * a[3 + 11 * BLOCK];
1405 
1406       t0 -= region[12] * a[0 + 12 * BLOCK];
1407       t1 -= region[12] * a[1 + 12 * BLOCK];
1408       t2 -= region[12] * a[2 + 12 * BLOCK];
1409       t3 -= region[12] * a[3 + 12 * BLOCK];
1410 
1411       t0 -= region[13] * a[0 + 13 * BLOCK];
1412       t1 -= region[13] * a[1 + 13 * BLOCK];
1413       t2 -= region[13] * a[2 + 13 * BLOCK];
1414       t3 -= region[13] * a[3 + 13 * BLOCK];
1415 
1416       t0 -= region[14] * a[0 + 14 * BLOCK];
1417       t1 -= region[14] * a[1 + 14 * BLOCK];
1418       t2 -= region[14] * a[2 + 14 * BLOCK];
1419       t3 -= region[14] * a[3 + 14 * BLOCK];
1420 
1421       t0 -= region[15] * a[0 + 15 * BLOCK];
1422       t1 -= region[15] * a[1 + 15 * BLOCK];
1423       t2 -= region[15] * a[2 + 15 * BLOCK];
1424       t3 -= region[15] * a[3 + 15 * BLOCK];
1425 #endif
1426       region2[0] = t0;
1427       region2[1] = t1;
1428       region2[2] = t2;
1429       region2[3] = t3;
1430       region2 += 4;
1431       a += 4;
1432     }
1433   } else {
1434 #endif
1435     for (k = 0; k < n; ++k) {
1436       CoinWorkDouble t00 = region2[k];
1437       for (j = 0; j < BLOCK; j++) {
1438         t00 -= region[j] * a[k + j * BLOCK];
1439       }
1440       region2[k] = t00;
1441     }
1442 #ifdef BLOCKUNROLL
1443   }
1444 #endif
1445 }
1446 /* Backward part of solve 1*/
solveB1(longDouble * a,int n,CoinWorkDouble * region)1447 void ClpCholeskyDense::solveB1(longDouble *a, int n, CoinWorkDouble *region)
1448 {
1449   int j, k;
1450   CoinWorkDouble t00;
1451   for (j = n - 1; j >= 0; j--) {
1452     t00 = region[j];
1453     for (k = j + 1; k < n; ++k) {
1454       t00 -= region[k] * a[k + j * BLOCK];
1455     }
1456     /*t00*=a[j + j * BLOCK];*/
1457     region[j] = t00;
1458   }
1459 }
1460 /* Backward part of solve 2*/
solveB2(longDouble * a,int n,CoinWorkDouble * region,CoinWorkDouble * region2)1461 void ClpCholeskyDense::solveB2(longDouble *a, int n, CoinWorkDouble *region, CoinWorkDouble *region2)
1462 {
1463   int j, k;
1464 #ifdef BLOCKUNROLL
1465   if (n == BLOCK) {
1466     for (j = 0; j < BLOCK; j += 4) {
1467       CoinWorkDouble t0 = region[0];
1468       CoinWorkDouble t1 = region[1];
1469       CoinWorkDouble t2 = region[2];
1470       CoinWorkDouble t3 = region[3];
1471       t0 -= region2[0] * a[0 + 0 * BLOCK];
1472       t1 -= region2[0] * a[0 + 1 * BLOCK];
1473       t2 -= region2[0] * a[0 + 2 * BLOCK];
1474       t3 -= region2[0] * a[0 + 3 * BLOCK];
1475 
1476       t0 -= region2[1] * a[1 + 0 * BLOCK];
1477       t1 -= region2[1] * a[1 + 1 * BLOCK];
1478       t2 -= region2[1] * a[1 + 2 * BLOCK];
1479       t3 -= region2[1] * a[1 + 3 * BLOCK];
1480 
1481       t0 -= region2[2] * a[2 + 0 * BLOCK];
1482       t1 -= region2[2] * a[2 + 1 * BLOCK];
1483       t2 -= region2[2] * a[2 + 2 * BLOCK];
1484       t3 -= region2[2] * a[2 + 3 * BLOCK];
1485 
1486       t0 -= region2[3] * a[3 + 0 * BLOCK];
1487       t1 -= region2[3] * a[3 + 1 * BLOCK];
1488       t2 -= region2[3] * a[3 + 2 * BLOCK];
1489       t3 -= region2[3] * a[3 + 3 * BLOCK];
1490 
1491       t0 -= region2[4] * a[4 + 0 * BLOCK];
1492       t1 -= region2[4] * a[4 + 1 * BLOCK];
1493       t2 -= region2[4] * a[4 + 2 * BLOCK];
1494       t3 -= region2[4] * a[4 + 3 * BLOCK];
1495 
1496       t0 -= region2[5] * a[5 + 0 * BLOCK];
1497       t1 -= region2[5] * a[5 + 1 * BLOCK];
1498       t2 -= region2[5] * a[5 + 2 * BLOCK];
1499       t3 -= region2[5] * a[5 + 3 * BLOCK];
1500 
1501       t0 -= region2[6] * a[6 + 0 * BLOCK];
1502       t1 -= region2[6] * a[6 + 1 * BLOCK];
1503       t2 -= region2[6] * a[6 + 2 * BLOCK];
1504       t3 -= region2[6] * a[6 + 3 * BLOCK];
1505 
1506       t0 -= region2[7] * a[7 + 0 * BLOCK];
1507       t1 -= region2[7] * a[7 + 1 * BLOCK];
1508       t2 -= region2[7] * a[7 + 2 * BLOCK];
1509       t3 -= region2[7] * a[7 + 3 * BLOCK];
1510 #if BLOCK > 8
1511 
1512       t0 -= region2[8] * a[8 + 0 * BLOCK];
1513       t1 -= region2[8] * a[8 + 1 * BLOCK];
1514       t2 -= region2[8] * a[8 + 2 * BLOCK];
1515       t3 -= region2[8] * a[8 + 3 * BLOCK];
1516 
1517       t0 -= region2[9] * a[9 + 0 * BLOCK];
1518       t1 -= region2[9] * a[9 + 1 * BLOCK];
1519       t2 -= region2[9] * a[9 + 2 * BLOCK];
1520       t3 -= region2[9] * a[9 + 3 * BLOCK];
1521 
1522       t0 -= region2[10] * a[10 + 0 * BLOCK];
1523       t1 -= region2[10] * a[10 + 1 * BLOCK];
1524       t2 -= region2[10] * a[10 + 2 * BLOCK];
1525       t3 -= region2[10] * a[10 + 3 * BLOCK];
1526 
1527       t0 -= region2[11] * a[11 + 0 * BLOCK];
1528       t1 -= region2[11] * a[11 + 1 * BLOCK];
1529       t2 -= region2[11] * a[11 + 2 * BLOCK];
1530       t3 -= region2[11] * a[11 + 3 * BLOCK];
1531 
1532       t0 -= region2[12] * a[12 + 0 * BLOCK];
1533       t1 -= region2[12] * a[12 + 1 * BLOCK];
1534       t2 -= region2[12] * a[12 + 2 * BLOCK];
1535       t3 -= region2[12] * a[12 + 3 * BLOCK];
1536 
1537       t0 -= region2[13] * a[13 + 0 * BLOCK];
1538       t1 -= region2[13] * a[13 + 1 * BLOCK];
1539       t2 -= region2[13] * a[13 + 2 * BLOCK];
1540       t3 -= region2[13] * a[13 + 3 * BLOCK];
1541 
1542       t0 -= region2[14] * a[14 + 0 * BLOCK];
1543       t1 -= region2[14] * a[14 + 1 * BLOCK];
1544       t2 -= region2[14] * a[14 + 2 * BLOCK];
1545       t3 -= region2[14] * a[14 + 3 * BLOCK];
1546 
1547       t0 -= region2[15] * a[15 + 0 * BLOCK];
1548       t1 -= region2[15] * a[15 + 1 * BLOCK];
1549       t2 -= region2[15] * a[15 + 2 * BLOCK];
1550       t3 -= region2[15] * a[15 + 3 * BLOCK];
1551 #endif
1552       region[0] = t0;
1553       region[1] = t1;
1554       region[2] = t2;
1555       region[3] = t3;
1556       a += 4 * BLOCK;
1557       region += 4;
1558     }
1559   } else {
1560 #endif
1561     for (j = 0; j < BLOCK; j++) {
1562       CoinWorkDouble t00 = region[j];
1563       for (k = 0; k < n; ++k) {
1564         t00 -= region2[k] * a[k + j * BLOCK];
1565       }
1566       region[j] = t00;
1567     }
1568 #ifdef BLOCKUNROLL
1569   }
1570 #endif
1571 }
1572 
1573 /* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
1574 */
1575