1 /* $Id: ClpPackedMatrix.cpp 2468 2019-05-03 04:31:14Z stefan $ */
2 // Copyright (C) 2002, International Business Machines
3 // Corporation and others.  All Rights Reserved.
4 // This code is licensed under the terms of the Eclipse Public License (EPL).
5 
6 #include <cstdio>
7 
8 #include "CoinPragma.hpp"
9 #include "CoinIndexedVector.hpp"
10 #include "CoinHelperFunctions.hpp"
11 //#define THREAD
12 //#define FAKE_CILK
13 #if ABOCA_LITE
14 // 1 is not owner of abcState_
15 #define ABCSTATE_LITE 1
16 #endif
17 #include "ClpSimplex.hpp"
18 #include "ClpSimplexDual.hpp"
19 #include "ClpFactorization.hpp"
20 #ifndef SLIM_CLP
21 #include "ClpQuadraticObjective.hpp"
22 #endif
23 // at end to get min/max!
24 #include "ClpPackedMatrix.hpp"
25 #include "ClpMessage.hpp"
26 #ifdef INTEL_MKL
27 #include "mkl_spblas.h"
28 #endif
29 //#define DO_CHECK_FLAGS 1
30 //=============================================================================
31 #ifdef COIN_PREFETCH
32 #if 1
33 #define coin_prefetch(mem)              \
34   __asm__ __volatile__("prefetchnta %0" \
35                        :                \
36                        : "m"(*(reinterpret_cast< char * >(mem))))
37 #define coin_prefetch_const(mem)        \
38   __asm__ __volatile__("prefetchnta %0" \
39                        :                \
40                        : "m"(*(reinterpret_cast< const char * >(mem))))
41 #else
42 #define coin_prefetch(mem)           \
43   __asm__ __volatile__("prefetch %0" \
44                        :             \
45                        : "m"(*(reinterpret_cast< char * >(mem))))
46 #define coin_prefetch_const(mem)     \
47   __asm__ __volatile__("prefetch %0" \
48                        :             \
49                        : "m"(*(reinterpret_cast< const char * >(mem))))
50 #endif
51 #else
52 // dummy
53 #define coin_prefetch(mem)
54 #define coin_prefetch_const(mem)
55 #endif
56 
57 //#############################################################################
58 // Constructors / Destructor / Assignment
59 //#############################################################################
debug3(int iColumn,double & thisWeight,double pivotSquared,double devex,double pivot,double modification,double oldWeight)60 static void debug3(int iColumn, double &thisWeight, double pivotSquared, double devex,
61   double pivot, double modification, double oldWeight)
62 {
63   //double newWeight=oldWeight+pivotSquared * devex + pivot * modification;
64   //if (/*iColumn==34845 && */newWeight!=thisWeight)
65   //printf("YY %d %.30g %.30g %.30g %.30g %.30g %.30g\n",
66   //	   iColumn,thisWeight,pivotSquared,devex,pivot,modification,oldWeight);
67   thisWeight = oldWeight;
68   thisWeight += pivotSquared * devex + pivot * modification;
69 }
70 //-------------------------------------------------------------------
71 // Default Constructor
72 //-------------------------------------------------------------------
ClpPackedMatrix()73 ClpPackedMatrix::ClpPackedMatrix()
74   : ClpMatrixBase()
75   , matrix_(NULL)
76   , numberActiveColumns_(0)
77   , flags_(2)
78   , rowCopy_(NULL)
79   , columnCopy_(NULL)
80 {
81   setType(1);
82 }
83 
84 //-------------------------------------------------------------------
85 // Copy constructor
86 //-------------------------------------------------------------------
ClpPackedMatrix(const ClpPackedMatrix & rhs)87 ClpPackedMatrix::ClpPackedMatrix(const ClpPackedMatrix &rhs)
88   : ClpMatrixBase(rhs)
89 {
90 #ifdef DO_CHECK_FLAGS
91   rhs.checkFlags(0);
92 #endif
93 #ifndef COIN_SPARSE_MATRIX
94   // Guaranteed no gaps or small elements
95   matrix_ = new CoinPackedMatrix(*(rhs.matrix_), -1, 0);
96   flags_ = rhs.flags_ & (~0x02);
97 #else
98   // Gaps & small elements preserved
99   matrix_ = new CoinPackedMatrix(*(rhs.matrix_), 0, 0);
100   flags_ = rhs.flags_;
101   if (matrix_->hasGaps())
102     flags_ |= 0x02;
103 #endif
104   numberActiveColumns_ = rhs.numberActiveColumns_;
105   int numberRows = matrix_->getNumRows();
106   if (rhs.rhsOffset_ && numberRows) {
107     rhsOffset_ = ClpCopyOfArray(rhs.rhsOffset_, numberRows);
108   } else {
109     rhsOffset_ = NULL;
110   }
111   if (rhs.rowCopy_) {
112     assert((flags_ & 4) != 0);
113     rowCopy_ = new ClpPackedMatrix2(*rhs.rowCopy_);
114   } else {
115     rowCopy_ = NULL;
116   }
117   if (rhs.columnCopy_) {
118     assert((flags_ & (8 + 16)) == 8 + 16);
119     columnCopy_ = new ClpPackedMatrix3(*rhs.columnCopy_);
120   } else {
121     columnCopy_ = NULL;
122   }
123 #ifdef DO_CHECK_FLAGS
124   checkFlags(0);
125 #endif
126 }
127 //-------------------------------------------------------------------
128 // assign matrix (for space reasons)
129 //-------------------------------------------------------------------
ClpPackedMatrix(CoinPackedMatrix * rhs)130 ClpPackedMatrix::ClpPackedMatrix(CoinPackedMatrix *rhs)
131   : ClpMatrixBase()
132 {
133   matrix_ = rhs;
134   flags_ = ((matrix_->hasGaps()) ? 0x02 : 0);
135   numberActiveColumns_ = matrix_->getNumCols();
136   rowCopy_ = NULL;
137   columnCopy_ = NULL;
138   setType(1);
139 #ifdef DO_CHECK_FLAGS
140   checkFlags(0);
141 #endif
142 }
143 
ClpPackedMatrix(const CoinPackedMatrix & rhs)144 ClpPackedMatrix::ClpPackedMatrix(const CoinPackedMatrix &rhs)
145   : ClpMatrixBase()
146 {
147 #ifndef COIN_SPARSE_MATRIX
148   matrix_ = new CoinPackedMatrix(rhs, -1, 0);
149   flags_ = 0;
150 #else
151   matrix_ = new CoinPackedMatrix(rhs, 0, 0);
152   flags_ = ((matrix_->hasGaps()) ? 0x02 : 0);
153 #endif
154   numberActiveColumns_ = matrix_->getNumCols();
155   rowCopy_ = NULL;
156   columnCopy_ = NULL;
157   setType(1);
158 #ifdef DO_CHECK_FLAGS
159   checkFlags(0);
160 #endif
161 }
162 
163 //-------------------------------------------------------------------
164 // Destructor
165 //-------------------------------------------------------------------
~ClpPackedMatrix()166 ClpPackedMatrix::~ClpPackedMatrix()
167 {
168   delete matrix_;
169   delete rowCopy_;
170   delete columnCopy_;
171 }
172 
173 //----------------------------------------------------------------
174 // Assignment operator
175 //-------------------------------------------------------------------
176 ClpPackedMatrix &
operator =(const ClpPackedMatrix & rhs)177 ClpPackedMatrix::operator=(const ClpPackedMatrix &rhs)
178 {
179   if (this != &rhs) {
180     ClpMatrixBase::operator=(rhs);
181     delete matrix_;
182 #ifndef COIN_SPARSE_MATRIX
183     matrix_ = new CoinPackedMatrix(*(rhs.matrix_), -1, 0);
184     flags_ = rhs.flags_ & (~0x02);
185 #else
186     matrix_ = new CoinPackedMatrix(*(rhs.matrix_), 0, 0);
187     flags_ = rhs.flags_;
188     if (matrix_->hasGaps())
189       flags_ |= 0x02;
190 #endif
191     numberActiveColumns_ = rhs.numberActiveColumns_;
192     delete rowCopy_;
193     delete columnCopy_;
194     if (rhs.rowCopy_) {
195       assert((flags_ & 4) != 0);
196       rowCopy_ = new ClpPackedMatrix2(*rhs.rowCopy_);
197     } else {
198       rowCopy_ = NULL;
199     }
200     if (rhs.columnCopy_) {
201       assert((flags_ & (8 + 16)) == 8 + 16);
202       columnCopy_ = new ClpPackedMatrix3(*rhs.columnCopy_);
203     } else {
204       columnCopy_ = NULL;
205     }
206 #ifdef DO_CHECK_FLAGS
207     checkFlags(0);
208 #endif
209   }
210   return *this;
211 }
212 //-------------------------------------------------------------------
213 // Clone
214 //-------------------------------------------------------------------
clone() const215 ClpMatrixBase *ClpPackedMatrix::clone() const
216 {
217   return new ClpPackedMatrix(*this);
218 }
219 // Copy contents - resizing if necessary - otherwise re-use memory
copy(const ClpPackedMatrix * rhs)220 void ClpPackedMatrix::copy(const ClpPackedMatrix *rhs)
221 {
222   //*this = *rhs;
223   assert(numberActiveColumns_ == rhs->numberActiveColumns_);
224   assert(matrix_->isColOrdered() == rhs->matrix_->isColOrdered());
225   matrix_->copyReuseArrays(*rhs->matrix_);
226 #ifdef DO_CHECK_FLAGS
227   checkFlags(0);
228 #endif
229 }
230 /* Subset clone (without gaps).  Duplicates are allowed
231    and order is as given */
232 ClpMatrixBase *
subsetClone(int numberRows,const int * whichRows,int numberColumns,const int * whichColumns) const233 ClpPackedMatrix::subsetClone(int numberRows, const int *whichRows,
234   int numberColumns,
235   const int *whichColumns) const
236 {
237   return new ClpPackedMatrix(*this, numberRows, whichRows,
238     numberColumns, whichColumns);
239 }
240 /* Subset constructor (without gaps).  Duplicates are allowed
241    and order is as given */
ClpPackedMatrix(const ClpPackedMatrix & rhs,int numberRows,const int * whichRows,int numberColumns,const int * whichColumns)242 ClpPackedMatrix::ClpPackedMatrix(
243   const ClpPackedMatrix &rhs,
244   int numberRows, const int *whichRows,
245   int numberColumns, const int *whichColumns)
246   : ClpMatrixBase(rhs)
247 {
248   matrix_ = new CoinPackedMatrix(*(rhs.matrix_), numberRows, whichRows,
249     numberColumns, whichColumns);
250   numberActiveColumns_ = matrix_->getNumCols();
251   rowCopy_ = NULL;
252   flags_ = rhs.flags_ & (~0x02); // no gaps
253   columnCopy_ = NULL;
254 #ifdef DO_CHECK_FLAGS
255   checkFlags(0);
256 #endif
257 }
ClpPackedMatrix(const CoinPackedMatrix & rhs,int numberRows,const int * whichRows,int numberColumns,const int * whichColumns)258 ClpPackedMatrix::ClpPackedMatrix(
259   const CoinPackedMatrix &rhs,
260   int numberRows, const int *whichRows,
261   int numberColumns, const int *whichColumns)
262   : ClpMatrixBase()
263 {
264   matrix_ = new CoinPackedMatrix(rhs, numberRows, whichRows,
265     numberColumns, whichColumns);
266   numberActiveColumns_ = matrix_->getNumCols();
267   rowCopy_ = NULL;
268   flags_ = 0; // no gaps
269   columnCopy_ = NULL;
270   setType(1);
271 #ifdef DO_CHECK_FLAGS
272   checkFlags(0);
273 #endif
274 }
275 
276 /* Returns a new matrix in reverse order without gaps */
277 ClpMatrixBase *
reverseOrderedCopy() const278 ClpPackedMatrix::reverseOrderedCopy() const
279 {
280   ClpPackedMatrix *copy = new ClpPackedMatrix();
281   copy->matrix_ = new CoinPackedMatrix();
282   copy->matrix_->setExtraGap(0.0);
283   copy->matrix_->setExtraMajor(0.0);
284   copy->matrix_->reverseOrderedCopyOf(*matrix_);
285   //copy->matrix_->removeGaps();
286   copy->numberActiveColumns_ = copy->matrix_->getNumCols();
287   copy->flags_ = flags_ & (~0x02); // no gaps
288 #ifdef DO_CHECK_FLAGS
289   checkFlags(0);
290 #endif
291   return copy;
292 }
293 //unscaled versions
times(double scalar,const double * COIN_RESTRICT x,double * COIN_RESTRICT y) const294 void ClpPackedMatrix::times(double scalar,
295   const double *COIN_RESTRICT x, double *COIN_RESTRICT y) const
296 {
297   int iRow, iColumn;
298   // get matrix data pointers
299   const int *COIN_RESTRICT row = matrix_->getIndices();
300   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
301   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
302   //memset(y,0,matrix_->getNumRows()*sizeof(double));
303   assert(((flags_ & 0x02) != 0) == matrix_->hasGaps());
304   if (!(flags_ & 2)) {
305     for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
306       CoinBigIndex j;
307       double value = x[iColumn];
308       if (value) {
309         CoinBigIndex start = columnStart[iColumn];
310         CoinBigIndex end = columnStart[iColumn + 1];
311         value *= scalar;
312         for (j = start; j < end; j++) {
313           iRow = row[j];
314           y[iRow] += value * elementByColumn[j];
315         }
316       }
317     }
318   } else {
319     const int *columnLength = matrix_->getVectorLengths();
320     for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
321       CoinBigIndex j;
322       double value = x[iColumn];
323       if (value) {
324         CoinBigIndex start = columnStart[iColumn];
325         CoinBigIndex end = start + columnLength[iColumn];
326         value *= scalar;
327         for (j = start; j < end; j++) {
328           iRow = row[j];
329           y[iRow] += value * elementByColumn[j];
330         }
331       }
332     }
333   }
334 }
335 #if ABOCA_LITE
336 static void
transposeTimesBit(clpTempInfo & info)337 transposeTimesBit(clpTempInfo &info)
338 {
339   const CoinBigIndex *COIN_RESTRICT columnStart = info.start;
340   const int *COIN_RESTRICT row = info.row;
341   const double *COIN_RESTRICT elementByColumn = info.element;
342   double *COIN_RESTRICT y = info.spare;
343   const double *COIN_RESTRICT x = info.work;
344   int first = info.startColumn;
345   int last = first + info.numberToDo;
346   CoinBigIndex start = columnStart[first];
347   for (int iColumn = first; iColumn < last; iColumn++) {
348     CoinBigIndex j;
349     CoinBigIndex next = columnStart[iColumn + 1];
350     double value = y[iColumn];
351     for (j = start; j < next; j++) {
352       int jRow = row[j];
353       value -= x[jRow] * elementByColumn[j];
354     }
355     start = next;
356     y[iColumn] = value;
357   }
358 }
359 #endif
transposeTimes(double scalar,const double * COIN_RESTRICT x,double * COIN_RESTRICT y) const360 void ClpPackedMatrix::transposeTimes(double scalar,
361   const double *COIN_RESTRICT x, double *COIN_RESTRICT y) const
362 {
363   int iColumn;
364   // get matrix data pointers
365   const int *COIN_RESTRICT row = matrix_->getIndices();
366   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
367   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
368   if (!(flags_ & 2)) {
369     if (scalar == -1.0) {
370 #if ABOCA_LITE
371       int numberThreads = abcState();
372       if (numberThreads) {
373         clpTempInfo info[ABOCA_LITE];
374         int chunk = (numberActiveColumns_ + numberThreads - 1) / numberThreads;
375         int n = 0;
376         for (int i = 0; i < numberThreads; i++) {
377           info[i].spare = y;
378           info[i].work = const_cast< double * >(x);
379           info[i].startColumn = n;
380           info[i].element = elementByColumn;
381           info[i].start = columnStart;
382           info[i].row = row;
383           info[i].numberToDo = CoinMin(chunk, numberActiveColumns_ - n);
384           n += chunk;
385         }
386         for (int i = 0; i < numberThreads; i++) {
387           cilk_spawn transposeTimesBit(info[i]);
388         }
389         cilk_sync;
390       } else {
391 #endif
392         CoinBigIndex start = columnStart[0];
393         for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
394           CoinBigIndex j;
395           CoinBigIndex next = columnStart[iColumn + 1];
396           double value = y[iColumn];
397           for (j = start; j < next; j++) {
398             int jRow = row[j];
399             value -= x[jRow] * elementByColumn[j];
400           }
401           start = next;
402           y[iColumn] = value;
403         }
404 #if ABOCA_LITE
405       }
406 #endif
407     } else {
408       CoinBigIndex start = columnStart[0];
409       for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
410         CoinBigIndex j;
411         CoinBigIndex next = columnStart[iColumn + 1];
412         double value = 0.0;
413         for (j = start; j < next; j++) {
414           int jRow = row[j];
415           value += x[jRow] * elementByColumn[j];
416         }
417         start = next;
418         y[iColumn] += value * scalar;
419       }
420     }
421   } else {
422     const int *columnLength = matrix_->getVectorLengths();
423     for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
424       CoinBigIndex j;
425       double value = 0.0;
426       CoinBigIndex start = columnStart[iColumn];
427       CoinBigIndex end = start + columnLength[iColumn];
428       for (j = start; j < end; j++) {
429         int jRow = row[j];
430         value += x[jRow] * elementByColumn[j];
431       }
432       y[iColumn] += value * scalar;
433     }
434   }
435 }
times(double scalar,const double * COIN_RESTRICT x,double * COIN_RESTRICT y,const double * COIN_RESTRICT rowScale,const double * COIN_RESTRICT columnScale) const436 void ClpPackedMatrix::times(double scalar,
437   const double *COIN_RESTRICT x, double *COIN_RESTRICT y,
438   const double *COIN_RESTRICT rowScale,
439   const double *COIN_RESTRICT columnScale) const
440 {
441   if (rowScale) {
442     int iRow, iColumn;
443     // get matrix data pointers
444     const int *COIN_RESTRICT row = matrix_->getIndices();
445     const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
446     const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
447     if (!(flags_ & 2)) {
448       for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
449         double value = x[iColumn];
450         if (value) {
451           // scaled
452           value *= scalar * columnScale[iColumn];
453           CoinBigIndex start = columnStart[iColumn];
454           CoinBigIndex end = columnStart[iColumn + 1];
455           CoinBigIndex j;
456           for (j = start; j < end; j++) {
457             iRow = row[j];
458             y[iRow] += value * elementByColumn[j] * rowScale[iRow];
459           }
460         }
461       }
462     } else {
463       const int *COIN_RESTRICT columnLength = matrix_->getVectorLengths();
464       for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
465         double value = x[iColumn];
466         if (value) {
467           // scaled
468           value *= scalar * columnScale[iColumn];
469           CoinBigIndex start = columnStart[iColumn];
470           CoinBigIndex end = start + columnLength[iColumn];
471           CoinBigIndex j;
472           for (j = start; j < end; j++) {
473             iRow = row[j];
474             y[iRow] += value * elementByColumn[j] * rowScale[iRow];
475           }
476         }
477       }
478     }
479   } else {
480     times(scalar, x, y);
481   }
482 }
transposeTimes(double scalar,const double * COIN_RESTRICT x,double * COIN_RESTRICT y,const double * COIN_RESTRICT rowScale,const double * COIN_RESTRICT columnScale,double * COIN_RESTRICT spare) const483 void ClpPackedMatrix::transposeTimes(double scalar,
484   const double *COIN_RESTRICT x, double *COIN_RESTRICT y,
485   const double *COIN_RESTRICT rowScale,
486   const double *COIN_RESTRICT columnScale,
487   double *COIN_RESTRICT spare) const
488 {
489   if (rowScale) {
490     int iColumn;
491     // get matrix data pointers
492     const int *COIN_RESTRICT row = matrix_->getIndices();
493     const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
494     const int *COIN_RESTRICT columnLength = matrix_->getVectorLengths();
495     const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
496     if (!spare) {
497       if (!(flags_ & 2)) {
498         CoinBigIndex start = columnStart[0];
499         if (scalar == -1.0) {
500           for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
501             CoinBigIndex j;
502             CoinBigIndex next = columnStart[iColumn + 1];
503             double value = 0.0;
504             // scaled
505             for (j = start; j < next; j++) {
506               int jRow = row[j];
507               value += x[jRow] * elementByColumn[j] * rowScale[jRow];
508             }
509             start = next;
510             y[iColumn] -= value * columnScale[iColumn];
511           }
512         } else {
513           for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
514             CoinBigIndex j;
515             CoinBigIndex next = columnStart[iColumn + 1];
516             double value = 0.0;
517             // scaled
518             for (j = start; j < next; j++) {
519               int jRow = row[j];
520               value += x[jRow] * elementByColumn[j] * rowScale[jRow];
521             }
522             start = next;
523             y[iColumn] += value * scalar * columnScale[iColumn];
524           }
525         }
526       } else {
527         for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
528           CoinBigIndex j;
529           double value = 0.0;
530           // scaled
531           for (j = columnStart[iColumn];
532                j < columnStart[iColumn] + columnLength[iColumn]; j++) {
533             int jRow = row[j];
534             value += x[jRow] * elementByColumn[j] * rowScale[jRow];
535           }
536           y[iColumn] += value * scalar * columnScale[iColumn];
537         }
538       }
539     } else {
540       // can use spare region
541       int iRow;
542       int numberRows = matrix_->getNumRows();
543       for (iRow = 0; iRow < numberRows; iRow++) {
544         double value = x[iRow];
545         if (value)
546           spare[iRow] = value * rowScale[iRow];
547         else
548           spare[iRow] = 0.0;
549       }
550       if (!(flags_ & 2)) {
551         CoinBigIndex start = columnStart[0];
552         for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
553           CoinBigIndex j;
554           CoinBigIndex next = columnStart[iColumn + 1];
555           double value = 0.0;
556           // scaled
557           for (j = start; j < next; j++) {
558             int jRow = row[j];
559             value += spare[jRow] * elementByColumn[j];
560           }
561           start = next;
562           y[iColumn] += value * scalar * columnScale[iColumn];
563         }
564       } else {
565         for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
566           CoinBigIndex j;
567           double value = 0.0;
568           // scaled
569           for (j = columnStart[iColumn];
570                j < columnStart[iColumn] + columnLength[iColumn]; j++) {
571             int jRow = row[j];
572             value += spare[jRow] * elementByColumn[j];
573           }
574           y[iColumn] += value * scalar * columnScale[iColumn];
575         }
576       }
577       // no need to zero out
578       //for (iRow=0;iRow<numberRows;iRow++)
579       //spare[iRow] = 0.0;
580     }
581   } else {
582     transposeTimes(scalar, x, y);
583   }
584 }
585 #if ABOCA_LITE
586 static void
transposeTimesSubsetBit(clpTempInfo & info)587 transposeTimesSubsetBit(clpTempInfo &info)
588 {
589   const CoinBigIndex *COIN_RESTRICT columnStart = info.start;
590   const int *COIN_RESTRICT row = info.row;
591   const double *COIN_RESTRICT elementByColumn = info.element;
592   double *COIN_RESTRICT y = info.spare;
593   const double *COIN_RESTRICT x = info.work;
594   const int *COIN_RESTRICT which = info.which;
595   int first = info.startColumn;
596   int last = first + info.numberToDo;
597   for (int i = first; i < last; i++) {
598     int iColumn = which[i];
599     CoinBigIndex j;
600     CoinBigIndex start = columnStart[iColumn];
601     CoinBigIndex next = columnStart[iColumn + 1];
602     double value = 0.0;
603     for (j = start; j < next; j++) {
604       int jRow = row[j];
605       value += x[jRow] * elementByColumn[j];
606     }
607     start = next;
608     y[iColumn] -= value;
609   }
610 }
611 #endif
transposeTimesSubset(int number,const int * which,const double * COIN_RESTRICT x,double * COIN_RESTRICT y,const double * COIN_RESTRICT rowScale,const double * COIN_RESTRICT columnScale,double * COIN_RESTRICT spare) const612 void ClpPackedMatrix::transposeTimesSubset(int number,
613   const int *which,
614   const double *COIN_RESTRICT x, double *COIN_RESTRICT y,
615   const double *COIN_RESTRICT rowScale,
616   const double *COIN_RESTRICT columnScale,
617   double *COIN_RESTRICT spare) const
618 {
619   // get matrix data pointers
620   const int *COIN_RESTRICT row = matrix_->getIndices();
621   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
622   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
623   if (!spare || !rowScale) {
624     if (rowScale) {
625       for (int jColumn = 0; jColumn < number; jColumn++) {
626         int iColumn = which[jColumn];
627         CoinBigIndex j;
628         CoinBigIndex start = columnStart[iColumn];
629         CoinBigIndex next = columnStart[iColumn + 1];
630         double value = 0.0;
631         for (j = start; j < next; j++) {
632           int jRow = row[j];
633           value += x[jRow] * elementByColumn[j] * rowScale[jRow];
634         }
635         y[iColumn] -= value * columnScale[iColumn];
636       }
637     } else {
638 #if ABOCA_LITE
639       int numberThreads = abcState();
640       if (numberThreads) {
641         clpTempInfo info[ABOCA_LITE];
642         int chunk = (number + numberThreads - 1) / numberThreads;
643         int n = 0;
644         for (int i = 0; i < numberThreads; i++) {
645           info[i].spare = y;
646           info[i].work = const_cast< double * >(x);
647           info[i].which = const_cast< int * >(which);
648           info[i].startColumn = n;
649           info[i].element = elementByColumn;
650           info[i].start = columnStart;
651           info[i].row = row;
652           info[i].numberToDo = CoinMin(chunk, number - n);
653           n += chunk;
654         }
655         for (int i = 0; i < numberThreads; i++) {
656           cilk_spawn transposeTimesSubsetBit(info[i]);
657         }
658         cilk_sync;
659       } else {
660 #endif
661         for (int jColumn = 0; jColumn < number; jColumn++) {
662           int iColumn = which[jColumn];
663           CoinBigIndex j;
664           CoinBigIndex start = columnStart[iColumn];
665           CoinBigIndex next = columnStart[iColumn + 1];
666           double value = 0.0;
667           for (j = start; j < next; j++) {
668             int jRow = row[j];
669             value += x[jRow] * elementByColumn[j];
670           }
671           y[iColumn] -= value;
672         }
673 #if ABOCA_LITE
674       }
675 #endif
676     }
677   } else {
678     // can use spare region
679     int iRow;
680     int numberRows = matrix_->getNumRows();
681     for (iRow = 0; iRow < numberRows; iRow++) {
682       double value = x[iRow];
683       if (value)
684         spare[iRow] = value * rowScale[iRow];
685       else
686         spare[iRow] = 0.0;
687     }
688     for (int jColumn = 0; jColumn < number; jColumn++) {
689       int iColumn = which[jColumn];
690       CoinBigIndex j;
691       CoinBigIndex start = columnStart[iColumn];
692       CoinBigIndex next = columnStart[iColumn + 1];
693       double value = 0.0;
694       for (j = start; j < next; j++) {
695         int jRow = row[j];
696         value += spare[jRow] * elementByColumn[j];
697       }
698       y[iColumn] -= value * columnScale[iColumn];
699     }
700   }
701 }
702 /* Return <code>x * A in <code>z</code>.
703 	Squashes small elements and knows about ClpSimplex */
transposeTimes(const ClpSimplex * model,double scalar,const CoinIndexedVector * rowArray,CoinIndexedVector * y,CoinIndexedVector * columnArray) const704 void ClpPackedMatrix::transposeTimes(const ClpSimplex *model, double scalar,
705   const CoinIndexedVector *rowArray,
706   CoinIndexedVector *y,
707   CoinIndexedVector *columnArray) const
708 {
709   columnArray->clear();
710   double *COIN_RESTRICT pi = rowArray->denseVector();
711   int numberNonZero = 0;
712   int *COIN_RESTRICT index = columnArray->getIndices();
713   double *COIN_RESTRICT array = columnArray->denseVector();
714   int numberInRowArray = rowArray->getNumElements();
715   // maybe I need one in OsiSimplex
716   double zeroTolerance = model->zeroTolerance();
717 #if 0 //def COIN_DEVELOP
718      if (zeroTolerance != 1.0e-13) {
719           printf("small element in matrix - zero tolerance %g\n", zeroTolerance);
720      }
721 #endif
722   int numberRows = model->numberRows();
723   ClpPackedMatrix *rowCopy = static_cast< ClpPackedMatrix * >(model->rowCopy());
724   bool packed = rowArray->packedMode();
725   double factor = (numberRows < 100) ? 0.25 : 0.35;
726   factor = 0.5;
727   // We may not want to do by row if there may be cache problems
728   // It would be nice to find L2 cache size - for moment 512K
729   // Be slightly optimistic
730   if (numberActiveColumns_ * sizeof(double) > 1000000) {
731     if (numberRows * 10 < numberActiveColumns_)
732       factor *= 0.333333333;
733     else if (numberRows * 4 < numberActiveColumns_)
734       factor *= 0.5;
735     else if (numberRows * 2 < numberActiveColumns_)
736       factor *= 0.66666666667;
737     //if (model->numberIterations()%50==0)
738     //printf("%d nonzero\n",numberInRowArray);
739   }
740   // if not packed then bias a bit more towards by column
741   if (!packed)
742     factor *= 0.9;
743   assert(!y->getNumElements());
744   double multiplierX = 0.8;
745   double factor2 = factor * multiplierX;
746   if (packed && rowCopy_ && numberInRowArray > 2 && numberInRowArray > factor2 * numberRows && numberInRowArray < 0.9 * numberRows && 0) {
747     rowCopy_->transposeTimes(model, rowCopy->matrix_, rowArray, y, columnArray);
748     return;
749   }
750   if (columnCopy_)
751     factor *= 0.7;
752   if (numberInRowArray > factor * numberRows || !rowCopy) {
753     // do by column
754     // If no gaps - can do a bit faster
755     if (!(flags_ & 2) || columnCopy_) {
756       transposeTimesByColumn(model, scalar,
757         rowArray, y, columnArray);
758       return;
759     }
760     int iColumn;
761     // get matrix data pointers
762     const int *COIN_RESTRICT row = matrix_->getIndices();
763     const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
764     const int *COIN_RESTRICT columnLength = matrix_->getVectorLengths();
765     const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
766     const double *COIN_RESTRICT rowScale = model->rowScale();
767 #if 0
768           ClpPackedMatrix * scaledMatrix = model->clpScaledMatrix();
769           if (rowScale && scaledMatrix) {
770                rowScale = NULL;
771                // get matrix data pointers
772                row = scaledMatrix->getIndices();
773                columnStart = scaledMatrix->getVectorStarts();
774                columnLength = scaledMatrix->getVectorLengths();
775                elementByColumn = scaledMatrix->getElements();
776           }
777 #endif
778     if (packed) {
779       // need to expand pi into y
780       assert(y->capacity() >= numberRows);
781       double *COIN_RESTRICT piOld = pi;
782       pi = y->denseVector();
783       const int *COIN_RESTRICT whichRow = rowArray->getIndices();
784       int i;
785       if (!rowScale) {
786         // modify pi so can collapse to one loop
787         if (scalar == -1.0) {
788           for (i = 0; i < numberInRowArray; i++) {
789             int iRow = whichRow[i];
790             pi[iRow] = -piOld[i];
791           }
792         } else {
793           for (i = 0; i < numberInRowArray; i++) {
794             int iRow = whichRow[i];
795             pi[iRow] = scalar * piOld[i];
796           }
797         }
798         for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
799           double value = 0.0;
800           CoinBigIndex j;
801           for (j = columnStart[iColumn];
802                j < columnStart[iColumn] + columnLength[iColumn]; j++) {
803             int iRow = row[j];
804             value += pi[iRow] * elementByColumn[j];
805           }
806           if (fabs(value) > zeroTolerance) {
807             array[numberNonZero] = value;
808             index[numberNonZero++] = iColumn;
809           }
810         }
811       } else {
812 #ifdef CLP_INVESTIGATE
813         if (model->clpScaledMatrix())
814           printf("scaledMatrix_ at %d of ClpPackedMatrix\n", __LINE__);
815 #endif
816         // scaled
817         // modify pi so can collapse to one loop
818         if (scalar == -1.0) {
819           for (i = 0; i < numberInRowArray; i++) {
820             int iRow = whichRow[i];
821             pi[iRow] = -piOld[i] * rowScale[iRow];
822           }
823         } else {
824           for (i = 0; i < numberInRowArray; i++) {
825             int iRow = whichRow[i];
826             pi[iRow] = scalar * piOld[i] * rowScale[iRow];
827           }
828         }
829         for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
830           double value = 0.0;
831           CoinBigIndex j;
832           const double *columnScale = model->columnScale();
833           for (j = columnStart[iColumn];
834                j < columnStart[iColumn] + columnLength[iColumn]; j++) {
835             int iRow = row[j];
836             value += pi[iRow] * elementByColumn[j];
837           }
838           value *= columnScale[iColumn];
839           if (fabs(value) > zeroTolerance) {
840             array[numberNonZero] = value;
841             index[numberNonZero++] = iColumn;
842           }
843         }
844       }
845       // zero out
846       for (i = 0; i < numberInRowArray; i++) {
847         int iRow = whichRow[i];
848         pi[iRow] = 0.0;
849       }
850     } else {
851       if (!rowScale) {
852         if (scalar == -1.0) {
853           for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
854             double value = 0.0;
855             CoinBigIndex j;
856             for (j = columnStart[iColumn];
857                  j < columnStart[iColumn] + columnLength[iColumn]; j++) {
858               int iRow = row[j];
859               value += pi[iRow] * elementByColumn[j];
860             }
861             if (fabs(value) > zeroTolerance) {
862               index[numberNonZero++] = iColumn;
863               array[iColumn] = -value;
864             }
865           }
866         } else {
867           for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
868             double value = 0.0;
869             CoinBigIndex j;
870             for (j = columnStart[iColumn];
871                  j < columnStart[iColumn] + columnLength[iColumn]; j++) {
872               int iRow = row[j];
873               value += pi[iRow] * elementByColumn[j];
874             }
875             value *= scalar;
876             if (fabs(value) > zeroTolerance) {
877               index[numberNonZero++] = iColumn;
878               array[iColumn] = value;
879             }
880           }
881         }
882       } else {
883 #ifdef CLP_INVESTIGATE
884         if (model->clpScaledMatrix())
885           printf("scaledMatrix_ at %d of ClpPackedMatrix\n", __LINE__);
886 #endif
887         // scaled
888         if (scalar == -1.0) {
889           for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
890             double value = 0.0;
891             CoinBigIndex j;
892             const double *columnScale = model->columnScale();
893             for (j = columnStart[iColumn];
894                  j < columnStart[iColumn] + columnLength[iColumn]; j++) {
895               int iRow = row[j];
896               value += pi[iRow] * elementByColumn[j] * rowScale[iRow];
897             }
898             value *= columnScale[iColumn];
899             if (fabs(value) > zeroTolerance) {
900               index[numberNonZero++] = iColumn;
901               array[iColumn] = -value;
902             }
903           }
904         } else {
905           for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
906             double value = 0.0;
907             CoinBigIndex j;
908             const double *columnScale = model->columnScale();
909             for (j = columnStart[iColumn];
910                  j < columnStart[iColumn] + columnLength[iColumn]; j++) {
911               int iRow = row[j];
912               value += pi[iRow] * elementByColumn[j] * rowScale[iRow];
913             }
914             value *= scalar * columnScale[iColumn];
915             if (fabs(value) > zeroTolerance) {
916               index[numberNonZero++] = iColumn;
917               array[iColumn] = value;
918             }
919           }
920         }
921       }
922     }
923     columnArray->setNumElements(numberNonZero);
924     y->setNumElements(0);
925   } else {
926     // do by row
927     rowCopy->transposeTimesByRow(model, scalar, rowArray, y, columnArray);
928   }
929   if (packed)
930     columnArray->setPackedMode(true);
931   if (0) {
932     columnArray->checkClean();
933     int numberNonZero = columnArray->getNumElements();
934     ;
935     int *index = columnArray->getIndices();
936     double *array = columnArray->denseVector();
937     int i;
938     for (i = 0; i < numberNonZero; i++) {
939       int j = index[i];
940       double value;
941       if (packed)
942         value = array[i];
943       else
944         value = array[j];
945       printf("Ti %d %d %g\n", i, j, value);
946     }
947   }
948 }
949 //static int xA=0;
950 //static int xB=0;
951 //static int xC=0;
952 //static int xD=0;
953 //static double yA=0.0;
954 //static double yC=0.0;
955 /* Return <code>x * scalar * A in <code>z</code>.
956    Note - If x packed mode - then z packed mode
957    This does by column and knows no gaps
958    Squashes small elements and knows about ClpSimplex */
transposeTimesByColumn(const ClpSimplex * model,double scalar,const CoinIndexedVector * rowArray,CoinIndexedVector * y,CoinIndexedVector * columnArray) const959 void ClpPackedMatrix::transposeTimesByColumn(const ClpSimplex *model, double scalar,
960   const CoinIndexedVector *rowArray,
961   CoinIndexedVector *y,
962   CoinIndexedVector *columnArray) const
963 {
964   double *COIN_RESTRICT pi = rowArray->denseVector();
965   int numberNonZero = 0;
966   int *COIN_RESTRICT index = columnArray->getIndices();
967   double *COIN_RESTRICT array = columnArray->denseVector();
968   int numberInRowArray = rowArray->getNumElements();
969   // maybe I need one in OsiSimplex
970   double zeroTolerance = model->zeroTolerance();
971   bool packed = rowArray->packedMode();
972   // do by column
973   int iColumn;
974   // get matrix data pointers
975   const int *COIN_RESTRICT row = matrix_->getIndices();
976   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
977   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
978   const double *COIN_RESTRICT rowScale = model->rowScale();
979   assert(!y->getNumElements());
980   assert(numberActiveColumns_ > 0);
981   const ClpPackedMatrix *thisMatrix = this;
982 #if 0
983      ClpPackedMatrix * scaledMatrix = model->clpScaledMatrix();
984      if (rowScale && scaledMatrix) {
985           rowScale = NULL;
986           // get matrix data pointers
987           row = scaledMatrix->getIndices();
988           columnStart = scaledMatrix->getVectorStarts();
989           elementByColumn = scaledMatrix->getElements();
990           thisMatrix = scaledMatrix;
991           //printf("scaledMatrix\n");
992      } else if (rowScale) {
993           //printf("no scaledMatrix\n");
994      } else {
995           //printf("no rowScale\n");
996      }
997 #endif
998   if (packed) {
999     // need to expand pi into y
1000     assert(y->capacity() >= model->numberRows());
1001     double *COIN_RESTRICT piOld = pi;
1002     pi = y->denseVector();
1003     const int *COIN_RESTRICT whichRow = rowArray->getIndices();
1004     int i;
1005     if (!rowScale) {
1006       // modify pi so can collapse to one loop
1007       if (scalar == -1.0) {
1008         //yA += numberInRowArray;
1009         for (i = 0; i < numberInRowArray; i++) {
1010           int iRow = whichRow[i];
1011           pi[iRow] = -piOld[i];
1012         }
1013       } else {
1014         for (i = 0; i < numberInRowArray; i++) {
1015           int iRow = whichRow[i];
1016           pi[iRow] = scalar * piOld[i];
1017         }
1018       }
1019       if (!columnCopy_) {
1020         if ((model->specialOptions(), 131072) != 0) {
1021           if (model->spareIntArray_[0] > 0) {
1022             CoinIndexedVector *spareArray = model->rowArray(3);
1023             // also do dualColumn stuff
1024             double *COIN_RESTRICT spare = spareArray->denseVector();
1025             int *COIN_RESTRICT spareIndex = spareArray->getIndices();
1026             const double *COIN_RESTRICT reducedCost = model->djRegion(0);
1027             double multiplier[] = { -1.0, 1.0 };
1028             double dualT = -model->currentDualTolerance();
1029             double acceptablePivot = model->spareDoubleArray_[0];
1030             // We can also see if infeasible or pivoting on free
1031             double tentativeTheta = 1.0e15;
1032             double upperTheta = 1.0e31;
1033             int addSequence = model->numberColumns();
1034             const unsigned char *COIN_RESTRICT statusArray = model->statusArray() + addSequence;
1035             int numberRemaining = 0;
1036             assert(scalar == -1.0);
1037             for (i = 0; i < numberInRowArray; i++) {
1038               int iSequence = whichRow[i];
1039               int iStatus = (statusArray[iSequence] & 3) - 1;
1040               if (iStatus) {
1041                 double mult = multiplier[iStatus - 1];
1042                 double alpha = piOld[i] * mult;
1043                 double oldValue;
1044                 double value;
1045                 if (alpha > 0.0) {
1046                   oldValue = reducedCost[iSequence] * mult;
1047                   value = oldValue - tentativeTheta * alpha;
1048                   if (value < dualT) {
1049                     value = oldValue - upperTheta * alpha;
1050                     if (value < dualT && alpha >= acceptablePivot) {
1051                       upperTheta = (oldValue - dualT) / alpha;
1052                       //tentativeTheta = CoinMin(2.0*upperTheta,tentativeTheta);
1053                     }
1054                     // add to list
1055                     spare[numberRemaining] = alpha * mult;
1056                     spareIndex[numberRemaining++] = iSequence + addSequence;
1057                   }
1058                 }
1059               }
1060             }
1061             numberNonZero = thisMatrix->gutsOfTransposeTimesUnscaled(pi,
1062               columnArray->getIndices(),
1063               columnArray->denseVector(),
1064               model->statusArray(),
1065               spareIndex,
1066               spare,
1067               model->djRegion(1),
1068               upperTheta,
1069               acceptablePivot,
1070               model->currentDualTolerance(),
1071               numberRemaining,
1072               zeroTolerance);
1073             model->spareDoubleArray_[0] = upperTheta;
1074             spareArray->setNumElements(numberRemaining);
1075 #if 0
1076                               columnArray->setNumElements(numberNonZero);
1077 			      spareArray->setPackedMode(true);
1078 			      columnArray->setPackedMode(true);
1079 			      spareArray->checkClean();
1080 			      columnArray->checkClean();
1081 			      printf("col %d spare %d upper %g\n",
1082 				     columnArray->getNumElements(),
1083 				     spareArray->getNumElements(),upperTheta);
1084 #endif
1085             // signal partially done
1086             model->spareIntArray_[0] = -2;
1087           } else {
1088             numberNonZero = thisMatrix->gutsOfTransposeTimesUnscaled(pi,
1089               columnArray->getIndices(),
1090               columnArray->denseVector(),
1091               model->statusArray(),
1092               zeroTolerance);
1093           }
1094         } else {
1095           numberNonZero = thisMatrix->gutsOfTransposeTimesUnscaled(pi,
1096             columnArray->getIndices(),
1097             columnArray->denseVector(),
1098             zeroTolerance);
1099         }
1100         columnArray->setNumElements(numberNonZero);
1101         //xA++;
1102       } else {
1103         if ((model->moreSpecialOptions() & 8) != 0 && model->algorithm() < 0) {
1104           columnCopy_->transposeTimes(model, pi, columnArray,
1105             model->rowArray(3), rowArray);
1106           model->spareIntArray_[0] = -2;
1107         } else {
1108           columnCopy_->transposeTimes(model, pi, columnArray);
1109         }
1110         numberNonZero = columnArray->getNumElements();
1111         //xB++;
1112       }
1113     } else {
1114 #ifdef CLP_INVESTIGATE
1115       if (model->clpScaledMatrix())
1116         printf("scaledMatrix_ at %d of ClpPackedMatrix\n", __LINE__);
1117 #endif
1118       // scaled
1119       // modify pi so can collapse to one loop
1120       if (scalar == -1.0) {
1121         //yC += numberInRowArray;
1122         for (i = 0; i < numberInRowArray; i++) {
1123           int iRow = whichRow[i];
1124           pi[iRow] = -piOld[i] * rowScale[iRow];
1125         }
1126       } else {
1127         for (i = 0; i < numberInRowArray; i++) {
1128           int iRow = whichRow[i];
1129           pi[iRow] = scalar * piOld[i] * rowScale[iRow];
1130         }
1131       }
1132       const double *columnScale = model->columnScale();
1133       if (!columnCopy_) {
1134         if ((model->specialOptions(), 131072) != 0)
1135           numberNonZero = gutsOfTransposeTimesScaled(pi, columnScale,
1136             columnArray->getIndices(),
1137             columnArray->denseVector(),
1138             model->statusArray(),
1139             zeroTolerance);
1140         else
1141           numberNonZero = gutsOfTransposeTimesScaled(pi, columnScale,
1142             columnArray->getIndices(),
1143             columnArray->denseVector(),
1144             zeroTolerance);
1145         columnArray->setNumElements(numberNonZero);
1146         //xC++;
1147       } else {
1148         if ((model->moreSpecialOptions() & 8) != 0 && model->algorithm() < 0) {
1149           columnCopy_->transposeTimes(model, pi, columnArray,
1150             model->rowArray(3), rowArray);
1151           model->spareIntArray_[0] = -2;
1152         } else {
1153           columnCopy_->transposeTimes(model, pi, columnArray);
1154         }
1155         numberNonZero = columnArray->getNumElements();
1156         //xD++;
1157       }
1158     }
1159     // zero out
1160     int numberRows = model->numberRows();
1161     if (numberInRowArray * 4 < numberRows) {
1162       for (i = 0; i < numberInRowArray; i++) {
1163         int iRow = whichRow[i];
1164         pi[iRow] = 0.0;
1165       }
1166     } else {
1167       CoinZeroN(pi, numberRows);
1168     }
1169     //int kA=xA+xB;
1170     //int kC=xC+xD;
1171     //if ((kA+kC)%10000==0)
1172     //printf("AA %d %d %g, CC %d %d %g\n",
1173     //     xA,xB,kA ? yA/(double)(kA): 0.0,xC,xD,kC ? yC/(double) (kC) :0.0);
1174   } else {
1175     if (!rowScale) {
1176       if (scalar == -1.0) {
1177         double value = 0.0;
1178         CoinBigIndex j;
1179         CoinBigIndex end = columnStart[1];
1180         for (j = columnStart[0]; j < end; j++) {
1181           int iRow = row[j];
1182           value += pi[iRow] * elementByColumn[j];
1183         }
1184         for (iColumn = 0; iColumn < numberActiveColumns_ - 1; iColumn++) {
1185           CoinBigIndex start = end;
1186           end = columnStart[iColumn + 2];
1187           if (fabs(value) > zeroTolerance) {
1188             array[iColumn] = -value;
1189             index[numberNonZero++] = iColumn;
1190           }
1191           value = 0.0;
1192           for (j = start; j < end; j++) {
1193             int iRow = row[j];
1194             value += pi[iRow] * elementByColumn[j];
1195           }
1196         }
1197         if (fabs(value) > zeroTolerance) {
1198           array[iColumn] = -value;
1199           index[numberNonZero++] = iColumn;
1200         }
1201       } else {
1202         double value = 0.0;
1203         CoinBigIndex j;
1204         CoinBigIndex end = columnStart[1];
1205         for (j = columnStart[0]; j < end; j++) {
1206           int iRow = row[j];
1207           value += pi[iRow] * elementByColumn[j];
1208         }
1209         for (iColumn = 0; iColumn < numberActiveColumns_ - 1; iColumn++) {
1210           value *= scalar;
1211           CoinBigIndex start = end;
1212           end = columnStart[iColumn + 2];
1213           if (fabs(value) > zeroTolerance) {
1214             array[iColumn] = value;
1215             index[numberNonZero++] = iColumn;
1216           }
1217           value = 0.0;
1218           for (j = start; j < end; j++) {
1219             int iRow = row[j];
1220             value += pi[iRow] * elementByColumn[j];
1221           }
1222         }
1223         value *= scalar;
1224         if (fabs(value) > zeroTolerance) {
1225           array[iColumn] = value;
1226           index[numberNonZero++] = iColumn;
1227         }
1228       }
1229     } else {
1230 #ifdef CLP_INVESTIGATE
1231       if (model->clpScaledMatrix())
1232         printf("scaledMatrix_ at %d of ClpPackedMatrix\n", __LINE__);
1233 #endif
1234       // scaled
1235       if (scalar == -1.0) {
1236         const double *COIN_RESTRICT columnScale = model->columnScale();
1237         double value = 0.0;
1238         double scale = columnScale[0];
1239         CoinBigIndex j;
1240         CoinBigIndex end = columnStart[1];
1241         for (j = columnStart[0]; j < end; j++) {
1242           int iRow = row[j];
1243           value += pi[iRow] * elementByColumn[j] * rowScale[iRow];
1244         }
1245         for (iColumn = 0; iColumn < numberActiveColumns_ - 1; iColumn++) {
1246           value *= scale;
1247           CoinBigIndex start = end;
1248           end = columnStart[iColumn + 2];
1249           scale = columnScale[iColumn + 1];
1250           if (fabs(value) > zeroTolerance) {
1251             array[iColumn] = -value;
1252             index[numberNonZero++] = iColumn;
1253           }
1254           value = 0.0;
1255           for (j = start; j < end; j++) {
1256             int iRow = row[j];
1257             value += pi[iRow] * elementByColumn[j] * rowScale[iRow];
1258           }
1259         }
1260         value *= scale;
1261         if (fabs(value) > zeroTolerance) {
1262           array[iColumn] = -value;
1263           index[numberNonZero++] = iColumn;
1264         }
1265       } else {
1266         const double *COIN_RESTRICT columnScale = model->columnScale();
1267         double value = 0.0;
1268         double scale = columnScale[0] * scalar;
1269         CoinBigIndex j;
1270         CoinBigIndex end = columnStart[1];
1271         for (j = columnStart[0]; j < end; j++) {
1272           int iRow = row[j];
1273           value += pi[iRow] * elementByColumn[j] * rowScale[iRow];
1274         }
1275         for (iColumn = 0; iColumn < numberActiveColumns_ - 1; iColumn++) {
1276           value *= scale;
1277           CoinBigIndex start = end;
1278           end = columnStart[iColumn + 2];
1279           scale = columnScale[iColumn + 1] * scalar;
1280           if (fabs(value) > zeroTolerance) {
1281             array[iColumn] = value;
1282             index[numberNonZero++] = iColumn;
1283           }
1284           value = 0.0;
1285           for (j = start; j < end; j++) {
1286             int iRow = row[j];
1287             value += pi[iRow] * elementByColumn[j] * rowScale[iRow];
1288           }
1289         }
1290         value *= scale;
1291         if (fabs(value) > zeroTolerance) {
1292           array[iColumn] = value;
1293           index[numberNonZero++] = iColumn;
1294         }
1295       }
1296     }
1297   }
1298   columnArray->setNumElements(numberNonZero);
1299   y->setNumElements(0);
1300   if (packed)
1301     columnArray->setPackedMode(true);
1302 }
1303 /* Return <code>x * A in <code>z</code>.
1304 	Squashes small elements and knows about ClpSimplex */
transposeTimesByRow(const ClpSimplex * model,double scalar,const CoinIndexedVector * rowArray,CoinIndexedVector * y,CoinIndexedVector * columnArray) const1305 void ClpPackedMatrix::transposeTimesByRow(const ClpSimplex *model, double scalar,
1306   const CoinIndexedVector *rowArray,
1307   CoinIndexedVector *y,
1308   CoinIndexedVector *columnArray) const
1309 {
1310   columnArray->clear();
1311   double *COIN_RESTRICT pi = rowArray->denseVector();
1312   int numberNonZero = 0;
1313   int *COIN_RESTRICT index = columnArray->getIndices();
1314   double *COIN_RESTRICT array = columnArray->denseVector();
1315   int numberInRowArray = rowArray->getNumElements();
1316   // maybe I need one in OsiSimplex
1317   double zeroTolerance = model->zeroTolerance();
1318   const int *COIN_RESTRICT column = matrix_->getIndices();
1319   const CoinBigIndex *COIN_RESTRICT rowStart = getVectorStarts();
1320   const double *COIN_RESTRICT element = getElements();
1321   const int *COIN_RESTRICT whichRow = rowArray->getIndices();
1322   bool packed = rowArray->packedMode();
1323   if (numberInRowArray > 2) {
1324     // do by rows
1325     // ** Row copy is already scaled
1326     int iRow;
1327     int i;
1328     int numberOriginal = 0;
1329     if (packed) {
1330       int *COIN_RESTRICT index = columnArray->getIndices();
1331       double *COIN_RESTRICT array = columnArray->denseVector();
1332 #if 0
1333 	       {
1334 		 double  * COIN_RESTRICT array2 = y->denseVector();
1335 		 int numberColumns = matrix_->getNumCols();
1336 		 for (int i=0;i<numberColumns;i++) {
1337 		   assert(!array[i]);
1338 		   assert(!array2[i]);
1339 		 }
1340 	       }
1341 #endif
1342 #define COIN_SPARSE_MATRIX 2
1343       CoinBigIndex numberCovered = 0;
1344       int numberColumns = matrix_->getNumCols();
1345       bool sparse = true;
1346       for (int i = 0; i < numberInRowArray; i++) {
1347         int iRow = whichRow[i];
1348         numberCovered += rowStart[iRow + 1] - rowStart[iRow];
1349         // ? exact ratio
1350         if (numberCovered > numberColumns) {
1351           sparse = false;
1352           break;
1353         }
1354       }
1355       if (sparse) {
1356         assert(!y->getNumElements());
1357 #if COIN_SPARSE_MATRIX != 2
1358         // and set up mark as char array
1359         char *COIN_RESTRICT marked = reinterpret_cast< char * >(index + columnArray->capacity());
1360         int *COIN_RESTRICT lookup = y->getIndices();
1361 #ifndef NDEBUG
1362         //int numberColumns = matrix_->getNumCols();
1363         //for (int i=0;i<numberColumns;i++)
1364         //assert(!marked[i]);
1365 #endif
1366         numberNonZero = gutsOfTransposeTimesByRowGE3a(rowArray, index, array,
1367           lookup, marked, zeroTolerance, scalar);
1368 #else
1369         double *COIN_RESTRICT array2 = y->denseVector();
1370         numberNonZero = gutsOfTransposeTimesByRowGE3(rowArray, index, array,
1371           array2, zeroTolerance, scalar);
1372 #endif
1373       } else {
1374         numberNonZero = gutsOfTransposeTimesByRowGEK(rowArray, index, array,
1375           numberColumns, zeroTolerance, scalar);
1376       }
1377       columnArray->setNumElements(numberNonZero);
1378     } else {
1379       double *COIN_RESTRICT markVector = y->denseVector();
1380       numberNonZero = 0;
1381       // and set up mark as char array
1382       char *COIN_RESTRICT marked = reinterpret_cast< char * >(markVector);
1383       for (i = 0; i < numberOriginal; i++) {
1384         int iColumn = index[i];
1385         marked[iColumn] = 0;
1386       }
1387 
1388       for (i = 0; i < numberInRowArray; i++) {
1389         iRow = whichRow[i];
1390         double value = pi[iRow] * scalar;
1391         CoinBigIndex j;
1392         for (j = rowStart[iRow]; j < rowStart[iRow + 1]; j++) {
1393           int iColumn = column[j];
1394           if (!marked[iColumn]) {
1395             marked[iColumn] = 1;
1396             index[numberNonZero++] = iColumn;
1397           }
1398           array[iColumn] += value * element[j];
1399         }
1400       }
1401       // get rid of tiny values and zero out marked
1402       numberOriginal = numberNonZero;
1403       numberNonZero = 0;
1404       for (i = 0; i < numberOriginal; i++) {
1405         int iColumn = index[i];
1406         marked[iColumn] = 0;
1407         if (fabs(array[iColumn]) > zeroTolerance) {
1408           index[numberNonZero++] = iColumn;
1409         } else {
1410           array[iColumn] = 0.0;
1411         }
1412       }
1413     }
1414   } else if (numberInRowArray == 2) {
1415     // do by rows when two rows
1416     int numberOriginal;
1417     int i;
1418     CoinBigIndex j;
1419     numberNonZero = 0;
1420 
1421     double value;
1422     if (packed) {
1423       gutsOfTransposeTimesByRowEQ2(rowArray, columnArray, y, zeroTolerance, scalar);
1424       numberNonZero = columnArray->getNumElements();
1425     } else {
1426       int iRow = whichRow[0];
1427       value = pi[iRow] * scalar;
1428       for (j = rowStart[iRow]; j < rowStart[iRow + 1]; j++) {
1429         int iColumn = column[j];
1430         double value2 = value * element[j];
1431         index[numberNonZero++] = iColumn;
1432         array[iColumn] = value2;
1433       }
1434       iRow = whichRow[1];
1435       value = pi[iRow] * scalar;
1436       for (j = rowStart[iRow]; j < rowStart[iRow + 1]; j++) {
1437         int iColumn = column[j];
1438         double value2 = value * element[j];
1439         // I am assuming no zeros in matrix
1440         if (array[iColumn])
1441           value2 += array[iColumn];
1442         else
1443           index[numberNonZero++] = iColumn;
1444         array[iColumn] = value2;
1445       }
1446       // get rid of tiny values and zero out marked
1447       numberOriginal = numberNonZero;
1448       numberNonZero = 0;
1449       for (i = 0; i < numberOriginal; i++) {
1450         int iColumn = index[i];
1451         if (fabs(array[iColumn]) > zeroTolerance) {
1452           index[numberNonZero++] = iColumn;
1453         } else {
1454           array[iColumn] = 0.0;
1455         }
1456       }
1457     }
1458   } else if (numberInRowArray == 1) {
1459     // Just one row
1460     int iRow = rowArray->getIndices()[0];
1461     numberNonZero = 0;
1462     CoinBigIndex j;
1463     if (packed) {
1464       gutsOfTransposeTimesByRowEQ1(rowArray, columnArray, zeroTolerance, scalar);
1465       numberNonZero = columnArray->getNumElements();
1466     } else {
1467       double value = pi[iRow] * scalar;
1468       for (j = rowStart[iRow]; j < rowStart[iRow + 1]; j++) {
1469         int iColumn = column[j];
1470         double value2 = value * element[j];
1471         if (fabs(value2) > zeroTolerance) {
1472           index[numberNonZero++] = iColumn;
1473           array[iColumn] = value2;
1474         }
1475       }
1476     }
1477   }
1478   columnArray->setNumElements(numberNonZero);
1479   y->setNumElements(0);
1480 }
1481 // Meat of transposeTimes by column when not scaled
gutsOfTransposeTimesUnscaled(const double * COIN_RESTRICT pi,int * COIN_RESTRICT index,double * COIN_RESTRICT array,const double zeroTolerance) const1482 int ClpPackedMatrix::gutsOfTransposeTimesUnscaled(const double *COIN_RESTRICT pi,
1483   int *COIN_RESTRICT index,
1484   double *COIN_RESTRICT array,
1485   const double zeroTolerance) const
1486 {
1487   int numberNonZero = 0;
1488   // get matrix data pointers
1489   const int *COIN_RESTRICT row = matrix_->getIndices();
1490   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
1491   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
1492 #if 1 //ndef INTEL_MKL
1493   double value = 0.0;
1494   CoinBigIndex j;
1495   CoinBigIndex end = columnStart[1];
1496   for (j = columnStart[0]; j < end; j++) {
1497     int iRow = row[j];
1498     value += pi[iRow] * elementByColumn[j];
1499   }
1500   int iColumn;
1501   for (iColumn = 0; iColumn < numberActiveColumns_ - 1; iColumn++) {
1502     CoinBigIndex start = end;
1503     end = columnStart[iColumn + 2];
1504     if (fabs(value) > zeroTolerance) {
1505       array[numberNonZero] = value;
1506       index[numberNonZero++] = iColumn;
1507     }
1508     value = 0.0;
1509     for (j = start; j < end; j++) {
1510       int iRow = row[j];
1511       value += pi[iRow] * elementByColumn[j];
1512     }
1513   }
1514   if (fabs(value) > zeroTolerance) {
1515     array[numberNonZero] = value;
1516     index[numberNonZero++] = iColumn;
1517   }
1518 #else
1519   char transA = 'N';
1520   //int numberRows = matrix_->getNumRows();
1521   mkl_cspblas_dcsrgemv(&transA, const_cast< int * >(&numberActiveColumns_),
1522     const_cast< double * >(elementByColumn),
1523     const_cast< int * >(columnStart),
1524     const_cast< int * >(row),
1525     const_cast< double * >(pi), array);
1526   int iColumn;
1527   for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
1528     double value = array[iColumn];
1529     if (value) {
1530       array[iColumn] = 0.0;
1531       if (fabs(value) > zeroTolerance) {
1532         array[numberNonZero] = value;
1533         index[numberNonZero++] = iColumn;
1534       }
1535     }
1536   }
1537 #endif
1538   return numberNonZero;
1539 }
1540 // Meat of transposeTimes by column when scaled
gutsOfTransposeTimesScaled(const double * COIN_RESTRICT pi,const double * COIN_RESTRICT columnScale,int * COIN_RESTRICT index,double * COIN_RESTRICT array,const double zeroTolerance) const1541 int ClpPackedMatrix::gutsOfTransposeTimesScaled(const double *COIN_RESTRICT pi,
1542   const double *COIN_RESTRICT columnScale,
1543   int *COIN_RESTRICT index,
1544   double *COIN_RESTRICT array,
1545   const double zeroTolerance) const
1546 {
1547   int numberNonZero = 0;
1548   // get matrix data pointers
1549   const int *COIN_RESTRICT row = matrix_->getIndices();
1550   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
1551   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
1552   double value = 0.0;
1553   double scale = columnScale[0];
1554   CoinBigIndex j;
1555   CoinBigIndex end = columnStart[1];
1556   for (j = columnStart[0]; j < end; j++) {
1557     int iRow = row[j];
1558     value += pi[iRow] * elementByColumn[j];
1559   }
1560   int iColumn;
1561   for (iColumn = 0; iColumn < numberActiveColumns_ - 1; iColumn++) {
1562     value *= scale;
1563     CoinBigIndex start = end;
1564     scale = columnScale[iColumn + 1];
1565     end = columnStart[iColumn + 2];
1566     if (fabs(value) > zeroTolerance) {
1567       array[numberNonZero] = value;
1568       index[numberNonZero++] = iColumn;
1569     }
1570     value = 0.0;
1571     for (j = start; j < end; j++) {
1572       int iRow = row[j];
1573       value += pi[iRow] * elementByColumn[j];
1574     }
1575   }
1576   value *= scale;
1577   if (fabs(value) > zeroTolerance) {
1578     array[numberNonZero] = value;
1579     index[numberNonZero++] = iColumn;
1580   }
1581   return numberNonZero;
1582 }
1583 #if ABOCA_LITE
1584 static void
transposeTimesUnscaledBit(clpTempInfo & info)1585 transposeTimesUnscaledBit(clpTempInfo &info)
1586 {
1587   const CoinBigIndex *COIN_RESTRICT columnStart = info.start;
1588   const int *COIN_RESTRICT row = info.row;
1589   const double *COIN_RESTRICT elementByColumn = info.element;
1590   double *COIN_RESTRICT array = info.infeas;
1591   int *COIN_RESTRICT index = info.which;
1592   const unsigned char *COIN_RESTRICT status = info.status;
1593   double zeroTolerance = info.tolerance;
1594   const double *COIN_RESTRICT pi = info.work;
1595   int first = info.startColumn;
1596   int last = first + info.numberToDo;
1597   double value = 0.0;
1598   int jColumn = -1;
1599   int numberNonZero = 0;
1600   for (int iColumn = first; iColumn < last; iColumn++) {
1601     bool wanted = ((status[iColumn] & 3) != 1);
1602     if (fabs(value) > zeroTolerance) {
1603       array[numberNonZero] = value;
1604       index[numberNonZero++] = jColumn;
1605     }
1606     value = 0.0;
1607     if (wanted) {
1608       CoinBigIndex start = columnStart[iColumn];
1609       CoinBigIndex end = columnStart[iColumn + 1];
1610       jColumn = iColumn;
1611       int n = static_cast< int >(end - start);
1612       bool odd = (n & 1) != 0;
1613       n = n >> 1;
1614       const int *COIN_RESTRICT rowThis = row + start;
1615       const double *COIN_RESTRICT elementThis = elementByColumn + start;
1616       for (; n; n--) {
1617         int iRow0 = *rowThis;
1618         int iRow1 = *(rowThis + 1);
1619         rowThis += 2;
1620         value += pi[iRow0] * (*elementThis);
1621         value += pi[iRow1] * (*(elementThis + 1));
1622         elementThis += 2;
1623       }
1624       if (odd) {
1625         int iRow = *rowThis;
1626         value += pi[iRow] * (*elementThis);
1627       }
1628     }
1629   }
1630   if (fabs(value) > zeroTolerance) {
1631     array[numberNonZero] = value;
1632     index[numberNonZero++] = jColumn;
1633   }
1634   info.numberAdded = numberNonZero;
1635 }
1636 #endif
1637 // Meat of transposeTimes by column when not scaled
gutsOfTransposeTimesUnscaled(const double * COIN_RESTRICT pi,int * COIN_RESTRICT index,double * COIN_RESTRICT array,const unsigned char * COIN_RESTRICT status,const double zeroTolerance) const1638 int ClpPackedMatrix::gutsOfTransposeTimesUnscaled(const double *COIN_RESTRICT pi,
1639   int *COIN_RESTRICT index,
1640   double *COIN_RESTRICT array,
1641   const unsigned char *COIN_RESTRICT status,
1642   const double zeroTolerance) const
1643 {
1644   int numberNonZero = 0;
1645   // get matrix data pointers
1646   const int *COIN_RESTRICT row = matrix_->getIndices();
1647   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
1648   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
1649 #if ABOCA_LITE
1650   int numberThreads = abcState();
1651   if (numberThreads) {
1652     clpTempInfo info[ABOCA_LITE];
1653     int chunk = (numberActiveColumns_ + numberThreads - 1) / numberThreads;
1654     int n = 0;
1655     for (int i = 0; i < numberThreads; i++) {
1656       info[i].which = index + n;
1657       info[i].infeas = array + n;
1658       info[i].work = const_cast< double * >(pi);
1659       info[i].status = const_cast< unsigned char * >(status);
1660       info[i].startColumn = n;
1661       info[i].element = elementByColumn;
1662       info[i].start = columnStart;
1663       info[i].row = row;
1664       info[i].numberToDo = CoinMin(chunk, numberActiveColumns_ - n);
1665       info[i].tolerance = zeroTolerance;
1666       n += chunk;
1667     }
1668     for (int i = 0; i < numberThreads; i++) {
1669       cilk_spawn transposeTimesUnscaledBit(info[i]);
1670     }
1671     cilk_sync;
1672     for (int i = 0; i < numberThreads; i++)
1673       numberNonZero += info[i].numberAdded;
1674     moveAndZero(info, 2, NULL);
1675   } else {
1676 #endif
1677     double value = 0.0;
1678     int jColumn = -1;
1679     for (int iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
1680       bool wanted = ((status[iColumn] & 3) != 1);
1681       if (fabs(value) > zeroTolerance) {
1682         array[numberNonZero] = value;
1683         index[numberNonZero++] = jColumn;
1684       }
1685       value = 0.0;
1686       if (wanted) {
1687         CoinBigIndex start = columnStart[iColumn];
1688         CoinBigIndex end = columnStart[iColumn + 1];
1689         jColumn = iColumn;
1690         int n = static_cast< int >(end - start);
1691         bool odd = (n & 1) != 0;
1692         n = n >> 1;
1693         const int *COIN_RESTRICT rowThis = row + start;
1694         const double *COIN_RESTRICT elementThis = elementByColumn + start;
1695         for (; n; n--) {
1696           int iRow0 = *rowThis;
1697           int iRow1 = *(rowThis + 1);
1698           rowThis += 2;
1699           value += pi[iRow0] * (*elementThis);
1700           value += pi[iRow1] * (*(elementThis + 1));
1701           elementThis += 2;
1702         }
1703         if (odd) {
1704           int iRow = *rowThis;
1705           value += pi[iRow] * (*elementThis);
1706         }
1707       }
1708     }
1709     if (fabs(value) > zeroTolerance) {
1710       array[numberNonZero] = value;
1711       index[numberNonZero++] = jColumn;
1712     }
1713 #if ABOCA_LITE
1714   }
1715 #endif
1716   return numberNonZero;
1717 }
1718 #if ABOCA_LITE
1719 static void
transposeTimesUnscaledBit2(clpTempInfo & info)1720 transposeTimesUnscaledBit2(clpTempInfo &info)
1721 {
1722   const CoinBigIndex *COIN_RESTRICT columnStart = info.start;
1723   const int *COIN_RESTRICT row = info.row;
1724   const double *COIN_RESTRICT elementByColumn = info.element;
1725   double *COIN_RESTRICT array = info.infeas;
1726   const double *COIN_RESTRICT reducedCost = info.reducedCost;
1727   int *COIN_RESTRICT index = info.which;
1728   double *COIN_RESTRICT spareArray = info.spare;
1729   int *COIN_RESTRICT spareIndex = info.index;
1730   const unsigned char *COIN_RESTRICT status = info.status;
1731   double zeroTolerance = info.tolerance;
1732   double dualTolerance = info.dualTolerance;
1733   double acceptablePivot = info.acceptablePivot;
1734   const double *COIN_RESTRICT pi = info.work;
1735   double tentativeTheta = 1.0e15;
1736   int numberRemaining = 0;
1737   double upperTheta = info.upperTheta;
1738   int first = info.startColumn;
1739   int last = first + info.numberToDo;
1740   int numberNonZero = 0;
1741   double multiplier[] = { -1.0, 1.0 };
1742   double dualT = -dualTolerance;
1743   for (int iColumn = first; iColumn < last; iColumn++) {
1744     int wanted = (status[iColumn] & 3) - 1;
1745     if (wanted) {
1746       double value = 0.0;
1747       CoinBigIndex start = columnStart[iColumn];
1748       CoinBigIndex end = columnStart[iColumn + 1];
1749       int n = static_cast< int >(end - start);
1750       bool odd = (n & 1) != 0;
1751       n = n >> 1;
1752       const int *COIN_RESTRICT rowThis = row + start;
1753       const double *COIN_RESTRICT elementThis = elementByColumn + start;
1754       for (; n; n--) {
1755         int iRow0 = *rowThis;
1756         int iRow1 = *(rowThis + 1);
1757         rowThis += 2;
1758         value += pi[iRow0] * (*elementThis);
1759         value += pi[iRow1] * (*(elementThis + 1));
1760         elementThis += 2;
1761       }
1762       if (odd) {
1763         int iRow = *rowThis;
1764         value += pi[iRow] * (*elementThis);
1765       }
1766       if (fabs(value) > zeroTolerance) {
1767         double mult = multiplier[wanted - 1];
1768         double alpha = value * mult;
1769         array[numberNonZero] = value;
1770         index[numberNonZero++] = iColumn;
1771         if (alpha > 0.0) {
1772           double oldValue = reducedCost[iColumn] * mult;
1773           double value = oldValue - tentativeTheta * alpha;
1774           if (value < dualT) {
1775             value = oldValue - upperTheta * alpha;
1776             if (value < dualT && alpha >= acceptablePivot) {
1777               upperTheta = (oldValue - dualT) / alpha;
1778               //tentativeTheta = CoinMin(2.0*upperTheta,tentativeTheta);
1779             }
1780             // add to list
1781             spareArray[numberRemaining] = alpha * mult;
1782             spareIndex[numberRemaining++] = iColumn;
1783           }
1784         }
1785       }
1786     }
1787   }
1788   info.numberAdded = numberNonZero;
1789   info.numberRemaining = numberRemaining;
1790   info.upperTheta = upperTheta;
1791 }
1792 #endif
1793 /* Meat of transposeTimes by column when not scaled and skipping
1794    and doing part of dualColumn */
gutsOfTransposeTimesUnscaled(const double * COIN_RESTRICT pi,int * COIN_RESTRICT index,double * COIN_RESTRICT array,const unsigned char * COIN_RESTRICT status,int * COIN_RESTRICT spareIndex,double * COIN_RESTRICT spareArray,const double * COIN_RESTRICT reducedCost,double & upperThetaP,double acceptablePivot,double dualTolerance,int & numberRemainingP,const double zeroTolerance) const1795 int ClpPackedMatrix::gutsOfTransposeTimesUnscaled(const double *COIN_RESTRICT pi,
1796   int *COIN_RESTRICT index,
1797   double *COIN_RESTRICT array,
1798   const unsigned char *COIN_RESTRICT status,
1799   int *COIN_RESTRICT spareIndex,
1800   double *COIN_RESTRICT spareArray,
1801   const double *COIN_RESTRICT reducedCost,
1802   double &upperThetaP,
1803   double acceptablePivot,
1804   double dualTolerance,
1805   int &numberRemainingP,
1806   const double zeroTolerance) const
1807 {
1808   int numberRemaining = numberRemainingP;
1809   double upperTheta = upperThetaP;
1810   int numberNonZero = 0;
1811   // get matrix data pointers
1812   const int *COIN_RESTRICT row = matrix_->getIndices();
1813   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
1814   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
1815 #if ABOCA_LITE
1816   int numberThreads = abcState();
1817   if (numberThreads) {
1818     clpTempInfo info[ABOCA_LITE];
1819     int chunk = (numberActiveColumns_ + numberThreads - 1) / numberThreads;
1820     int n = 0;
1821     for (int i = 0; i < numberThreads; i++) {
1822       info[i].upperTheta = upperThetaP;
1823       info[i].acceptablePivot = acceptablePivot;
1824       info[i].reducedCost = const_cast< double * >(reducedCost);
1825       info[i].index = spareIndex + n + numberRemaining;
1826       info[i].spare = spareArray + n + numberRemaining;
1827       info[i].which = index + n;
1828       info[i].infeas = array + n;
1829       info[i].work = const_cast< double * >(pi);
1830       info[i].status = const_cast< unsigned char * >(status);
1831       info[i].startColumn = n;
1832       info[i].element = elementByColumn;
1833       info[i].start = columnStart;
1834       info[i].row = row;
1835       info[i].numberToDo = CoinMin(chunk, numberActiveColumns_ - n);
1836       info[i].tolerance = zeroTolerance;
1837       info[i].dualTolerance = dualTolerance;
1838       n += chunk;
1839     }
1840     for (int i = 0; i < numberThreads; i++) {
1841       cilk_spawn transposeTimesUnscaledBit2(info[i]);
1842     }
1843     cilk_sync;
1844     for (int i = 0; i < numberThreads; i++) {
1845       numberNonZero += info[i].numberAdded;
1846       numberRemaining += info[i].numberRemaining;
1847       upperTheta = CoinMin(upperTheta, static_cast< double >(info[i].upperTheta));
1848     }
1849     moveAndZero(info, 1, NULL);
1850     moveAndZero(info, 2, NULL);
1851   } else {
1852 #endif
1853     double tentativeTheta = 1.0e15;
1854     double multiplier[] = { -1.0, 1.0 };
1855     double dualT = -dualTolerance;
1856     for (int iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
1857       int wanted = (status[iColumn] & 3) - 1;
1858       if (wanted) {
1859         double value = 0.0;
1860         CoinBigIndex start = columnStart[iColumn];
1861         CoinBigIndex end = columnStart[iColumn + 1];
1862         int n = static_cast< int >(end - start);
1863 #if 1
1864         bool odd = (n & 1) != 0;
1865         n = n >> 1;
1866         const int *COIN_RESTRICT rowThis = row + start;
1867         const double *COIN_RESTRICT elementThis = elementByColumn + start;
1868         for (; n; n--) {
1869           int iRow0 = *rowThis;
1870           int iRow1 = *(rowThis + 1);
1871           rowThis += 2;
1872           value += pi[iRow0] * (*elementThis);
1873           value += pi[iRow1] * (*(elementThis + 1));
1874           elementThis += 2;
1875         }
1876         if (odd) {
1877           int iRow = *rowThis;
1878           value += pi[iRow] * (*elementThis);
1879         }
1880 #else
1881       const int *COIN_RESTRICT rowThis = &row[end - 16];
1882       const double *COIN_RESTRICT elementThis = &elementByColumn[end - 16];
1883       bool odd = (n & 1) != 0;
1884       n = n >> 1;
1885       double value2 = 0.0;
1886       if (odd) {
1887         int iRow = row[start];
1888         value2 = pi[iRow] * elementByColumn[start];
1889       }
1890       switch (n) {
1891       default: {
1892         if (odd) {
1893           start++;
1894         }
1895         n -= 8;
1896         for (; n; n--) {
1897           int iRow0 = row[start];
1898           int iRow1 = row[start + 1];
1899           value += pi[iRow0] * elementByColumn[start];
1900           value2 += pi[iRow1] * elementByColumn[start + 1];
1901           start += 2;
1902         }
1903       case 8: {
1904         int iRow0 = rowThis[16 - 16];
1905         int iRow1 = rowThis[16 - 15];
1906         value += pi[iRow0] * elementThis[16 - 16];
1907         value2 += pi[iRow1] * elementThis[16 - 15];
1908       }
1909       case 7: {
1910         int iRow0 = rowThis[16 - 14];
1911         int iRow1 = rowThis[16 - 13];
1912         value += pi[iRow0] * elementThis[16 - 14];
1913         value2 += pi[iRow1] * elementThis[16 - 13];
1914       }
1915       case 6: {
1916         int iRow0 = rowThis[16 - 12];
1917         int iRow1 = rowThis[16 - 11];
1918         value += pi[iRow0] * elementThis[16 - 12];
1919         value2 += pi[iRow1] * elementThis[16 - 11];
1920       }
1921       case 5: {
1922         int iRow0 = rowThis[16 - 10];
1923         int iRow1 = rowThis[16 - 9];
1924         value += pi[iRow0] * elementThis[16 - 10];
1925         value2 += pi[iRow1] * elementThis[16 - 9];
1926       }
1927       case 4: {
1928         int iRow0 = rowThis[16 - 8];
1929         int iRow1 = rowThis[16 - 7];
1930         value += pi[iRow0] * elementThis[16 - 8];
1931         value2 += pi[iRow1] * elementThis[16 - 7];
1932       }
1933       case 3: {
1934         int iRow0 = rowThis[16 - 6];
1935         int iRow1 = rowThis[16 - 5];
1936         value += pi[iRow0] * elementThis[16 - 6];
1937         value2 += pi[iRow1] * elementThis[16 - 5];
1938       }
1939       case 2: {
1940         int iRow0 = rowThis[16 - 4];
1941         int iRow1 = rowThis[16 - 3];
1942         value += pi[iRow0] * elementThis[16 - 4];
1943         value2 += pi[iRow1] * elementThis[16 - 3];
1944       }
1945       case 1: {
1946         int iRow0 = rowThis[16 - 2];
1947         int iRow1 = rowThis[16 - 1];
1948         value += pi[iRow0] * elementThis[16 - 2];
1949         value2 += pi[iRow1] * elementThis[16 - 1];
1950       }
1951       case 0:;
1952       }
1953       }
1954       value += value2;
1955 #endif
1956         if (fabs(value) > zeroTolerance) {
1957           double mult = multiplier[wanted - 1];
1958           double alpha = value * mult;
1959           array[numberNonZero] = value;
1960           index[numberNonZero++] = iColumn;
1961           if (alpha > 0.0) {
1962             double oldValue = reducedCost[iColumn] * mult;
1963             double value = oldValue - tentativeTheta * alpha;
1964             if (value < dualT) {
1965               value = oldValue - upperTheta * alpha;
1966               if (value < dualT && alpha >= acceptablePivot) {
1967                 upperTheta = (oldValue - dualT) / alpha;
1968                 //tentativeTheta = CoinMin(2.0*upperTheta,tentativeTheta);
1969               }
1970               // add to list
1971               spareArray[numberRemaining] = alpha * mult;
1972               spareIndex[numberRemaining++] = iColumn;
1973             }
1974           }
1975         }
1976       }
1977     }
1978 #if ABOCA_LITE
1979   }
1980 #endif
1981   numberRemainingP = numberRemaining;
1982   upperThetaP = upperTheta;
1983   return numberNonZero;
1984 }
1985 // Meat of transposeTimes by column when scaled
gutsOfTransposeTimesScaled(const double * COIN_RESTRICT pi,const double * COIN_RESTRICT columnScale,int * COIN_RESTRICT index,double * COIN_RESTRICT array,const unsigned char * COIN_RESTRICT status,const double zeroTolerance) const1986 int ClpPackedMatrix::gutsOfTransposeTimesScaled(const double *COIN_RESTRICT pi,
1987   const double *COIN_RESTRICT columnScale,
1988   int *COIN_RESTRICT index,
1989   double *COIN_RESTRICT array,
1990   const unsigned char *COIN_RESTRICT status, const double zeroTolerance) const
1991 {
1992   int numberNonZero = 0;
1993   // get matrix data pointers
1994   const int *COIN_RESTRICT row = matrix_->getIndices();
1995   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
1996   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
1997   double value = 0.0;
1998   int jColumn = -1;
1999   for (int iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
2000     bool wanted = ((status[iColumn] & 3) != 1);
2001     if (fabs(value) > zeroTolerance) {
2002       array[numberNonZero] = value;
2003       index[numberNonZero++] = jColumn;
2004     }
2005     value = 0.0;
2006     if (wanted) {
2007       double scale = columnScale[iColumn];
2008       CoinBigIndex start = columnStart[iColumn];
2009       CoinBigIndex end = columnStart[iColumn + 1];
2010       jColumn = iColumn;
2011       for (CoinBigIndex j = start; j < end; j++) {
2012         int iRow = row[j];
2013         value += pi[iRow] * elementByColumn[j];
2014       }
2015       value *= scale;
2016     }
2017   }
2018   if (fabs(value) > zeroTolerance) {
2019     array[numberNonZero] = value;
2020     index[numberNonZero++] = jColumn;
2021   }
2022   return numberNonZero;
2023 }
2024 #if ABOCA_LITE
2025 static void
packDownBit(clpTempInfo & info)2026 packDownBit(clpTempInfo &info)
2027 {
2028   double *COIN_RESTRICT output = info.infeas;
2029   int *COIN_RESTRICT index = info.which;
2030   double zeroTolerance = info.tolerance;
2031   int first = info.startColumn;
2032   int last = info.numberToDo;
2033   int numberNonZero = 0;
2034   for (int i = 0; i < last; i++) {
2035     double value = output[i];
2036     if (value) {
2037       output[i] = 0.0;
2038       if (fabs(value) > zeroTolerance) {
2039         output[numberNonZero] = value;
2040         index[numberNonZero++] = i + first;
2041       }
2042     }
2043   }
2044   info.numberAdded = numberNonZero;
2045 }
2046 #endif
2047 // Meat of transposeTimes by row n > K if packed - returns number nonzero
gutsOfTransposeTimesByRowGEK(const CoinIndexedVector * COIN_RESTRICT piVector,int * COIN_RESTRICT index,double * COIN_RESTRICT output,int numberColumns,const double tolerance,const double scalar) const2048 int ClpPackedMatrix::gutsOfTransposeTimesByRowGEK(const CoinIndexedVector *COIN_RESTRICT piVector,
2049   int *COIN_RESTRICT index,
2050   double *COIN_RESTRICT output,
2051   int numberColumns,
2052   const double tolerance,
2053   const double scalar) const
2054 {
2055   const double *COIN_RESTRICT pi = piVector->denseVector();
2056   int numberInRowArray = piVector->getNumElements();
2057   const int *COIN_RESTRICT column = matrix_->getIndices();
2058   const CoinBigIndex *COIN_RESTRICT rowStart = matrix_->getVectorStarts();
2059   const double *COIN_RESTRICT element = matrix_->getElements();
2060   const int *COIN_RESTRICT whichRow = piVector->getIndices();
2061   // ** Row copy is already scaled
2062   for (int i = 0; i < numberInRowArray; i++) {
2063     int iRow = whichRow[i];
2064     double value = pi[i] * scalar;
2065     CoinBigIndex start = rowStart[iRow];
2066     CoinBigIndex end = rowStart[iRow + 1];
2067     int n = static_cast< int >(end - start);
2068     const int *COIN_RESTRICT columnThis = column + start;
2069     const double *COIN_RESTRICT elementThis = element + start;
2070 
2071     // could do by twos
2072     for (; n; n--) {
2073       int iColumn = *columnThis;
2074       columnThis++;
2075       double elValue = *elementThis;
2076       elementThis++;
2077       elValue *= value;
2078       output[iColumn] += elValue;
2079     }
2080   }
2081   // get rid of tiny values and count
2082   int numberNonZero = 0;
2083 #if ABOCA_LITE
2084   int numberThreads = abcState();
2085   if (numberThreads) {
2086     clpTempInfo info[ABOCA_LITE];
2087     int chunk = (numberColumns + numberThreads - 1) / numberThreads;
2088     int n = 0;
2089     for (int i = 0; i < numberThreads; i++) {
2090       info[i].which = index + n;
2091       info[i].infeas = output + n;
2092       info[i].startColumn = n;
2093       info[i].numberToDo = CoinMin(chunk, numberColumns - n);
2094       info[i].tolerance = tolerance;
2095       n += chunk;
2096     }
2097     for (int i = 0; i < numberThreads; i++) {
2098       cilk_spawn packDownBit(info[i]);
2099     }
2100     cilk_sync;
2101     for (int i = 0; i < numberThreads; i++) {
2102       numberNonZero += info[i].numberAdded;
2103     }
2104     moveAndZero(info, 2, NULL);
2105   } else {
2106 #endif
2107     for (int i = 0; i < numberColumns; i++) {
2108       double value = output[i];
2109       if (value) {
2110         output[i] = 0.0;
2111         if (fabs(value) > tolerance) {
2112           output[numberNonZero] = value;
2113           index[numberNonZero++] = i;
2114         }
2115       }
2116     }
2117 #if ABOCA_LITE
2118   }
2119 #endif
2120 #ifndef NDEBUG
2121   for (int i = numberNonZero; i < numberColumns; i++)
2122     assert(!output[i]);
2123 #endif
2124   return numberNonZero;
2125 }
2126 // Meat of transposeTimes by row n == 2 if packed
gutsOfTransposeTimesByRowEQ2(const CoinIndexedVector * piVector,CoinIndexedVector * output,CoinIndexedVector * spareVector,const double tolerance,const double scalar) const2127 void ClpPackedMatrix::gutsOfTransposeTimesByRowEQ2(const CoinIndexedVector *piVector, CoinIndexedVector *output,
2128   CoinIndexedVector *spareVector, const double tolerance, const double scalar) const
2129 {
2130   double *COIN_RESTRICT pi = piVector->denseVector();
2131   int numberNonZero = 0;
2132   int *COIN_RESTRICT index = output->getIndices();
2133   double *COIN_RESTRICT array = output->denseVector();
2134   const int *COIN_RESTRICT column = matrix_->getIndices();
2135   const CoinBigIndex *COIN_RESTRICT rowStart = matrix_->getVectorStarts();
2136   const double *COIN_RESTRICT element = matrix_->getElements();
2137   const int *COIN_RESTRICT whichRow = piVector->getIndices();
2138   int iRow0 = whichRow[0];
2139   int iRow1 = whichRow[1];
2140   double pi0 = pi[0];
2141   double pi1 = pi[1];
2142   if (rowStart[iRow0 + 1] - rowStart[iRow0] > rowStart[iRow1 + 1] - rowStart[iRow1]) {
2143     // do one with fewer first
2144     iRow0 = iRow1;
2145     iRow1 = whichRow[0];
2146     pi0 = pi1;
2147     pi1 = pi[0];
2148   }
2149   // and set up mark as char array
2150   char *COIN_RESTRICT marked = reinterpret_cast< char * >(index + output->capacity());
2151   int *COIN_RESTRICT lookup = spareVector->getIndices();
2152   double value = pi0 * scalar;
2153   CoinBigIndex j;
2154   for (j = rowStart[iRow0]; j < rowStart[iRow0 + 1]; j++) {
2155     int iColumn = column[j];
2156     double elValue = element[j];
2157     double value2 = value * elValue;
2158     array[numberNonZero] = value2;
2159     marked[iColumn] = 1;
2160     lookup[iColumn] = numberNonZero;
2161     index[numberNonZero++] = iColumn;
2162   }
2163   value = pi1 * scalar;
2164   for (j = rowStart[iRow1]; j < rowStart[iRow1 + 1]; j++) {
2165     int iColumn = column[j];
2166     double elValue = element[j];
2167     double value2 = value * elValue;
2168     // I am assuming no zeros in matrix
2169     if (marked[iColumn]) {
2170       int iLookup = lookup[iColumn];
2171       array[iLookup] += value2;
2172     } else {
2173       if (fabs(value2) > tolerance) {
2174         array[numberNonZero] = value2;
2175         index[numberNonZero++] = iColumn;
2176       }
2177     }
2178   }
2179   // get rid of tiny values and zero out marked
2180   int i;
2181   int n = numberNonZero;
2182   numberNonZero = 0;
2183   for (i = 0; i < n; i++) {
2184     int iColumn = index[i];
2185     marked[iColumn] = 0;
2186     if (fabs(array[i]) > tolerance) {
2187       array[numberNonZero] = array[i];
2188       index[numberNonZero++] = index[i];
2189     }
2190   }
2191   memset(array + numberNonZero, 0, (n - numberNonZero) * sizeof(double));
2192   output->setNumElements(numberNonZero);
2193   spareVector->setNumElements(0);
2194 }
2195 // Meat of transposeTimes by row n == 1 if packed
gutsOfTransposeTimesByRowEQ1(const CoinIndexedVector * piVector,CoinIndexedVector * output,const double tolerance,const double scalar) const2196 void ClpPackedMatrix::gutsOfTransposeTimesByRowEQ1(const CoinIndexedVector *piVector, CoinIndexedVector *output,
2197   const double tolerance, const double scalar) const
2198 {
2199   double *COIN_RESTRICT pi = piVector->denseVector();
2200   int numberNonZero = 0;
2201   int *COIN_RESTRICT index = output->getIndices();
2202   double *COIN_RESTRICT array = output->denseVector();
2203   const int *COIN_RESTRICT column = matrix_->getIndices();
2204   const CoinBigIndex *COIN_RESTRICT rowStart = matrix_->getVectorStarts();
2205   const double *COIN_RESTRICT element = matrix_->getElements();
2206   int iRow = piVector->getIndices()[0];
2207   numberNonZero = 0;
2208   CoinBigIndex j;
2209   double value = pi[0] * scalar;
2210   for (j = rowStart[iRow]; j < rowStart[iRow + 1]; j++) {
2211     int iColumn = column[j];
2212     double elValue = element[j];
2213     double value2 = value * elValue;
2214     if (fabs(value2) > tolerance) {
2215       array[numberNonZero] = value2;
2216       index[numberNonZero++] = iColumn;
2217     }
2218   }
2219   output->setNumElements(numberNonZero);
2220 }
2221 /* Return <code>x *A in <code>z</code> but
2222    just for indices in y.
2223    Squashes small elements and knows about ClpSimplex */
subsetTransposeTimes(const ClpSimplex * model,const CoinIndexedVector * rowArray,const CoinIndexedVector * y,CoinIndexedVector * columnArray) const2224 void ClpPackedMatrix::subsetTransposeTimes(const ClpSimplex *model,
2225   const CoinIndexedVector *rowArray,
2226   const CoinIndexedVector *y,
2227   CoinIndexedVector *columnArray) const
2228 {
2229   columnArray->clear();
2230   double *COIN_RESTRICT pi = rowArray->denseVector();
2231   double *COIN_RESTRICT array = columnArray->denseVector();
2232   int jColumn;
2233   // get matrix data pointers
2234   const int *COIN_RESTRICT row = matrix_->getIndices();
2235   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
2236   const int *COIN_RESTRICT columnLength = matrix_->getVectorLengths();
2237   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
2238   const double *COIN_RESTRICT rowScale = model->rowScale();
2239   int numberToDo = y->getNumElements();
2240   const int *COIN_RESTRICT which = y->getIndices();
2241   assert(!rowArray->packedMode());
2242   columnArray->setPacked();
2243   ClpPackedMatrix *scaledMatrix = model->clpScaledMatrix();
2244   int flags = flags_;
2245   if (rowScale && scaledMatrix && !(scaledMatrix->flags() & 2)) {
2246     flags = 0;
2247     rowScale = NULL;
2248     // get matrix data pointers
2249     row = scaledMatrix->getIndices();
2250     columnStart = scaledMatrix->getVectorStarts();
2251     elementByColumn = scaledMatrix->getElements();
2252   }
2253   if (!(flags & 2) && numberToDo > 2) {
2254     // no gaps
2255     if (!rowScale) {
2256       int iColumn = which[0];
2257       double value = 0.0;
2258       CoinBigIndex j;
2259       int columnNext = which[1];
2260       CoinBigIndex startNext = columnStart[columnNext];
2261       //coin_prefetch_const(row+startNext);
2262       //coin_prefetch_const(elementByColumn+startNext);
2263       CoinBigIndex endNext = columnStart[columnNext + 1];
2264       for (j = columnStart[iColumn];
2265            j < columnStart[iColumn + 1]; j++) {
2266         int iRow = row[j];
2267         value += pi[iRow] * elementByColumn[j];
2268       }
2269       for (jColumn = 0; jColumn < numberToDo - 2; jColumn++) {
2270         CoinBigIndex start = startNext;
2271         CoinBigIndex end = endNext;
2272         columnNext = which[jColumn + 2];
2273         startNext = columnStart[columnNext];
2274         //coin_prefetch_const(row+startNext);
2275         //coin_prefetch_const(elementByColumn+startNext);
2276         endNext = columnStart[columnNext + 1];
2277         array[jColumn] = value;
2278         value = 0.0;
2279         for (j = start; j < end; j++) {
2280           int iRow = row[j];
2281           value += pi[iRow] * elementByColumn[j];
2282         }
2283       }
2284       array[jColumn++] = value;
2285       value = 0.0;
2286       for (j = startNext; j < endNext; j++) {
2287         int iRow = row[j];
2288         value += pi[iRow] * elementByColumn[j];
2289       }
2290       array[jColumn] = value;
2291     } else {
2292 #ifdef CLP_INVESTIGATE
2293       if (model->clpScaledMatrix())
2294         printf("scaledMatrix_ at %d of ClpPackedMatrix\n", __LINE__);
2295 #endif
2296       // scaled
2297       const double *columnScale = model->columnScale();
2298       int iColumn = which[0];
2299       double value = 0.0;
2300       double scale = columnScale[iColumn];
2301       CoinBigIndex j;
2302       for (j = columnStart[iColumn];
2303            j < columnStart[iColumn + 1]; j++) {
2304         int iRow = row[j];
2305         value += pi[iRow] * elementByColumn[j] * rowScale[iRow];
2306       }
2307       for (jColumn = 0; jColumn < numberToDo - 1; jColumn++) {
2308         int iColumn = which[jColumn + 1];
2309         value *= scale;
2310         scale = columnScale[iColumn];
2311         CoinBigIndex start = columnStart[iColumn];
2312         CoinBigIndex end = columnStart[iColumn + 1];
2313         array[jColumn] = value;
2314         value = 0.0;
2315         for (j = start; j < end; j++) {
2316           int iRow = row[j];
2317           value += pi[iRow] * elementByColumn[j] * rowScale[iRow];
2318         }
2319       }
2320       value *= scale;
2321       array[jColumn] = value;
2322     }
2323   } else if (numberToDo) {
2324     // gaps
2325     if (!rowScale) {
2326       for (jColumn = 0; jColumn < numberToDo; jColumn++) {
2327         int iColumn = which[jColumn];
2328         double value = 0.0;
2329         CoinBigIndex j;
2330         for (j = columnStart[iColumn];
2331              j < columnStart[iColumn] + columnLength[iColumn]; j++) {
2332           int iRow = row[j];
2333           value += pi[iRow] * elementByColumn[j];
2334         }
2335         array[jColumn] = value;
2336       }
2337     } else {
2338 #ifdef CLP_INVESTIGATE
2339       if (model->clpScaledMatrix())
2340         printf("scaledMatrix_ at %d of ClpPackedMatrix - flags %d (%d) n %d\n",
2341           __LINE__, flags_, model->clpScaledMatrix()->flags(), numberToDo);
2342 #endif
2343       // scaled
2344       const double *columnScale = model->columnScale();
2345       for (jColumn = 0; jColumn < numberToDo; jColumn++) {
2346         int iColumn = which[jColumn];
2347         double value = 0.0;
2348         CoinBigIndex j;
2349         for (j = columnStart[iColumn];
2350              j < columnStart[iColumn] + columnLength[iColumn]; j++) {
2351           int iRow = row[j];
2352           value += pi[iRow] * elementByColumn[j] * rowScale[iRow];
2353         }
2354         value *= columnScale[iColumn];
2355         array[jColumn] = value;
2356       }
2357     }
2358   }
2359 }
2360 /* Returns true if can combine transposeTimes and subsetTransposeTimes
2361    and if it would be faster */
canCombine(const ClpSimplex * model,const CoinIndexedVector * pi) const2362 bool ClpPackedMatrix::canCombine(const ClpSimplex *model,
2363   const CoinIndexedVector *pi) const
2364 {
2365   int numberInRowArray = pi->getNumElements();
2366   int numberRows = model->numberRows();
2367   bool packed = pi->packedMode();
2368   // factor should be smaller if doing both with two pi vectors
2369   double factor = 0.30;
2370   // We may not want to do by row if there may be cache problems
2371   // It would be nice to find L2 cache size - for moment 512K
2372   // Be slightly optimistic
2373   if (numberActiveColumns_ * sizeof(double) > 1000000) {
2374     if (numberRows * 10 < numberActiveColumns_)
2375       factor *= 0.333333333;
2376     else if (numberRows * 4 < numberActiveColumns_)
2377       factor *= 0.5;
2378     else if (numberRows * 2 < numberActiveColumns_)
2379       factor *= 0.66666666667;
2380     //if (model->numberIterations()%50==0)
2381     //printf("%d nonzero\n",numberInRowArray);
2382   }
2383   // if not packed then bias a bit more towards by column
2384   if (!packed)
2385     factor *= 0.9;
2386   // bias if columnCopy
2387   if (columnCopy_)
2388     factor *= 0.5;
2389   return ((numberInRowArray > factor * numberRows || !model->rowCopy()) && !(flags_ & 2));
2390 }
2391 #ifndef CLP_ALL_ONE_FILE
2392 // These have to match ClpPrimalColumnSteepest version
2393 #define reference(i) (((reference[i >> 5] >> (i & 31)) & 1) != 0)
2394 #endif
2395 #if ABOCA_LITE
2396 static void
transposeTimes2UnscaledBit(clpTempInfo & info)2397 transposeTimes2UnscaledBit(clpTempInfo &info)
2398 {
2399   const CoinBigIndex *COIN_RESTRICT columnStart = info.start;
2400   const int *COIN_RESTRICT row = info.row;
2401   const double *COIN_RESTRICT elementByColumn = info.element;
2402   double *COIN_RESTRICT array = info.infeas;
2403   double *COIN_RESTRICT weights = const_cast< double * >(info.lower);
2404   double *COIN_RESTRICT reducedCost = info.reducedCost;
2405   double *COIN_RESTRICT infeas = const_cast< double * >(info.upper);
2406   int *COIN_RESTRICT index = info.which;
2407   unsigned int *COIN_RESTRICT reference = reinterpret_cast< unsigned int * >(info.index);
2408   const unsigned char *COIN_RESTRICT status = info.status;
2409   double zeroTolerance = info.tolerance;
2410   double dualTolerance = info.dualTolerance;
2411   const double *COIN_RESTRICT pi = info.work;
2412   const double *COIN_RESTRICT piWeight = info.cost;
2413   double scaleFactor = info.primalRatio;
2414   double devex = info.changeObj;
2415   double referenceIn = info.theta;
2416   int first = info.startColumn;
2417   int last = first + info.numberToDo;
2418   int killDjs = info.numberInfeasibilities;
2419   int numberNonZero = 0;
2420   double mmult[2] = { 1.0, -1.0 };
2421   for (int iColumn = first; iColumn < last; iColumn++) {
2422     int iStat = status[iColumn] & 7;
2423     if (iStat != 1 && iStat != 5) {
2424       double value = 0.0;
2425       CoinBigIndex start = columnStart[iColumn];
2426       CoinBigIndex end = columnStart[iColumn + 1];
2427       int n = static_cast< int >(end - start);
2428       const int *COIN_RESTRICT rowThis = row + start;
2429       const double *COIN_RESTRICT elementThis = elementByColumn + start;
2430       for (int i = 0; i < n; i++) {
2431         int iRow = rowThis[i];
2432         value -= pi[iRow] * elementThis[i];
2433       }
2434       if (fabs(value) > zeroTolerance) {
2435         // and do other array
2436         double modification = 0.0;
2437         for (int i = 0; i < n; i++) {
2438           int iRow = rowThis[i];
2439           modification += piWeight[iRow] * elementThis[i];
2440         }
2441         double thisWeight = weights[iColumn];
2442         double oldWeight = thisWeight;
2443         double pivot = value * scaleFactor;
2444         double pivotSquared = pivot * pivot;
2445         thisWeight += pivotSquared * devex + pivot * modification;
2446         debug3(iColumn, thisWeight, pivotSquared, devex, pivot, modification, oldWeight);
2447         if (thisWeight < DEVEX_TRY_NORM) {
2448           if (referenceIn < 0.0) {
2449             // steepest
2450             thisWeight = CoinMax(DEVEX_TRY_NORM, DEVEX_ADD_ONE + pivotSquared);
2451           } else {
2452             // exact
2453             thisWeight = referenceIn * pivotSquared;
2454             if (reference(iColumn))
2455               thisWeight += 1.0;
2456             thisWeight = CoinMax(thisWeight, DEVEX_TRY_NORM);
2457           }
2458         }
2459         weights[iColumn] = thisWeight;
2460         if (!killDjs) {
2461           value = reducedCost[iColumn] - value;
2462           reducedCost[iColumn] = value;
2463           if (iStat > 1 && iStat < 4) {
2464             value *= mmult[iStat - 2];
2465             if (value <= dualTolerance)
2466               value = 0.0;
2467           } else {
2468             // free or super basic
2469             if (fabs(value) > FREE_ACCEPT * dualTolerance) {
2470               // we are going to bias towards free (but only if reasonable)
2471               value *= FREE_BIAS;
2472             } else {
2473               value = 0.0;
2474             }
2475           }
2476           if (value) {
2477             value *= value;
2478             // store square in list
2479             if (infeas[iColumn]) {
2480               infeas[iColumn] = value; // already there
2481             } else {
2482               array[numberNonZero] = value;
2483               index[numberNonZero++] = iColumn;
2484             }
2485           } else {
2486             array[numberNonZero] = 0.0;
2487             index[numberNonZero++] = iColumn;
2488           }
2489         }
2490       }
2491     }
2492   }
2493   info.numberAdded = numberNonZero;
2494 }
2495 static void
transposeTimes2ScaledBit(clpTempInfo & info)2496 transposeTimes2ScaledBit(clpTempInfo &info)
2497 {
2498   const CoinBigIndex *COIN_RESTRICT columnStart = info.start;
2499   const int *COIN_RESTRICT row = info.row;
2500   const double *COIN_RESTRICT elementByColumn = info.element;
2501   double *COIN_RESTRICT array = info.infeas;
2502   double *COIN_RESTRICT weights = const_cast< double * >(info.lower);
2503   int *COIN_RESTRICT index = info.which;
2504   unsigned int *COIN_RESTRICT reference = reinterpret_cast< unsigned int * >(info.index);
2505   const unsigned char *COIN_RESTRICT status = info.status;
2506   double zeroTolerance = info.tolerance;
2507   double dualTolerance = info.dualTolerance;
2508   double *COIN_RESTRICT reducedCost = info.reducedCost;
2509   double *COIN_RESTRICT infeas = const_cast< double * >(info.upper);
2510   const double *COIN_RESTRICT pi = info.work;
2511   const double *COIN_RESTRICT piWeight = info.cost;
2512   const double *COIN_RESTRICT columnScale = info.solution;
2513   double scaleFactor = info.primalRatio;
2514   double devex = info.changeObj;
2515   double referenceIn = info.theta;
2516   int first = info.startColumn;
2517   int last = first + info.numberToDo;
2518   int killDjs = info.numberInfeasibilities;
2519   int numberNonZero = 0;
2520   double mmult[2] = { 1.0, -1.0 };
2521   for (int iColumn = first; iColumn < last; iColumn++) {
2522     int iStat = status[iColumn] & 7;
2523     if (iStat != 1 && iStat != 5) {
2524       double value = 0.0;
2525       double scale = columnScale[iColumn];
2526       CoinBigIndex start = columnStart[iColumn];
2527       CoinBigIndex end = columnStart[iColumn + 1];
2528       int n = static_cast< int >(end - start);
2529       bool odd = (n & 1) != 0;
2530       n &= ~1;
2531       const int *COIN_RESTRICT rowThis = row + start;
2532       const double *COIN_RESTRICT elementThis = elementByColumn + start;
2533       for (int i = 0; i < n; i += 2) {
2534         int iRow0 = rowThis[i];
2535         int iRow1 = rowThis[i + 1];
2536         value -= pi[iRow0] * elementThis[i];
2537         value -= pi[iRow1] * elementThis[i + 1];
2538       }
2539       if (odd) {
2540         int iRow = rowThis[n];
2541         value -= pi[iRow] * elementThis[n];
2542       }
2543       value *= scale;
2544       if (fabs(value) > zeroTolerance) {
2545         // and do other array
2546         double modification = 0.0;
2547         for (int i = 0; i < n; i += 2) {
2548           int iRow0 = rowThis[i];
2549           int iRow1 = rowThis[i + 1];
2550           modification += piWeight[iRow0] * elementThis[i];
2551           modification += piWeight[iRow1] * elementThis[i + 1];
2552         }
2553         if (odd) {
2554           int iRow = rowThis[n];
2555           modification += piWeight[iRow] * elementThis[n];
2556         }
2557         modification *= scale;
2558         double thisWeight = weights[iColumn];
2559         double pivot = value * scaleFactor;
2560         double pivotSquared = pivot * pivot;
2561         thisWeight += pivotSquared * devex + pivot * modification;
2562         if (thisWeight < DEVEX_TRY_NORM) {
2563           if (referenceIn < 0.0) {
2564             // steepest
2565             thisWeight = CoinMax(DEVEX_TRY_NORM, DEVEX_ADD_ONE + pivotSquared);
2566           } else {
2567             // exact
2568             thisWeight = referenceIn * pivotSquared;
2569             if (reference(iColumn))
2570               thisWeight += 1.0;
2571             thisWeight = CoinMax(thisWeight, DEVEX_TRY_NORM);
2572           }
2573         }
2574         weights[iColumn] = thisWeight;
2575         if (!killDjs) {
2576           value = reducedCost[iColumn] - value;
2577           reducedCost[iColumn] = value;
2578           if (iStat > 1 && iStat < 4) {
2579             value *= mmult[iStat - 2];
2580             if (value <= dualTolerance)
2581               value = 0.0;
2582           } else {
2583             // free or super basic
2584             if (fabs(value) > FREE_ACCEPT * dualTolerance) {
2585               // we are going to bias towards free (but only if reasonable)
2586               value *= FREE_BIAS;
2587             } else {
2588               value = 0.0;
2589             }
2590           }
2591           if (value) {
2592             value *= value;
2593             // store square in list
2594             if (infeas[iColumn]) {
2595               infeas[iColumn] = value; // already there
2596             } else {
2597               array[numberNonZero] = value;
2598               index[numberNonZero++] = iColumn;
2599             }
2600           } else {
2601             array[numberNonZero] = 0.0;
2602             index[numberNonZero++] = iColumn;
2603           }
2604         }
2605       }
2606     }
2607   }
2608   info.numberAdded = numberNonZero;
2609 }
2610 #endif
2611 /* Updates two arrays for steepest and does devex weights
2612    Returns nonzero if updates reduced cost and infeas -
2613    new infeas in dj1 */
2614 // Updates two arrays for steepest
transposeTimes2(const ClpSimplex * model,const CoinIndexedVector * pi1,CoinIndexedVector * dj1,const CoinIndexedVector * pi2,CoinIndexedVector * spare,double * COIN_RESTRICT infeas,double * COIN_RESTRICT reducedCost,double referenceIn,double devex,unsigned int * COIN_RESTRICT reference,double * COIN_RESTRICT weights,double scaleFactor)2615 int ClpPackedMatrix::transposeTimes2(const ClpSimplex *model,
2616   const CoinIndexedVector *pi1, CoinIndexedVector *dj1,
2617   const CoinIndexedVector *pi2,
2618   CoinIndexedVector *spare,
2619   double *COIN_RESTRICT infeas,
2620   double *COIN_RESTRICT reducedCost,
2621   double referenceIn, double devex,
2622   // Array for exact devex to say what is in reference framework
2623   unsigned int *COIN_RESTRICT reference,
2624   double *COIN_RESTRICT weights, double scaleFactor)
2625 {
2626   int returnCode = 0;
2627   // put row of tableau in dj1
2628   double *COIN_RESTRICT pi = pi1->denseVector();
2629   int numberNonZero = 0;
2630   int *COIN_RESTRICT index = dj1->getIndices();
2631   double *COIN_RESTRICT array = dj1->denseVector();
2632   int numberInRowArray = pi1->getNumElements();
2633   double zeroTolerance = model->zeroTolerance();
2634   double dualTolerance = model->currentDualTolerance();
2635   // we can't really trust infeasibilities if there is dual error
2636   // this coding has to mimic coding in checkDualSolution
2637   double error = CoinMin(1.0e-2, model->largestDualError());
2638   // allow tolerance at least slightly bigger than standard
2639   dualTolerance = dualTolerance + error;
2640   bool packed = pi1->packedMode();
2641   // do by column
2642   int iColumn;
2643   // get matrix data pointers
2644   const int *COIN_RESTRICT row = matrix_->getIndices();
2645   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
2646   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
2647   const double *COIN_RESTRICT rowScale = model->rowScale();
2648   assert(!spare->getNumElements());
2649   assert(numberActiveColumns_ > 0);
2650   double *COIN_RESTRICT piWeight = pi2->denseVector();
2651   assert(!pi2->packedMode());
2652   bool killDjs = (scaleFactor == 0.0);
2653   if (!scaleFactor)
2654     scaleFactor = 1.0;
2655   if (packed) {
2656     // need to expand pi into y
2657     assert(spare->capacity() >= model->numberRows());
2658     double *COIN_RESTRICT piOld = pi;
2659     pi = spare->denseVector();
2660     const int *COIN_RESTRICT whichRow = pi1->getIndices();
2661     int i;
2662     ClpPackedMatrix *scaledMatrix = model->clpScaledMatrix();
2663     if (rowScale && scaledMatrix) {
2664       rowScale = NULL;
2665       // get matrix data pointers
2666       row = scaledMatrix->getIndices();
2667       columnStart = scaledMatrix->getVectorStarts();
2668       elementByColumn = scaledMatrix->getElements();
2669     }
2670     if (!rowScale) {
2671       // modify pi so can collapse to one loop
2672       for (i = 0; i < numberInRowArray; i++) {
2673         int iRow = whichRow[i];
2674         pi[iRow] = piOld[i];
2675       }
2676       if (!columnCopy_ || killDjs) {
2677         if (infeas)
2678           returnCode = 1;
2679 #if ABOCA_LITE
2680         int numberThreads = abcState();
2681         if (numberThreads) {
2682           clpTempInfo info[ABOCA_LITE];
2683           int chunk = (numberActiveColumns_ + numberThreads - 1) / numberThreads;
2684           int n = 0;
2685           for (int i = 0; i < numberThreads; i++) {
2686             info[i].theta = referenceIn;
2687             info[i].changeObj = devex;
2688             info[i].primalRatio = scaleFactor;
2689             info[i].lower = weights;
2690             info[i].reducedCost = reducedCost;
2691             info[i].upper = infeas;
2692             info[i].which = index + n;
2693             info[i].infeas = array + n;
2694             info[i].index = reinterpret_cast< int * >(reference);
2695             info[i].work = const_cast< double * >(pi);
2696             info[i].cost = const_cast< double * >(piWeight);
2697             info[i].status = const_cast< unsigned char * >(model->statusArray());
2698             info[i].startColumn = n;
2699             info[i].element = elementByColumn;
2700             info[i].start = columnStart;
2701             info[i].row = row;
2702             info[i].numberToDo = CoinMin(chunk, numberActiveColumns_ - n);
2703             info[i].tolerance = zeroTolerance;
2704             info[i].dualTolerance = dualTolerance;
2705             info[i].numberInfeasibilities = killDjs ? 1 : 0;
2706             n += chunk;
2707           }
2708           for (int i = 0; i < numberThreads; i++) {
2709             cilk_spawn transposeTimes2UnscaledBit(info[i]);
2710           }
2711           cilk_sync;
2712           for (int i = 0; i < numberThreads; i++) {
2713             numberNonZero += info[i].numberAdded;
2714           }
2715           moveAndZero(info, 2, NULL);
2716         } else {
2717 #endif
2718           CoinBigIndex j;
2719           CoinBigIndex end = columnStart[0];
2720           for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
2721             CoinBigIndex start = end;
2722             end = columnStart[iColumn + 1];
2723             ClpSimplex::Status status = model->getStatus(iColumn);
2724             if (status == ClpSimplex::basic || status == ClpSimplex::isFixed)
2725               continue;
2726             double value = 0.0;
2727             for (j = start; j < end; j++) {
2728               int iRow = row[j];
2729               value -= pi[iRow] * elementByColumn[j];
2730             }
2731             if (fabs(value) > zeroTolerance) {
2732               // and do other array
2733               double modification = 0.0;
2734               for (j = start; j < end; j++) {
2735                 int iRow = row[j];
2736                 modification += piWeight[iRow] * elementByColumn[j];
2737               }
2738               double thisWeight = weights[iColumn];
2739               double oldWeight = thisWeight;
2740               double pivot = value * scaleFactor;
2741               double pivotSquared = pivot * pivot;
2742               thisWeight += pivotSquared * devex + pivot * modification;
2743               debug3(iColumn, thisWeight, pivotSquared, devex, pivot, modification, oldWeight);
2744               if (thisWeight < DEVEX_TRY_NORM) {
2745                 if (referenceIn < 0.0) {
2746                   // steepest
2747                   thisWeight = CoinMax(DEVEX_TRY_NORM, DEVEX_ADD_ONE + pivotSquared);
2748                 } else {
2749                   // exact
2750                   thisWeight = referenceIn * pivotSquared;
2751                   if (reference(iColumn))
2752                     thisWeight += 1.0;
2753                   thisWeight = CoinMax(thisWeight, DEVEX_TRY_NORM);
2754                 }
2755               }
2756               weights[iColumn] = thisWeight;
2757               if (!killDjs) {
2758                 value = reducedCost[iColumn] - value;
2759                 reducedCost[iColumn] = value;
2760                 // simplify status
2761                 ClpSimplex::Status status = model->getStatus(iColumn);
2762 
2763                 switch (status) {
2764 
2765                 case ClpSimplex::basic:
2766                 case ClpSimplex::isFixed:
2767                   break;
2768                 case ClpSimplex::isFree:
2769                 case ClpSimplex::superBasic:
2770                   if (fabs(value) > FREE_ACCEPT * dualTolerance) {
2771                     // we are going to bias towards free (but only if reasonable)
2772                     value *= FREE_BIAS;
2773                     value *= value;
2774                     // store square in list
2775                     if (infeas[iColumn]) {
2776                       infeas[iColumn] = value; // already there
2777                     } else {
2778                       array[numberNonZero] = value;
2779                       index[numberNonZero++] = iColumn;
2780                     }
2781                   } else {
2782                     array[numberNonZero] = 0.0;
2783                     index[numberNonZero++] = iColumn;
2784                   }
2785                   break;
2786                 case ClpSimplex::atUpperBound:
2787                   if (value > dualTolerance) {
2788                     value *= value;
2789                     // store square in list
2790                     if (infeas[iColumn]) {
2791                       infeas[iColumn] = value; // already there
2792                     } else {
2793                       array[numberNonZero] = value;
2794                       index[numberNonZero++] = iColumn;
2795                     }
2796                   } else {
2797                     array[numberNonZero] = 0.0;
2798                     index[numberNonZero++] = iColumn;
2799                   }
2800                   break;
2801                 case ClpSimplex::atLowerBound:
2802                   if (value < -dualTolerance) {
2803                     value *= value;
2804                     // store square in list
2805                     if (infeas[iColumn]) {
2806                       infeas[iColumn] = value; // already there
2807                     } else {
2808                       array[numberNonZero] = value;
2809                       index[numberNonZero++] = iColumn;
2810                     }
2811                   } else {
2812                     array[numberNonZero] = 0.0;
2813                     index[numberNonZero++] = iColumn;
2814                   }
2815                 }
2816               }
2817             }
2818           }
2819 #if ABOCA_LITE
2820         }
2821 #endif
2822       } else {
2823         // use special column copy
2824         // reset back
2825         assert(!killDjs);
2826         //if (killDjs)
2827         //   scaleFactor = 0.0;
2828         if (infeas)
2829           returnCode = 1;
2830         columnCopy_->transposeTimes2(model, pi, dj1, piWeight,
2831           infeas, reducedCost,
2832           referenceIn, devex,
2833           reference, weights, scaleFactor);
2834         numberNonZero = dj1->getNumElements();
2835       }
2836     } else {
2837       // scaled
2838       // modify pi so can collapse to one loop
2839       for (i = 0; i < numberInRowArray; i++) {
2840         int iRow = whichRow[i];
2841         pi[iRow] = piOld[i] * rowScale[iRow];
2842       }
2843       // can also scale piWeight as not used again
2844       int numberWeight = pi2->getNumElements();
2845       const int *indexWeight = pi2->getIndices();
2846       for (i = 0; i < numberWeight; i++) {
2847         int iRow = indexWeight[i];
2848         piWeight[iRow] *= rowScale[iRow];
2849       }
2850       if (!columnCopy_ || killDjs) {
2851         if (infeas)
2852           returnCode = 1;
2853         const double *COIN_RESTRICT columnScale = model->columnScale();
2854 #if ABOCA_LITE
2855         int numberThreads = abcState();
2856         if (numberThreads) {
2857           clpTempInfo info[ABOCA_LITE];
2858           int chunk = (numberActiveColumns_ + numberThreads - 1) / numberThreads;
2859           int n = 0;
2860           for (int i = 0; i < numberThreads; i++) {
2861             info[i].theta = referenceIn;
2862             info[i].changeObj = devex;
2863             info[i].primalRatio = scaleFactor;
2864             info[i].lower = weights;
2865             info[i].reducedCost = reducedCost;
2866             info[i].upper = infeas;
2867             info[i].solution = const_cast< double * >(columnScale);
2868             info[i].which = index + n;
2869             info[i].infeas = array + n;
2870             info[i].index = reinterpret_cast< int * >(reference);
2871             info[i].work = const_cast< double * >(pi);
2872             info[i].cost = const_cast< double * >(piWeight);
2873             info[i].status = const_cast< unsigned char * >(model->statusArray());
2874             info[i].startColumn = n;
2875             info[i].element = elementByColumn;
2876             info[i].start = columnStart;
2877             info[i].row = row;
2878             info[i].numberToDo = CoinMin(chunk, numberActiveColumns_ - n);
2879             info[i].tolerance = zeroTolerance;
2880             info[i].dualTolerance = dualTolerance;
2881             info[i].numberInfeasibilities = killDjs ? 1 : 0;
2882             n += chunk;
2883           }
2884           for (int i = 0; i < numberThreads; i++) {
2885             cilk_spawn transposeTimes2ScaledBit(info[i]);
2886           }
2887           cilk_sync;
2888           for (int i = 0; i < numberThreads; i++) {
2889             numberNonZero += info[i].numberAdded;
2890           }
2891           moveAndZero(info, 2, NULL);
2892           if (infeas) {
2893             returnCode = 1;
2894             dj1->setNumElements(numberNonZero);
2895           }
2896         } else {
2897 #endif
2898           CoinBigIndex j;
2899           CoinBigIndex end = columnStart[0];
2900           for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
2901             CoinBigIndex start = end;
2902             end = columnStart[iColumn + 1];
2903             ClpSimplex::Status status = model->getStatus(iColumn);
2904             if (status == ClpSimplex::basic || status == ClpSimplex::isFixed)
2905               continue;
2906             double scale = columnScale[iColumn];
2907             double value = 0.0;
2908             for (j = start; j < end; j++) {
2909               int iRow = row[j];
2910               value -= pi[iRow] * elementByColumn[j];
2911             }
2912             value *= scale;
2913             if (fabs(value) > zeroTolerance) {
2914               double modification = 0.0;
2915               for (j = start; j < end; j++) {
2916                 int iRow = row[j];
2917                 modification += piWeight[iRow] * elementByColumn[j];
2918               }
2919               modification *= scale;
2920               double thisWeight = weights[iColumn];
2921               double pivot = value * scaleFactor;
2922               double pivotSquared = pivot * pivot;
2923               thisWeight += pivotSquared * devex + pivot * modification;
2924               if (thisWeight < DEVEX_TRY_NORM) {
2925                 if (referenceIn < 0.0) {
2926                   // steepest
2927                   thisWeight = CoinMax(DEVEX_TRY_NORM, DEVEX_ADD_ONE + pivotSquared);
2928                 } else {
2929                   // exact
2930                   thisWeight = referenceIn * pivotSquared;
2931                   if (reference(iColumn))
2932                     thisWeight += 1.0;
2933                   thisWeight = CoinMax(thisWeight, DEVEX_TRY_NORM);
2934                 }
2935               }
2936               weights[iColumn] = thisWeight;
2937               if (!killDjs) {
2938                 value = reducedCost[iColumn] - value;
2939                 reducedCost[iColumn] = value;
2940                 // simplify status
2941                 ClpSimplex::Status status = model->getStatus(iColumn);
2942 
2943                 switch (status) {
2944 
2945                 case ClpSimplex::basic:
2946                 case ClpSimplex::isFixed:
2947                   break;
2948                 case ClpSimplex::isFree:
2949                 case ClpSimplex::superBasic:
2950                   if (fabs(value) > FREE_ACCEPT * dualTolerance) {
2951                     // we are going to bias towards free (but only if reasonable)
2952                     value *= FREE_BIAS;
2953                     value *= value;
2954                     // store square in list
2955                     if (infeas[iColumn]) {
2956                       infeas[iColumn] = value; // already there
2957                     } else {
2958                       array[numberNonZero] = value;
2959                       index[numberNonZero++] = iColumn;
2960                     }
2961                   } else {
2962                     array[numberNonZero] = 0.0;
2963                     index[numberNonZero++] = iColumn;
2964                   }
2965                   break;
2966                 case ClpSimplex::atUpperBound:
2967                   if (value > dualTolerance) {
2968                     value *= value;
2969                     // store square in list
2970                     if (infeas[iColumn]) {
2971                       infeas[iColumn] = value; // already there
2972                     } else {
2973                       array[numberNonZero] = value;
2974                       index[numberNonZero++] = iColumn;
2975                     }
2976                   } else {
2977                     array[numberNonZero] = 0.0;
2978                     index[numberNonZero++] = iColumn;
2979                   }
2980                   break;
2981                 case ClpSimplex::atLowerBound:
2982                   if (value < -dualTolerance) {
2983                     value *= value;
2984                     // store square in list
2985                     if (infeas[iColumn]) {
2986                       infeas[iColumn] = value; // already there
2987                     } else {
2988                       array[numberNonZero] = value;
2989                       index[numberNonZero++] = iColumn;
2990                     }
2991                   } else {
2992                     array[numberNonZero] = 0.0;
2993                     index[numberNonZero++] = iColumn;
2994                   }
2995                 }
2996               }
2997             }
2998           }
2999 #if ABOCA_LITE
3000         }
3001 #endif
3002         if (infeas && false) {
3003           double tolerance = model->currentDualTolerance();
3004           // we can't really trust infeasibilities if there is dual error
3005           // this coding has to mimic coding in checkDualSolution
3006           double error = CoinMin(1.0e-2, model->largestDualError());
3007           // allow tolerance at least slightly bigger than standard
3008           tolerance = tolerance + error;
3009           returnCode = 1;
3010           int number = 0;
3011           for (int j = 0; j < numberNonZero; j++) {
3012             int iSequence = index[j];
3013             double value = reducedCost[iSequence];
3014             double value2 = array[j];
3015             array[j] = 0.0;
3016             value -= value2;
3017             reducedCost[iSequence] = value;
3018             // simplify status
3019             ClpSimplex::Status status = model->getStatus(iSequence);
3020 
3021             switch (status) {
3022 
3023             case ClpSimplex::basic:
3024             case ClpSimplex::isFixed:
3025               break;
3026             case ClpSimplex::isFree:
3027             case ClpSimplex::superBasic:
3028               if (fabs(value) > FREE_ACCEPT * tolerance) {
3029                 // we are going to bias towards free (but only if reasonable)
3030                 value *= FREE_BIAS;
3031                 value *= value;
3032                 // store square in list
3033                 if (infeas[iSequence]) {
3034                   infeas[iSequence] = value; // already there
3035                 } else {
3036                   array[number] = value;
3037                   index[number++] = iSequence;
3038                 }
3039               } else {
3040                 array[number] = 0.0;
3041                 index[number++] = iSequence;
3042               }
3043               break;
3044             case ClpSimplex::atUpperBound:
3045               if (value > tolerance) {
3046                 value *= value;
3047                 // store square in list
3048                 if (infeas[iSequence]) {
3049                   infeas[iSequence] = value; // already there
3050                 } else {
3051                   array[number] = value;
3052                   index[number++] = iSequence;
3053                 }
3054               } else {
3055                 array[number] = 0.0;
3056                 index[number++] = iSequence;
3057               }
3058               break;
3059             case ClpSimplex::atLowerBound:
3060               if (value < -tolerance) {
3061                 value *= value;
3062                 // store square in list
3063                 if (infeas[iSequence]) {
3064                   infeas[iSequence] = value; // already there
3065                 } else {
3066                   array[number] = value;
3067                   index[number++] = iSequence;
3068                 }
3069               } else {
3070                 array[number] = 0.0;
3071                 index[number++] = iSequence;
3072               }
3073             }
3074           }
3075           dj1->setNumElements(number);
3076         }
3077       } else {
3078         // use special column copy
3079         // reset back
3080         assert(!killDjs);
3081         //if (killDjs)
3082         //   scaleFactor = 0.0;
3083         if (infeas)
3084           returnCode = 1;
3085         columnCopy_->transposeTimes2(model, pi, dj1, piWeight,
3086           infeas, reducedCost,
3087           referenceIn, devex,
3088           reference, weights, scaleFactor);
3089         numberNonZero = dj1->getNumElements();
3090       }
3091     }
3092     // zero out
3093     int numberRows = model->numberRows();
3094     if (numberInRowArray * 4 < numberRows) {
3095       for (i = 0; i < numberInRowArray; i++) {
3096         int iRow = whichRow[i];
3097         pi[iRow] = 0.0;
3098       }
3099     } else {
3100       CoinZeroN(pi, numberRows);
3101     }
3102   } else {
3103     if (!rowScale) {
3104       CoinBigIndex j;
3105       CoinBigIndex end = columnStart[0];
3106       for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
3107         CoinBigIndex start = end;
3108         end = columnStart[iColumn + 1];
3109         ClpSimplex::Status status = model->getStatus(iColumn);
3110         if (status == ClpSimplex::basic || status == ClpSimplex::isFixed)
3111           continue;
3112         double value = 0.0;
3113         for (j = start; j < end; j++) {
3114           int iRow = row[j];
3115           value -= pi[iRow] * elementByColumn[j];
3116         }
3117         if (fabs(value) > zeroTolerance) {
3118           // and do other array
3119           double modification = 0.0;
3120           for (j = start; j < end; j++) {
3121             int iRow = row[j];
3122             modification += piWeight[iRow] * elementByColumn[j];
3123           }
3124           double thisWeight = weights[iColumn];
3125           double pivot = value * scaleFactor;
3126           double pivotSquared = pivot * pivot;
3127           thisWeight += pivotSquared * devex + pivot * modification;
3128           if (thisWeight < DEVEX_TRY_NORM) {
3129             if (referenceIn < 0.0) {
3130               // steepest
3131               thisWeight = CoinMax(DEVEX_TRY_NORM, DEVEX_ADD_ONE + pivotSquared);
3132             } else {
3133               // exact
3134               thisWeight = referenceIn * pivotSquared;
3135               if (reference(iColumn))
3136                 thisWeight += 1.0;
3137               thisWeight = CoinMax(thisWeight, DEVEX_TRY_NORM);
3138             }
3139           }
3140           weights[iColumn] = thisWeight;
3141           if (!killDjs) {
3142             array[iColumn] = value;
3143             index[numberNonZero++] = iColumn;
3144           }
3145         }
3146       }
3147     } else {
3148 #ifdef CLP_INVESTIGATE
3149       if (model->clpScaledMatrix())
3150         printf("scaledMatrix_ at %d of ClpPackedMatrix\n", __LINE__);
3151 #endif
3152       // scaled
3153       // can also scale piWeight as not used again
3154       int numberWeight = pi2->getNumElements();
3155       const int *COIN_RESTRICT indexWeight = pi2->getIndices();
3156       for (int i = 0; i < numberWeight; i++) {
3157         int iRow = indexWeight[i];
3158         piWeight[iRow] *= rowScale[iRow];
3159       }
3160       const double *COIN_RESTRICT columnScale = model->columnScale();
3161       CoinBigIndex j;
3162       CoinBigIndex end = columnStart[0];
3163       for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
3164         CoinBigIndex start = end;
3165         end = columnStart[iColumn + 1];
3166         ClpSimplex::Status status = model->getStatus(iColumn);
3167         if (status == ClpSimplex::basic || status == ClpSimplex::isFixed)
3168           continue;
3169         double scale = columnScale[iColumn];
3170         double value = 0.0;
3171         for (j = start; j < end; j++) {
3172           int iRow = row[j];
3173           value -= pi[iRow] * elementByColumn[j] * rowScale[iRow];
3174         }
3175         value *= scale;
3176         if (fabs(value) > zeroTolerance) {
3177           double modification = 0.0;
3178           for (j = start; j < end; j++) {
3179             int iRow = row[j];
3180             modification += piWeight[iRow] * elementByColumn[j];
3181           }
3182           modification *= scale;
3183           double thisWeight = weights[iColumn];
3184           double pivot = value * scaleFactor;
3185           double pivotSquared = pivot * pivot;
3186           thisWeight += pivotSquared * devex + pivot * modification;
3187           if (thisWeight < DEVEX_TRY_NORM) {
3188             if (referenceIn < 0.0) {
3189               // steepest
3190               thisWeight = CoinMax(DEVEX_TRY_NORM, DEVEX_ADD_ONE + pivotSquared);
3191             } else {
3192               // exact
3193               thisWeight = referenceIn * pivotSquared;
3194               if (reference(iColumn))
3195                 thisWeight += 1.0;
3196               thisWeight = CoinMax(thisWeight, DEVEX_TRY_NORM);
3197             }
3198           }
3199           weights[iColumn] = thisWeight;
3200           if (!killDjs) {
3201             array[iColumn] = value;
3202             index[numberNonZero++] = iColumn;
3203           }
3204         }
3205       }
3206     }
3207   }
3208   dj1->setNumElements(numberNonZero);
3209   spare->setNumElements(0);
3210   if (packed)
3211     dj1->setPackedMode(true);
3212   return returnCode;
3213 }
3214 // Updates second array for steepest and does devex weights
subsetTimes2(const ClpSimplex * model,CoinIndexedVector * dj1,const CoinIndexedVector * pi2,CoinIndexedVector *,double referenceIn,double devex,unsigned int * COIN_RESTRICT reference,double * COIN_RESTRICT weights,double scaleFactor)3215 void ClpPackedMatrix::subsetTimes2(const ClpSimplex *model,
3216   CoinIndexedVector *dj1,
3217   const CoinIndexedVector *pi2, CoinIndexedVector *,
3218   double referenceIn, double devex,
3219   // Array for exact devex to say what is in reference framework
3220   unsigned int *COIN_RESTRICT reference,
3221   double *COIN_RESTRICT weights, double scaleFactor)
3222 {
3223   int number = dj1->getNumElements();
3224   const int *COIN_RESTRICT index = dj1->getIndices();
3225   double *COIN_RESTRICT array = dj1->denseVector();
3226   assert(dj1->packedMode());
3227 
3228   // get matrix data pointers
3229   const int *COIN_RESTRICT row = matrix_->getIndices();
3230   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
3231   const int *COIN_RESTRICT columnLength = matrix_->getVectorLengths();
3232   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
3233   const double *COIN_RESTRICT rowScale = model->rowScale();
3234   double *COIN_RESTRICT piWeight = pi2->denseVector();
3235   bool killDjs = (scaleFactor == 0.0);
3236   if (!scaleFactor)
3237     scaleFactor = 1.0;
3238   if (!rowScale) {
3239     for (int k = 0; k < number; k++) {
3240       int iColumn = index[k];
3241       double pivot = array[k] * scaleFactor;
3242       if (killDjs)
3243         array[k] = 0.0;
3244       // and do other array
3245       double modification = 0.0;
3246       for (CoinBigIndex j = columnStart[iColumn];
3247            j < columnStart[iColumn] + columnLength[iColumn]; j++) {
3248         int iRow = row[j];
3249         modification += piWeight[iRow] * elementByColumn[j];
3250       }
3251       double thisWeight = weights[iColumn];
3252       double pivotSquared = pivot * pivot;
3253       thisWeight += pivotSquared * devex + pivot * modification;
3254       if (thisWeight < DEVEX_TRY_NORM) {
3255         if (referenceIn < 0.0) {
3256           // steepest
3257           thisWeight = CoinMax(DEVEX_TRY_NORM, DEVEX_ADD_ONE + pivotSquared);
3258         } else {
3259           // exact
3260           thisWeight = referenceIn * pivotSquared;
3261           if (reference(iColumn))
3262             thisWeight += 1.0;
3263           thisWeight = CoinMax(thisWeight, DEVEX_TRY_NORM);
3264         }
3265       }
3266       weights[iColumn] = thisWeight;
3267     }
3268   } else {
3269 #ifdef CLP_INVESTIGATE
3270     if (model->clpScaledMatrix())
3271       printf("scaledMatrix_ at %d of ClpPackedMatrix\n", __LINE__);
3272 #endif
3273     // scaled
3274     const double *columnScale = model->columnScale();
3275     for (int k = 0; k < number; k++) {
3276       int iColumn = index[k];
3277       double pivot = array[k] * scaleFactor;
3278       double scale = columnScale[iColumn];
3279       if (killDjs)
3280         array[k] = 0.0;
3281       // and do other array
3282       double modification = 0.0;
3283       for (CoinBigIndex j = columnStart[iColumn];
3284            j < columnStart[iColumn] + columnLength[iColumn]; j++) {
3285         int iRow = row[j];
3286         modification += piWeight[iRow] * elementByColumn[j] * rowScale[iRow];
3287       }
3288       double thisWeight = weights[iColumn];
3289       modification *= scale;
3290       double pivotSquared = pivot * pivot;
3291       thisWeight += pivotSquared * devex + pivot * modification;
3292       if (thisWeight < DEVEX_TRY_NORM) {
3293         if (referenceIn < 0.0) {
3294           // steepest
3295           thisWeight = CoinMax(DEVEX_TRY_NORM, DEVEX_ADD_ONE + pivotSquared);
3296         } else {
3297           // exact
3298           thisWeight = referenceIn * pivotSquared;
3299           if (reference(iColumn))
3300             thisWeight += 1.0;
3301           thisWeight = CoinMax(thisWeight, DEVEX_TRY_NORM);
3302         }
3303       }
3304       weights[iColumn] = thisWeight;
3305     }
3306   }
3307 }
3308 /// returns number of elements in column part of basis,
countBasis(const int * whichColumn,int & numberColumnBasic)3309 int ClpPackedMatrix::countBasis(const int *whichColumn,
3310   int &numberColumnBasic)
3311 {
3312   const int *columnLength = matrix_->getVectorLengths();
3313   int i;
3314   CoinBigIndex numberElements = 0;
3315   // just count - can be over so ignore zero problem
3316   for (i = 0; i < numberColumnBasic; i++) {
3317     int iColumn = whichColumn[i];
3318     numberElements += columnLength[iColumn];
3319   }
3320 #if COIN_BIG_INDEX
3321   if (numberElements > COIN_INT_MAX) {
3322     printf("Factorization too large\n");
3323     abort();
3324   }
3325 #endif
3326   return static_cast< int >(numberElements);
3327 }
fillBasis(ClpSimplex * model,const int * COIN_RESTRICT whichColumn,int & numberColumnBasic,int * COIN_RESTRICT indexRowU,int * COIN_RESTRICT start,int * COIN_RESTRICT rowCount,int * COIN_RESTRICT columnCount,CoinFactorizationDouble * COIN_RESTRICT elementU)3328 void ClpPackedMatrix::fillBasis(ClpSimplex *model,
3329   const int *COIN_RESTRICT whichColumn,
3330   int &numberColumnBasic,
3331   int *COIN_RESTRICT indexRowU,
3332   int *COIN_RESTRICT start,
3333   int *COIN_RESTRICT rowCount,
3334   int *COIN_RESTRICT columnCount,
3335   CoinFactorizationDouble *COIN_RESTRICT elementU)
3336 {
3337   const int *COIN_RESTRICT columnLength = matrix_->getVectorLengths();
3338   int i;
3339   CoinBigIndex numberElements = start[0];
3340   // fill
3341   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
3342   const double *COIN_RESTRICT rowScale = model->rowScale();
3343   const int *COIN_RESTRICT row = matrix_->getIndices();
3344   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
3345   ClpPackedMatrix *scaledMatrix = model->clpScaledMatrix();
3346   if (scaledMatrix && true) {
3347     columnLength = scaledMatrix->matrix_->getVectorLengths();
3348     columnStart = scaledMatrix->matrix_->getVectorStarts();
3349     rowScale = NULL;
3350     row = scaledMatrix->matrix_->getIndices();
3351     elementByColumn = scaledMatrix->matrix_->getElements();
3352   }
3353   if ((flags_ & 1) == 0) {
3354     if (!rowScale) {
3355       // no scaling
3356       for (i = 0; i < numberColumnBasic; i++) {
3357         int iColumn = whichColumn[i];
3358         int length = columnLength[iColumn];
3359         CoinBigIndex startThis = columnStart[iColumn];
3360         columnCount[i] = length;
3361         CoinBigIndex endThis = startThis + length;
3362         for (CoinBigIndex j = startThis; j < endThis; j++) {
3363           int iRow = row[j];
3364           indexRowU[numberElements] = iRow;
3365           rowCount[iRow]++;
3366           assert(elementByColumn[j]);
3367           elementU[numberElements++] = elementByColumn[j];
3368         }
3369         start[i + 1] = static_cast< int >(numberElements);
3370       }
3371     } else {
3372       // scaling
3373       const double *COIN_RESTRICT columnScale = model->columnScale();
3374       for (i = 0; i < numberColumnBasic; i++) {
3375         int iColumn = whichColumn[i];
3376         double scale = columnScale[iColumn];
3377         int length = columnLength[iColumn];
3378         CoinBigIndex startThis = columnStart[iColumn];
3379         columnCount[i] = length;
3380         CoinBigIndex endThis = startThis + length;
3381         for (CoinBigIndex j = startThis; j < endThis; j++) {
3382           int iRow = row[j];
3383           indexRowU[numberElements] = iRow;
3384           rowCount[iRow]++;
3385           assert(elementByColumn[j]);
3386           elementU[numberElements++] = elementByColumn[j] * scale * rowScale[iRow];
3387         }
3388         start[i + 1] = static_cast< int >(numberElements);
3389       }
3390     }
3391   } else {
3392     // there are zero elements so need to look more closely
3393     if (!rowScale) {
3394       // no scaling
3395       for (i = 0; i < numberColumnBasic; i++) {
3396         int iColumn = whichColumn[i];
3397         CoinBigIndex j;
3398         for (j = columnStart[iColumn];
3399              j < columnStart[iColumn] + columnLength[iColumn]; j++) {
3400           double value = elementByColumn[j];
3401           if (value) {
3402             int iRow = row[j];
3403             indexRowU[numberElements] = iRow;
3404             rowCount[iRow]++;
3405             elementU[numberElements++] = value;
3406           }
3407         }
3408         start[i + 1] = static_cast< int >(numberElements);
3409         columnCount[i] = static_cast< int >(numberElements) - start[i];
3410       }
3411     } else {
3412       // scaling
3413       const double *columnScale = model->columnScale();
3414       for (i = 0; i < numberColumnBasic; i++) {
3415         int iColumn = whichColumn[i];
3416         CoinBigIndex j;
3417         double scale = columnScale[iColumn];
3418         for (j = columnStart[iColumn];
3419              j < columnStart[iColumn] + columnLength[i]; j++) {
3420           double value = elementByColumn[j];
3421           if (value) {
3422             int iRow = row[j];
3423             indexRowU[numberElements] = iRow;
3424             rowCount[iRow]++;
3425             elementU[numberElements++] = value * scale * rowScale[iRow];
3426           }
3427         }
3428         start[i + 1] = static_cast< int >(numberElements);
3429         columnCount[i] = static_cast< int >(numberElements) - start[i];
3430       }
3431     }
3432   }
3433 #if COIN_BIG_INDEX
3434   if (numberElements > COIN_INT_MAX)
3435     abort();
3436 #endif
3437 }
3438 #if 0
3439 int
3440 ClpPackedMatrix::scale2(ClpModel * model) const
3441 {
3442      ClpSimplex * baseModel = NULL;
3443 #ifndef NDEBUG
3444      //checkFlags();
3445 #endif
3446      int numberRows = model->numberRows();
3447      int numberColumns = matrix_->getNumCols();
3448      model->setClpScaledMatrix(NULL); // get rid of any scaled matrix
3449      // If empty - return as sanityCheck will trap
3450      if (!numberRows || !numberColumns) {
3451           model->setRowScale(NULL);
3452           model->setColumnScale(NULL);
3453           return 1;
3454      }
3455      ClpMatrixBase * rowCopyBase = model->rowCopy();
3456      double * rowScale;
3457      double * columnScale;
3458      //assert (!model->rowScale());
3459      bool arraysExist;
3460      double * inverseRowScale = NULL;
3461      double * inverseColumnScale = NULL;
3462      if (!model->rowScale()) {
3463           rowScale = new double [numberRows*2];
3464           columnScale = new double [numberColumns*2];
3465           inverseRowScale = rowScale + numberRows;
3466           inverseColumnScale = columnScale + numberColumns;
3467           arraysExist = false;
3468      } else {
3469           rowScale = model->mutableRowScale();
3470           columnScale = model->mutableColumnScale();
3471           inverseRowScale = model->mutableInverseRowScale();
3472           inverseColumnScale = model->mutableInverseColumnScale();
3473           arraysExist = true;
3474      }
3475      assert (inverseRowScale == rowScale + numberRows);
3476      assert (inverseColumnScale == columnScale + numberColumns);
3477      // we are going to mark bits we are interested in
3478      char * usefulRow = new char [numberRows];
3479      char * usefulColumn = new char [numberColumns];
3480      double * rowLower = model->rowLower();
3481      double * rowUpper = model->rowUpper();
3482      double * columnLower = model->columnLower();
3483      double * columnUpper = model->columnUpper();
3484      int iColumn, iRow;
3485      //#define LEAVE_FIXED
3486      // mark free rows
3487      for (iRow = 0; iRow < numberRows; iRow++) {
3488 #if 0 //ndef LEAVE_FIXED
3489           if (rowUpper[iRow] < 1.0e20 ||
3490                     rowLower[iRow] > -1.0e20)
3491                usefulRow[iRow] = 1;
3492           else
3493                usefulRow[iRow] = 0;
3494 #else
3495           usefulRow[iRow] = 1;
3496 #endif
3497      }
3498      // mark empty and fixed columns
3499      // also see if worth scaling
3500      assert (model->scalingFlag() <= 4);
3501      //  scale_stats[model->scalingFlag()]++;
3502      double largest = 0.0;
3503      double smallest = 1.0e50;
3504      // get matrix data pointers
3505      int * row = matrix_->getMutableIndices();
3506      const CoinBigIndex * columnStart = matrix_->getVectorStarts();
3507      int * columnLength = matrix_->getMutableVectorLengths();
3508      double * elementByColumn = matrix_->getMutableElements();
3509      bool deletedElements = false;
3510      for (iColumn = 0; iColumn < numberColumns; iColumn++) {
3511           CoinBigIndex j;
3512           char useful = 0;
3513           bool deleteSome = false;
3514           int start = columnStart[iColumn];
3515           int end = start + columnLength[iColumn];
3516 #ifndef LEAVE_FIXED
3517           if (columnUpper[iColumn] >
3518                     columnLower[iColumn] + 1.0e-12) {
3519 #endif
3520                for (j = start; j < end; j++) {
3521                     iRow = row[j];
3522                     double value = fabs(elementByColumn[j]);
3523                     if (value > 1.0e-20) {
3524                          if(usefulRow[iRow]) {
3525                               useful = 1;
3526                               largest = CoinMax(largest, fabs(elementByColumn[j]));
3527                               smallest = CoinMin(smallest, fabs(elementByColumn[j]));
3528                          }
3529                     } else {
3530                          // small
3531                          deleteSome = true;
3532                     }
3533                }
3534 #ifndef LEAVE_FIXED
3535           } else {
3536                // just check values
3537                for (j = start; j < end; j++) {
3538                     double value = fabs(elementByColumn[j]);
3539                     if (value <= 1.0e-20) {
3540                          // small
3541                          deleteSome = true;
3542                     }
3543                }
3544           }
3545 #endif
3546           usefulColumn[iColumn] = useful;
3547           if (deleteSome) {
3548                deletedElements = true;
3549                CoinBigIndex put = start;
3550                for (j = start; j < end; j++) {
3551                     double value = elementByColumn[j];
3552                     if (fabs(value) > 1.0e-20) {
3553                          row[put] = row[j];
3554                          elementByColumn[put++] = value;
3555                     }
3556                }
3557                columnLength[iColumn] = put - start;
3558           }
3559      }
3560      model->messageHandler()->message(CLP_PACKEDSCALE_INITIAL, *model->messagesPointer())
3561                << smallest << largest
3562                << CoinMessageEol;
3563      if (smallest >= 0.5 && largest <= 2.0 && !deletedElements) {
3564           // don't bother scaling
3565           model->messageHandler()->message(CLP_PACKEDSCALE_FORGET, *model->messagesPointer())
3566                     << CoinMessageEol;
3567           if (!arraysExist) {
3568                delete [] rowScale;
3569                delete [] columnScale;
3570           } else {
3571                model->setRowScale(NULL);
3572                model->setColumnScale(NULL);
3573           }
3574           delete [] usefulRow;
3575           delete [] usefulColumn;
3576           return 1;
3577      } else {
3578 #ifdef CLP_INVESTIGATE
3579           if (deletedElements)
3580                printf("DEL_ELS\n");
3581 #endif
3582           if (!rowCopyBase) {
3583                // temporary copy
3584                rowCopyBase = reverseOrderedCopy();
3585           } else if (deletedElements) {
3586                rowCopyBase = reverseOrderedCopy();
3587                model->setNewRowCopy(rowCopyBase);
3588           }
3589 #ifndef NDEBUG
3590           ClpPackedMatrix* rowCopy =
3591                dynamic_cast< ClpPackedMatrix*>(rowCopyBase);
3592           // Make sure it is really a ClpPackedMatrix
3593           assert (rowCopy != NULL);
3594 #else
3595           ClpPackedMatrix* rowCopy =
3596                static_cast< ClpPackedMatrix*>(rowCopyBase);
3597 #endif
3598 
3599           const int * column = rowCopy->getIndices();
3600           const CoinBigIndex * rowStart = rowCopy->getVectorStarts();
3601           const double * element = rowCopy->getElements();
3602           // need to scale
3603           if (largest > 1.0e13 * smallest) {
3604                // safer to have smaller zero tolerance
3605                double ratio = smallest / largest;
3606                ClpSimplex * simplex = static_cast<ClpSimplex *> (model);
3607                double newTolerance = CoinMax(ratio * 0.5, 1.0e-18);
3608                if (simplex->zeroTolerance() > newTolerance)
3609                     simplex->setZeroTolerance(newTolerance);
3610           }
3611           int scalingMethod = model->scalingFlag();
3612           if (scalingMethod == 4) {
3613                // As auto
3614                scalingMethod = 3;
3615           }
3616           // and see if there any empty rows
3617           for (iRow = 0; iRow < numberRows; iRow++) {
3618                if (usefulRow[iRow]) {
3619                     CoinBigIndex j;
3620                     int useful = 0;
3621                     for (j = rowStart[iRow]; j < rowStart[iRow+1]; j++) {
3622                          int iColumn = column[j];
3623                          if (usefulColumn[iColumn]) {
3624                               useful = 1;
3625                               break;
3626                          }
3627                     }
3628                     usefulRow[iRow] = static_cast<char>(useful);
3629                }
3630           }
3631           double savedOverallRatio = 0.0;
3632           double tolerance = 5.0 * model->primalTolerance();
3633           double overallLargest = -1.0e-20;
3634           double overallSmallest = 1.0e20;
3635           bool finished = false;
3636           // if scalingMethod 3 then may change
3637           bool extraDetails = (model->logLevel() > 2);
3638           while (!finished) {
3639                int numberPass = 3;
3640                overallLargest = -1.0e-20;
3641                overallSmallest = 1.0e20;
3642                if (!baseModel) {
3643                     ClpFillN ( rowScale, numberRows, 1.0);
3644                     ClpFillN ( columnScale, numberColumns, 1.0);
3645                } else {
3646                     // Copy scales and do quick scale on extra rows
3647                     // Then just one? pass
3648                     assert (numberColumns == baseModel->numberColumns());
3649                     int numberRows2 = baseModel->numberRows();
3650                     assert (numberRows >= numberRows2);
3651                     assert (baseModel->rowScale());
3652                     CoinMemcpyN(baseModel->rowScale(), numberRows2, rowScale);
3653                     CoinMemcpyN(baseModel->columnScale(), numberColumns, columnScale);
3654                     if (numberRows > numberRows2) {
3655                          numberPass = 1;
3656                          // do some scaling
3657                          if (scalingMethod == 1 || scalingMethod == 3) {
3658                               // Maximum in each row
3659                               for (iRow = numberRows2; iRow < numberRows; iRow++) {
3660                                    if (usefulRow[iRow]) {
3661                                         CoinBigIndex j;
3662                                         largest = 1.0e-10;
3663                                         for (j = rowStart[iRow]; j < rowStart[iRow+1]; j++) {
3664                                              int iColumn = column[j];
3665                                              if (usefulColumn[iColumn]) {
3666                                                   double value = fabs(element[j] * columnScale[iColumn]);
3667                                                   largest = CoinMax(largest, value);
3668                                                   assert (largest < 1.0e40);
3669                                              }
3670                                         }
3671                                         rowScale[iRow] = 1.0 / largest;
3672 #ifdef COIN_DEVELOP
3673                                         if (extraDetails) {
3674                                              overallLargest = CoinMax(overallLargest, largest);
3675                                              overallSmallest = CoinMin(overallSmallest, largest);
3676                                         }
3677 #endif
3678                                    }
3679                               }
3680                          } else {
3681                               overallLargest = 0.0;
3682                               overallSmallest = 1.0e50;
3683                               // Geometric mean on row scales
3684                               for (iRow = 0; iRow < numberRows; iRow++) {
3685                                    if (usefulRow[iRow]) {
3686                                         CoinBigIndex j;
3687                                         largest = 1.0e-20;
3688                                         smallest = 1.0e50;
3689                                         for (j = rowStart[iRow]; j < rowStart[iRow+1]; j++) {
3690                                              int iColumn = column[j];
3691                                              if (usefulColumn[iColumn]) {
3692                                                   double value = fabs(element[j]);
3693                                                   value *= columnScale[iColumn];
3694                                                   largest = CoinMax(largest, value);
3695                                                   smallest = CoinMin(smallest, value);
3696                                              }
3697                                         }
3698                                         if (iRow >= numberRows2) {
3699                                              rowScale[iRow] = 1.0 / sqrt(smallest * largest);
3700                                              //rowScale[iRow]=CoinMax(1.0e-10,CoinMin(1.0e10,rowScale[iRow]));
3701                                         }
3702 #ifdef COIN_DEVELOP
3703                                         if (extraDetails) {
3704                                              overallLargest = CoinMax(largest * rowScale[iRow], overallLargest);
3705                                              overallSmallest = CoinMin(smallest * rowScale[iRow], overallSmallest);
3706                                         }
3707 #endif
3708                                    }
3709                               }
3710                          }
3711                     } else {
3712                          // just use
3713                          numberPass = 0;
3714                     }
3715                }
3716                if (!baseModel && (scalingMethod == 1 || scalingMethod == 3)) {
3717                     // Maximum in each row
3718                     for (iRow = 0; iRow < numberRows; iRow++) {
3719                          if (usefulRow[iRow]) {
3720                               CoinBigIndex j;
3721                               largest = 1.0e-10;
3722                               for (j = rowStart[iRow]; j < rowStart[iRow+1]; j++) {
3723                                    int iColumn = column[j];
3724                                    if (usefulColumn[iColumn]) {
3725                                         double value = fabs(element[j]);
3726                                         largest = CoinMax(largest, value);
3727                                         assert (largest < 1.0e40);
3728                                    }
3729                               }
3730                               rowScale[iRow] = 1.0 / largest;
3731 #ifdef COIN_DEVELOP
3732                               if (extraDetails) {
3733                                    overallLargest = CoinMax(overallLargest, largest);
3734                                    overallSmallest = CoinMin(overallSmallest, largest);
3735                               }
3736 #endif
3737                          }
3738                     }
3739                } else {
3740 #ifdef USE_OBJECTIVE
3741                     // This will be used to help get scale factors
3742                     double * objective = new double[numberColumns];
3743                     CoinMemcpyN(model->costRegion(1), numberColumns, objective);
3744                     double objScale = 1.0;
3745 #endif
3746                     while (numberPass) {
3747                          overallLargest = 0.0;
3748                          overallSmallest = 1.0e50;
3749                          numberPass--;
3750                          // Geometric mean on row scales
3751                          for (iRow = 0; iRow < numberRows; iRow++) {
3752                               if (usefulRow[iRow]) {
3753                                    CoinBigIndex j;
3754                                    largest = 1.0e-20;
3755                                    smallest = 1.0e50;
3756                                    for (j = rowStart[iRow]; j < rowStart[iRow+1]; j++) {
3757                                         int iColumn = column[j];
3758                                         if (usefulColumn[iColumn]) {
3759                                              double value = fabs(element[j]);
3760                                              value *= columnScale[iColumn];
3761                                              largest = CoinMax(largest, value);
3762                                              smallest = CoinMin(smallest, value);
3763                                         }
3764                                    }
3765 
3766                                    rowScale[iRow] = 1.0 / sqrt(smallest * largest);
3767                                    //rowScale[iRow]=CoinMax(1.0e-10,CoinMin(1.0e10,rowScale[iRow]));
3768                                    if (extraDetails) {
3769                                         overallLargest = CoinMax(largest * rowScale[iRow], overallLargest);
3770                                         overallSmallest = CoinMin(smallest * rowScale[iRow], overallSmallest);
3771                                    }
3772                               }
3773                          }
3774 #ifdef USE_OBJECTIVE
3775                          largest = 1.0e-20;
3776                          smallest = 1.0e50;
3777                          for (iColumn = 0; iColumn < numberColumns; iColumn++) {
3778                               if (usefulColumn[iColumn]) {
3779                                    double value = fabs(objective[iColumn]);
3780                                    value *= columnScale[iColumn];
3781                                    largest = CoinMax(largest, value);
3782                                    smallest = CoinMin(smallest, value);
3783                               }
3784                          }
3785                          objScale = 1.0 / sqrt(smallest * largest);
3786 #endif
3787                          model->messageHandler()->message(CLP_PACKEDSCALE_WHILE, *model->messagesPointer())
3788                                    << overallSmallest
3789                                    << overallLargest
3790                                    << CoinMessageEol;
3791                          // skip last column round
3792                          if (numberPass == 1)
3793                               break;
3794                          // Geometric mean on column scales
3795                          for (iColumn = 0; iColumn < numberColumns; iColumn++) {
3796                               if (usefulColumn[iColumn]) {
3797                                    CoinBigIndex j;
3798                                    largest = 1.0e-20;
3799                                    smallest = 1.0e50;
3800                                    for (j = columnStart[iColumn];
3801                                              j < columnStart[iColumn] + columnLength[iColumn]; j++) {
3802                                         iRow = row[j];
3803                                         double value = fabs(elementByColumn[j]);
3804                                         if (usefulRow[iRow]) {
3805                                              value *= rowScale[iRow];
3806                                              largest = CoinMax(largest, value);
3807                                              smallest = CoinMin(smallest, value);
3808                                         }
3809                                    }
3810 #ifdef USE_OBJECTIVE
3811                                    if (fabs(objective[iColumn]) > 1.0e-20) {
3812                                         double value = fabs(objective[iColumn]) * objScale;
3813                                         largest = CoinMax(largest, value);
3814                                         smallest = CoinMin(smallest, value);
3815                                    }
3816 #endif
3817                                    columnScale[iColumn] = 1.0 / sqrt(smallest * largest);
3818                                    //columnScale[iColumn]=CoinMax(1.0e-10,CoinMin(1.0e10,columnScale[iColumn]));
3819                               }
3820                          }
3821                     }
3822 #ifdef USE_OBJECTIVE
3823                     delete [] objective;
3824                     printf("obj scale %g - use it if you want\n", objScale);
3825 #endif
3826                }
3827                // If ranges will make horrid then scale
3828                for (iRow = 0; iRow < numberRows; iRow++) {
3829                     if (usefulRow[iRow]) {
3830                          double difference = rowUpper[iRow] - rowLower[iRow];
3831                          double scaledDifference = difference * rowScale[iRow];
3832                          if (scaledDifference > tolerance && scaledDifference < 1.0e-4) {
3833                               // make gap larger
3834                               rowScale[iRow] *= 1.0e-4 / scaledDifference;
3835                               rowScale[iRow] = CoinMax(1.0e-10, CoinMin(1.0e10, rowScale[iRow]));
3836                               //printf("Row %d difference %g scaled diff %g => %g\n",iRow,difference,
3837                               // scaledDifference,difference*rowScale[iRow]);
3838                          }
3839                     }
3840                }
3841                // final pass to scale columns so largest is reasonable
3842                // See what smallest will be if largest is 1.0
3843                overallSmallest = 1.0e50;
3844                for (iColumn = 0; iColumn < numberColumns; iColumn++) {
3845                     if (usefulColumn[iColumn]) {
3846                          CoinBigIndex j;
3847                          largest = 1.0e-20;
3848                          smallest = 1.0e50;
3849                          for (j = columnStart[iColumn];
3850                                    j < columnStart[iColumn] + columnLength[iColumn]; j++) {
3851                               iRow = row[j];
3852                               if(elementByColumn[j] && usefulRow[iRow]) {
3853                                    double value = fabs(elementByColumn[j] * rowScale[iRow]);
3854                                    largest = CoinMax(largest, value);
3855                                    smallest = CoinMin(smallest, value);
3856                               }
3857                          }
3858                          if (overallSmallest * largest > smallest)
3859                               overallSmallest = smallest / largest;
3860                     }
3861                }
3862                if (scalingMethod == 1 || scalingMethod == 2) {
3863                     finished = true;
3864                } else if (savedOverallRatio == 0.0 && scalingMethod != 4) {
3865                     savedOverallRatio = overallSmallest;
3866                     scalingMethod = 4;
3867                } else {
3868                     assert (scalingMethod == 4);
3869                     if (overallSmallest > 2.0 * savedOverallRatio) {
3870                          finished = true; // geometric was better
3871                          if (model->scalingFlag() == 4) {
3872                               // If in Branch and bound change
3873                               if ((model->specialOptions() & 1024) != 0) {
3874                                    model->scaling(2);
3875                               }
3876                          }
3877                     } else {
3878                          scalingMethod = 1; // redo equilibrium
3879                          if (model->scalingFlag() == 4) {
3880                               // If in Branch and bound change
3881                               if ((model->specialOptions() & 1024) != 0) {
3882                                    model->scaling(1);
3883                               }
3884                          }
3885                     }
3886 #if 0
3887                     if (extraDetails) {
3888                          if (finished)
3889                               printf("equilibrium ratio %g, geometric ratio %g , geo chosen\n",
3890                                      savedOverallRatio, overallSmallest);
3891                          else
3892                               printf("equilibrium ratio %g, geometric ratio %g , equi chosen\n",
3893                                      savedOverallRatio, overallSmallest);
3894                     }
3895 #endif
3896                }
3897           }
3898           //#define RANDOMIZE
3899 #ifdef RANDOMIZE
3900           // randomize by up to 10%
3901           for (iRow = 0; iRow < numberRows; iRow++) {
3902                double value = 0.5 - randomNumberGenerator_.randomDouble(); //between -0.5 to + 0.5
3903                rowScale[iRow] *= (1.0 + 0.1 * value);
3904           }
3905 #endif
3906           overallLargest = 1.0;
3907           if (overallSmallest < 1.0e-1)
3908                overallLargest = 1.0 / sqrt(overallSmallest);
3909           overallLargest = CoinMin(100.0, overallLargest);
3910           overallSmallest = 1.0e50;
3911           //printf("scaling %d\n",model->scalingFlag());
3912           for (iColumn = 0; iColumn < numberColumns; iColumn++) {
3913                if (columnUpper[iColumn] >
3914                          columnLower[iColumn] + 1.0e-12) {
3915                     //if (usefulColumn[iColumn]) {
3916                     CoinBigIndex j;
3917                     largest = 1.0e-20;
3918                     smallest = 1.0e50;
3919                     for (j = columnStart[iColumn];
3920                               j < columnStart[iColumn] + columnLength[iColumn]; j++) {
3921                          iRow = row[j];
3922                          if(elementByColumn[j] && usefulRow[iRow]) {
3923                               double value = fabs(elementByColumn[j] * rowScale[iRow]);
3924                               largest = CoinMax(largest, value);
3925                               smallest = CoinMin(smallest, value);
3926                          }
3927                     }
3928                     columnScale[iColumn] = overallLargest / largest;
3929                     //columnScale[iColumn]=CoinMax(1.0e-10,CoinMin(1.0e10,columnScale[iColumn]));
3930 #ifdef RANDOMIZE
3931                     double value = 0.5 - randomNumberGenerator_.randomDouble(); //between -0.5 to + 0.5
3932                     columnScale[iColumn] *= (1.0 + 0.1 * value);
3933 #endif
3934                     double difference = columnUpper[iColumn] - columnLower[iColumn];
3935                     if (difference < 1.0e-5 * columnScale[iColumn]) {
3936                          // make gap larger
3937                          columnScale[iColumn] = difference / 1.0e-5;
3938                          //printf("Column %d difference %g scaled diff %g => %g\n",iColumn,difference,
3939                          // scaledDifference,difference*columnScale[iColumn]);
3940                     }
3941                     double value = smallest * columnScale[iColumn];
3942                     if (overallSmallest > value)
3943                          overallSmallest = value;
3944                     //overallSmallest = CoinMin(overallSmallest,smallest*columnScale[iColumn]);
3945                }
3946           }
3947           model->messageHandler()->message(CLP_PACKEDSCALE_FINAL, *model->messagesPointer())
3948                     << overallSmallest
3949                     << overallLargest
3950                     << CoinMessageEol;
3951           if (overallSmallest < 1.0e-13) {
3952                // Change factorization zero tolerance
3953                double newTolerance = CoinMax(1.0e-15 * (overallSmallest / 1.0e-13),
3954                                              1.0e-18);
3955                ClpSimplex * simplex = static_cast<ClpSimplex *> (model);
3956                if (simplex->factorization()->zeroTolerance() > newTolerance)
3957                     simplex->factorization()->zeroTolerance(newTolerance);
3958                newTolerance = CoinMax(overallSmallest * 0.5, 1.0e-18);
3959                simplex->setZeroTolerance(newTolerance);
3960           }
3961           delete [] usefulRow;
3962           delete [] usefulColumn;
3963 #ifndef SLIM_CLP
3964           // If quadratic then make symmetric
3965           ClpObjective * obj = model->objectiveAsObject();
3966 #ifndef NO_RTTI
3967           ClpQuadraticObjective * quadraticObj = (dynamic_cast< ClpQuadraticObjective*>(obj));
3968 #else
3969           ClpQuadraticObjective * quadraticObj = NULL;
3970           if (obj->type() == 2)
3971                quadraticObj = (static_cast< ClpQuadraticObjective*>(obj));
3972 #endif
3973           if (quadraticObj) {
3974                if (!rowCopyBase) {
3975                     // temporary copy
3976                     rowCopyBase = reverseOrderedCopy();
3977                }
3978 #ifndef NDEBUG
3979                ClpPackedMatrix* rowCopy =
3980                     dynamic_cast< ClpPackedMatrix*>(rowCopyBase);
3981                // Make sure it is really a ClpPackedMatrix
3982                assert (rowCopy != NULL);
3983 #else
3984                ClpPackedMatrix* rowCopy =
3985                     static_cast< ClpPackedMatrix*>(rowCopyBase);
3986 #endif
3987                const int * column = rowCopy->getIndices();
3988                const CoinBigIndex * rowStart = rowCopy->getVectorStarts();
3989                CoinPackedMatrix * quadratic = quadraticObj->quadraticObjective();
3990                int numberXColumns = quadratic->getNumCols();
3991                if (numberXColumns < numberColumns) {
3992                     // we assume symmetric
3993                     int numberQuadraticColumns = 0;
3994                     int i;
3995                     //const int * columnQuadratic = quadratic->getIndices();
3996                     //const int * columnQuadraticStart = quadratic->getVectorStarts();
3997                     const int * columnQuadraticLength = quadratic->getVectorLengths();
3998                     for (i = 0; i < numberXColumns; i++) {
3999                          int length = columnQuadraticLength[i];
4000 #ifndef CORRECT_COLUMN_COUNTS
4001                          length = 1;
4002 #endif
4003                          if (length)
4004                               numberQuadraticColumns++;
4005                     }
4006                     int numberXRows = numberRows - numberQuadraticColumns;
4007                     numberQuadraticColumns = 0;
4008                     for (i = 0; i < numberXColumns; i++) {
4009                          int length = columnQuadraticLength[i];
4010 #ifndef CORRECT_COLUMN_COUNTS
4011                          length = 1;
4012 #endif
4013                          if (length) {
4014                               rowScale[numberQuadraticColumns+numberXRows] = columnScale[i];
4015                               numberQuadraticColumns++;
4016                          }
4017                     }
4018                     int numberQuadraticRows = 0;
4019                     for (i = 0; i < numberXRows; i++) {
4020                          // See if any in row quadratic
4021                          CoinBigIndex j;
4022                          int numberQ = 0;
4023                          for (j = rowStart[i]; j < rowStart[i+1]; j++) {
4024                               int iColumn = column[j];
4025                               if (columnQuadraticLength[iColumn])
4026                                    numberQ++;
4027                          }
4028 #ifndef CORRECT_ROW_COUNTS
4029                          numberQ = 1;
4030 #endif
4031                          if (numberQ) {
4032                               columnScale[numberQuadraticRows+numberXColumns] = rowScale[i];
4033                               numberQuadraticRows++;
4034                          }
4035                     }
4036                     // and make sure Sj okay
4037                     for (iColumn = numberQuadraticRows + numberXColumns; iColumn < numberColumns; iColumn++) {
4038                          CoinBigIndex j = columnStart[iColumn];
4039                          assert(columnLength[iColumn] == 1);
4040                          int iRow = row[j];
4041                          double value = fabs(elementByColumn[j] * rowScale[iRow]);
4042                          columnScale[iColumn] = 1.0 / value;
4043                     }
4044                }
4045           }
4046 #endif
4047           // make copy (could do faster by using previous values)
4048           // could just do partial
4049           for (iRow = 0; iRow < numberRows; iRow++)
4050                inverseRowScale[iRow] = 1.0 / rowScale[iRow] ;
4051           for (iColumn = 0; iColumn < numberColumns; iColumn++)
4052                inverseColumnScale[iColumn] = 1.0 / columnScale[iColumn] ;
4053           if (!arraysExist) {
4054                model->setRowScale(rowScale);
4055                model->setColumnScale(columnScale);
4056           }
4057           if (model->rowCopy()) {
4058                // need to replace row by row
4059                ClpPackedMatrix* rowCopy =
4060                     static_cast< ClpPackedMatrix*>(model->rowCopy());
4061                double * element = rowCopy->getMutableElements();
4062                const int * column = rowCopy->getIndices();
4063                const CoinBigIndex * rowStart = rowCopy->getVectorStarts();
4064                // scale row copy
4065                for (iRow = 0; iRow < numberRows; iRow++) {
4066                     CoinBigIndex j;
4067                     double scale = rowScale[iRow];
4068                     double * elementsInThisRow = element + rowStart[iRow];
4069                     const int * columnsInThisRow = column + rowStart[iRow];
4070                     int number = rowStart[iRow+1] - rowStart[iRow];
4071                     assert (number <= numberColumns);
4072                     for (j = 0; j < number; j++) {
4073                          int iColumn = columnsInThisRow[j];
4074                          elementsInThisRow[j] *= scale * columnScale[iColumn];
4075                     }
4076                }
4077                if ((model->specialOptions() & 262144) != 0) {
4078                     //if ((model->specialOptions()&(COIN_CBC_USING_CLP|16384))!=0) {
4079                     //if (model->inCbcBranchAndBound()&&false) {
4080                     // copy without gaps
4081                     CoinPackedMatrix * scaledMatrix = new CoinPackedMatrix(*matrix_, 0, 0);
4082                     ClpPackedMatrix * scaled = new ClpPackedMatrix(scaledMatrix);
4083                     model->setClpScaledMatrix(scaled);
4084                     // get matrix data pointers
4085                     const int * row = scaledMatrix->getIndices();
4086                     const CoinBigIndex * columnStart = scaledMatrix->getVectorStarts();
4087 #ifndef NDEBUG
4088                     const int * columnLength = scaledMatrix->getVectorLengths();
4089 #endif
4090                     double * elementByColumn = scaledMatrix->getMutableElements();
4091                     for (iColumn = 0; iColumn < numberColumns; iColumn++) {
4092                          CoinBigIndex j;
4093                          double scale = columnScale[iColumn];
4094                          assert (columnStart[iColumn+1] == columnStart[iColumn] + columnLength[iColumn]);
4095                          for (j = columnStart[iColumn];
4096                                    j < columnStart[iColumn+1]; j++) {
4097                               int iRow = row[j];
4098                               elementByColumn[j] *= scale * rowScale[iRow];
4099                          }
4100                     }
4101                } else {
4102                     //printf("not in b&b\n");
4103                }
4104           } else {
4105                // no row copy
4106                delete rowCopyBase;
4107           }
4108           return 0;
4109      }
4110 }
4111 #endif
4112 //#define SQRT_ARRAY
4113 #ifdef SQRT_ARRAY
doSqrts(double * array,int n)4114 static void doSqrts(double *array, int n)
4115 {
4116   for (int i = 0; i < n; i++)
4117     array[i] = 1.0 / sqrt(array[i]);
4118 }
4119 #endif
4120 //static int scale_stats[5]={0,0,0,0,0};
4121 // Creates scales for column copy (rowCopy in model may be modified)
scale(ClpModel * model,ClpSimplex * simplex) const4122 int ClpPackedMatrix::scale(ClpModel *model, ClpSimplex *simplex) const
4123 {
4124   //const ClpSimplex * baseModel=NULL;
4125   //return scale2(model);
4126 #if 0
4127      ClpMatrixBase * rowClone = NULL;
4128      if (model->rowCopy())
4129           rowClone = model->rowCopy()->clone();
4130      assert (!model->rowScale());
4131      assert (!model->columnScale());
4132      int returnCode = scale2(model);
4133      if (returnCode)
4134           return returnCode;
4135 #endif
4136 #ifndef NDEBUG
4137   //checkFlags();
4138 #endif
4139   int numberRows = model->numberRows();
4140   int numberColumns = matrix_->getNumCols();
4141   model->setClpScaledMatrix(NULL); // get rid of any scaled matrix
4142   // If empty - return as sanityCheck will trap
4143   if (!numberRows || !numberColumns) {
4144     model->setRowScale(NULL);
4145     model->setColumnScale(NULL);
4146     return 1;
4147   }
4148 #if 0
4149      // start fake
4150      double * rowScale2 = CoinCopyOfArray(model->rowScale(), numberRows);
4151      double * columnScale2 = CoinCopyOfArray(model->columnScale(), numberColumns);
4152      model->setRowScale(NULL);
4153      model->setColumnScale(NULL);
4154      model->setNewRowCopy(rowClone);
4155 #endif
4156   ClpMatrixBase *rowCopyBase = model->rowCopy();
4157   double *COIN_RESTRICT rowScale;
4158   double *COIN_RESTRICT columnScale;
4159   //assert (!model->rowScale());
4160   bool arraysExist;
4161   double *COIN_RESTRICT inverseRowScale = NULL;
4162   double *COIN_RESTRICT inverseColumnScale = NULL;
4163   if (!model->rowScale()) {
4164     rowScale = new double[numberRows * 2];
4165     columnScale = new double[numberColumns * 2];
4166     inverseRowScale = rowScale + numberRows;
4167     inverseColumnScale = columnScale + numberColumns;
4168     arraysExist = false;
4169   } else {
4170     rowScale = model->mutableRowScale();
4171     columnScale = model->mutableColumnScale();
4172     inverseRowScale = model->mutableInverseRowScale();
4173     inverseColumnScale = model->mutableInverseColumnScale();
4174     arraysExist = true;
4175   }
4176   assert(inverseRowScale == rowScale + numberRows);
4177   assert(inverseColumnScale == columnScale + numberColumns);
4178   // we are going to mark bits we are interested in
4179   char *COIN_RESTRICT usefulColumn = new char[numberColumns];
4180   double *COIN_RESTRICT rowLower = model->rowLower();
4181   double *COIN_RESTRICT rowUpper = model->rowUpper();
4182   double *COIN_RESTRICT columnLower = model->columnLower();
4183   double *COIN_RESTRICT columnUpper = model->columnUpper();
4184   int iColumn, iRow;
4185   //#define LEAVE_FIXED
4186   // mark empty and fixed columns
4187   // also see if worth scaling
4188   assert(model->scalingFlag() <= 5);
4189   //  scale_stats[model->scalingFlag()]++;
4190   double largest = 0.0;
4191   double smallest = 1.0e50;
4192   // get matrix data pointers
4193   int *COIN_RESTRICT row = matrix_->getMutableIndices();
4194   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
4195   int *COIN_RESTRICT columnLength = matrix_->getMutableVectorLengths();
4196   double *COIN_RESTRICT elementByColumn = matrix_->getMutableElements();
4197   CoinBigIndex deletedElements = 0;
4198   for (iColumn = 0; iColumn < numberColumns; iColumn++) {
4199     CoinBigIndex j;
4200     char useful = 0;
4201     bool deleteSome = false;
4202     CoinBigIndex start = columnStart[iColumn];
4203     CoinBigIndex end = start + columnLength[iColumn];
4204 #ifndef LEAVE_FIXED
4205     if (columnUpper[iColumn] > columnLower[iColumn] + 1.0e-12 || (simplex && simplex->getColumnStatus(iColumn) == ClpSimplex::basic)) {
4206 #endif
4207       for (j = start; j < end; j++) {
4208         iRow = row[j];
4209         double value = fabs(elementByColumn[j]);
4210         if (value > 1.0e-20) {
4211           useful = 1;
4212           largest = CoinMax(largest, fabs(elementByColumn[j]));
4213           smallest = CoinMin(smallest, fabs(elementByColumn[j]));
4214         } else {
4215           // small
4216           deleteSome = true;
4217         }
4218       }
4219 #ifndef LEAVE_FIXED
4220     } else {
4221       // just check values
4222       for (j = start; j < end; j++) {
4223         double value = fabs(elementByColumn[j]);
4224         if (value <= 1.0e-20) {
4225           // small
4226           deleteSome = true;
4227         }
4228       }
4229     }
4230 #endif
4231     usefulColumn[iColumn] = useful;
4232     if (deleteSome) {
4233       CoinBigIndex put = start;
4234       for (j = start; j < end; j++) {
4235         double value = elementByColumn[j];
4236         if (fabs(value) > 1.0e-20) {
4237           row[put] = row[j];
4238           elementByColumn[put++] = value;
4239         }
4240       }
4241       deletedElements += end - put;
4242       columnLength[iColumn] = static_cast< int >(put - start);
4243     }
4244   }
4245   // don't scale integers if option set
4246   if ((model->specialOptions() & 4194304) != 0 && model->integerInformation()) {
4247     const char *COIN_RESTRICT integer = model->integerInformation();
4248     for (iColumn = 0; iColumn < numberColumns; iColumn++) {
4249       if (integer[iColumn])
4250         usefulColumn[iColumn] = 0;
4251     }
4252   }
4253   if (deletedElements) {
4254     matrix_->setNumElements(matrix_->getNumElements() - deletedElements);
4255     flags_ |= 0x02;
4256   }
4257   model->messageHandler()->message(CLP_PACKEDSCALE_INITIAL, *model->messagesPointer())
4258     << smallest << largest
4259     << CoinMessageEol;
4260   if (smallest * 1.0e12 < largest && simplex) {
4261     // increase tolerances
4262     simplex->setCurrentDualTolerance(CoinMax(simplex->currentDualTolerance(), 5.0e-7));
4263     simplex->setCurrentPrimalTolerance(CoinMax(simplex->currentPrimalTolerance(), 5.0e-7));
4264   }
4265   if (smallest >= 0.5 && largest <= 2.0 && !deletedElements) {
4266     // don't bother scaling
4267     model->messageHandler()->message(CLP_PACKEDSCALE_FORGET, *model->messagesPointer())
4268       << CoinMessageEol;
4269     if (!arraysExist) {
4270       delete[] rowScale;
4271       delete[] columnScale;
4272     } else {
4273       model->setRowScale(NULL);
4274       model->setColumnScale(NULL);
4275     }
4276     delete[] usefulColumn;
4277     return 1;
4278   } else {
4279 #ifdef CLP_INVESTIGATE
4280     if (deletedElements)
4281       printf("DEL_ELS\n");
4282 #endif
4283     if (!rowCopyBase) {
4284       // temporary copy
4285       rowCopyBase = reverseOrderedCopy();
4286     } else if (deletedElements) {
4287       rowCopyBase = reverseOrderedCopy();
4288       model->setNewRowCopy(rowCopyBase);
4289     }
4290 #ifndef NDEBUG
4291     ClpPackedMatrix *rowCopy = dynamic_cast< ClpPackedMatrix * >(rowCopyBase);
4292     // Make sure it is really a ClpPackedMatrix
4293     assert(rowCopy != NULL);
4294 #else
4295     ClpPackedMatrix *rowCopy = static_cast< ClpPackedMatrix * >(rowCopyBase);
4296 #endif
4297 
4298     const int *COIN_RESTRICT column = rowCopy->getIndices();
4299     const CoinBigIndex *COIN_RESTRICT rowStart = rowCopy->getVectorStarts();
4300     const double *COIN_RESTRICT element = rowCopy->getElements();
4301     // need to scale
4302     if (largest > 1.0e13 * smallest) {
4303       // safer to have smaller zero tolerance
4304       double ratio = smallest / largest;
4305       ClpSimplex *simplex = static_cast< ClpSimplex * >(model);
4306       double newTolerance = CoinMax(ratio * 0.5, 1.0e-18);
4307       if (simplex->zeroTolerance() > newTolerance)
4308         simplex->setZeroTolerance(newTolerance);
4309     }
4310     int scalingMethod = model->scalingFlag();
4311     if (scalingMethod == 4) {
4312       // As auto
4313       scalingMethod = 3;
4314     } else if (scalingMethod == 5) {
4315       // As geometric
4316       scalingMethod = 2;
4317     }
4318     double savedOverallRatio = 0.0;
4319     double tolerance = 5.0 * model->primalTolerance();
4320     double overallLargest = -1.0e-20;
4321     double overallSmallest = 1.0e20;
4322     bool finished = false;
4323     // if scalingMethod 3 then may change
4324     bool extraDetails = (model->logLevel() > 2);
4325 #if 0
4326 	  for (iColumn = 0; iColumn < numberColumns; iColumn++) {
4327 	    if (columnUpper[iColumn] >
4328 		columnLower[iColumn] + 1.0e-12 && columnLength[iColumn])
4329 	      assert(usefulColumn[iColumn]!=0);
4330 	    else
4331 	      assert(usefulColumn[iColumn]==0);
4332 	  }
4333 #endif
4334     while (!finished) {
4335       int numberPass = 3;
4336       overallLargest = -1.0e-20;
4337       overallSmallest = 1.0e20;
4338       ClpFillN(rowScale, numberRows, 1.0);
4339       ClpFillN(columnScale, numberColumns, 1.0);
4340       if (scalingMethod == 1 || scalingMethod == 3) {
4341         // Maximum in each row
4342         for (iRow = 0; iRow < numberRows; iRow++) {
4343           CoinBigIndex j;
4344           largest = 1.0e-10;
4345           for (j = rowStart[iRow]; j < rowStart[iRow + 1]; j++) {
4346             int iColumn = column[j];
4347             if (usefulColumn[iColumn]) {
4348               double value = fabs(element[j]);
4349               largest = CoinMax(largest, value);
4350               assert(largest < 1.0e40);
4351             }
4352           }
4353           rowScale[iRow] = 1.0 / largest;
4354 #ifdef COIN_DEVELOP
4355           if (extraDetails) {
4356             overallLargest = CoinMax(overallLargest, largest);
4357             overallSmallest = CoinMin(overallSmallest, largest);
4358           }
4359 #endif
4360         }
4361       } else {
4362 #ifdef USE_OBJECTIVE
4363         // This will be used to help get scale factors
4364         double *COIN_RESTRICT objective = new double[numberColumns];
4365         CoinMemcpyN(model->costRegion(1), numberColumns, objective);
4366         double objScale = 1.0;
4367 #endif
4368         while (numberPass) {
4369           overallLargest = 0.0;
4370           overallSmallest = 1.0e50;
4371           numberPass--;
4372           // Geometric mean on row scales
4373           for (iRow = 0; iRow < numberRows; iRow++) {
4374             CoinBigIndex j;
4375             largest = 1.0e-50;
4376             smallest = 1.0e50;
4377             for (j = rowStart[iRow]; j < rowStart[iRow + 1]; j++) {
4378               int iColumn = column[j];
4379               if (usefulColumn[iColumn]) {
4380                 double value = fabs(element[j]);
4381                 value *= columnScale[iColumn];
4382                 largest = CoinMax(largest, value);
4383                 smallest = CoinMin(smallest, value);
4384               }
4385             }
4386 
4387 #ifdef SQRT_ARRAY
4388             rowScale[iRow] = smallest * largest;
4389 #else
4390             rowScale[iRow] = 1.0 / sqrt(smallest * largest);
4391 #endif
4392             //rowScale[iRow]=CoinMax(1.0e-10,CoinMin(1.0e10,rowScale[iRow]));
4393             if (extraDetails) {
4394               overallLargest = CoinMax(largest * rowScale[iRow], overallLargest);
4395               overallSmallest = CoinMin(smallest * rowScale[iRow], overallSmallest);
4396             }
4397           }
4398           if (model->scalingFlag() == 5)
4399             break; // just scale rows
4400 #ifdef SQRT_ARRAY
4401           doSqrts(rowScale, numberRows);
4402 #endif
4403 #ifdef USE_OBJECTIVE
4404           largest = 1.0e-20;
4405           smallest = 1.0e50;
4406           for (iColumn = 0; iColumn < numberColumns; iColumn++) {
4407             if (usefulColumn[iColumn]) {
4408               double value = fabs(objective[iColumn]);
4409               value *= columnScale[iColumn];
4410               largest = CoinMax(largest, value);
4411               smallest = CoinMin(smallest, value);
4412             }
4413           }
4414           objScale = 1.0 / sqrt(smallest * largest);
4415 #endif
4416           model->messageHandler()->message(CLP_PACKEDSCALE_WHILE, *model->messagesPointer())
4417             << overallSmallest
4418             << overallLargest
4419             << CoinMessageEol;
4420           // skip last column round
4421           if (numberPass == 1)
4422             break;
4423           // Geometric mean on column scales
4424           for (iColumn = 0; iColumn < numberColumns; iColumn++) {
4425             if (usefulColumn[iColumn]) {
4426               CoinBigIndex j;
4427               largest = 1.0e-50;
4428               smallest = 1.0e50;
4429               for (j = columnStart[iColumn];
4430                    j < columnStart[iColumn] + columnLength[iColumn]; j++) {
4431                 iRow = row[j];
4432                 double value = fabs(elementByColumn[j]);
4433                 value *= rowScale[iRow];
4434                 largest = CoinMax(largest, value);
4435                 smallest = CoinMin(smallest, value);
4436               }
4437 #ifdef USE_OBJECTIVE
4438               if (fabs(objective[iColumn]) > 1.0e-20) {
4439                 double value = fabs(objective[iColumn]) * objScale;
4440                 largest = CoinMax(largest, value);
4441                 smallest = CoinMin(smallest, value);
4442               }
4443 #endif
4444 #ifdef SQRT_ARRAY
4445               columnScale[iColumn] = smallest * largest;
4446 #else
4447               columnScale[iColumn] = 1.0 / sqrt(smallest * largest);
4448 #endif
4449               //columnScale[iColumn]=CoinMax(1.0e-10,CoinMin(1.0e10,columnScale[iColumn]));
4450             }
4451           }
4452 #ifdef SQRT_ARRAY
4453           doSqrts(columnScale, numberColumns);
4454 #endif
4455         }
4456 #ifdef USE_OBJECTIVE
4457         delete[] objective;
4458         printf("obj scale %g - use it if you want\n", objScale);
4459 #endif
4460       }
4461       // If ranges will make horrid then scale
4462       for (iRow = 0; iRow < numberRows; iRow++) {
4463         double difference = rowUpper[iRow] - rowLower[iRow];
4464         double scaledDifference = difference * rowScale[iRow];
4465         if (scaledDifference > tolerance && scaledDifference < 1.0e-4) {
4466           // make gap larger
4467           rowScale[iRow] *= 1.0e-4 / scaledDifference;
4468           rowScale[iRow] = CoinMax(1.0e-10, CoinMin(1.0e10, rowScale[iRow]));
4469           //printf("Row %d difference %g scaled diff %g => %g\n",iRow,difference,
4470           // scaledDifference,difference*rowScale[iRow]);
4471         }
4472       }
4473       // final pass to scale columns so largest is reasonable
4474       // See what smallest will be if largest is 1.0
4475       if (model->scalingFlag() != 5) {
4476         overallSmallest = 1.0e50;
4477         for (iColumn = 0; iColumn < numberColumns; iColumn++) {
4478           if (usefulColumn[iColumn]) {
4479             CoinBigIndex j;
4480             largest = 1.0e-20;
4481             smallest = 1.0e50;
4482             for (j = columnStart[iColumn];
4483                  j < columnStart[iColumn] + columnLength[iColumn]; j++) {
4484               iRow = row[j];
4485               double value = fabs(elementByColumn[j] * rowScale[iRow]);
4486               largest = CoinMax(largest, value);
4487               smallest = CoinMin(smallest, value);
4488             }
4489             if (overallSmallest * largest > smallest)
4490               overallSmallest = smallest / largest;
4491           }
4492         }
4493       }
4494       if (scalingMethod == 1 || scalingMethod == 2) {
4495         finished = true;
4496       } else if (savedOverallRatio == 0.0 && scalingMethod != 4) {
4497         savedOverallRatio = overallSmallest;
4498         scalingMethod = 4;
4499       } else {
4500         assert(scalingMethod == 4);
4501         if (overallSmallest > 2.0 * savedOverallRatio) {
4502           finished = true; // geometric was better
4503           if (model->scalingFlag() == 4) {
4504             // If in Branch and bound change
4505             if ((model->specialOptions() & 1024) != 0) {
4506               model->scaling(2);
4507             }
4508           }
4509         } else {
4510           scalingMethod = 1; // redo equilibrium
4511           if (model->scalingFlag() == 4) {
4512             // If in Branch and bound change
4513             if ((model->specialOptions() & 1024) != 0) {
4514               model->scaling(1);
4515             }
4516           }
4517         }
4518 #if 0
4519                     if (extraDetails) {
4520                          if (finished)
4521                               printf("equilibrium ratio %g, geometric ratio %g , geo chosen\n",
4522                                      savedOverallRatio, overallSmallest);
4523                          else
4524                               printf("equilibrium ratio %g, geometric ratio %g , equi chosen\n",
4525                                      savedOverallRatio, overallSmallest);
4526                     }
4527 #endif
4528       }
4529     }
4530     //#define RANDOMIZE
4531 #ifdef RANDOMIZE
4532     // randomize by up to 10%
4533     for (iRow = 0; iRow < numberRows; iRow++) {
4534       double value = 0.5 - randomNumberGenerator_.randomDouble(); //between -0.5 to + 0.5
4535       rowScale[iRow] *= (1.0 + 0.1 * value);
4536     }
4537 #endif
4538     overallLargest = 1.0;
4539     if (overallSmallest < 1.0e-1)
4540       overallLargest = 1.0 / sqrt(overallSmallest);
4541     overallLargest = CoinMin(100.0, overallLargest);
4542     overallSmallest = 1.0e50;
4543     char *usedRow = reinterpret_cast< char * >(inverseRowScale);
4544     memset(usedRow, 0, numberRows);
4545     //printf("scaling %d\n",model->scalingFlag());
4546     if (model->scalingFlag() != 5) {
4547       for (iColumn = 0; iColumn < numberColumns; iColumn++) {
4548         if (columnUpper[iColumn] > columnLower[iColumn] + 1.0e-12) {
4549           //if (usefulColumn[iColumn]) {
4550           CoinBigIndex j;
4551           largest = 1.0e-20;
4552           smallest = 1.0e50;
4553           for (j = columnStart[iColumn];
4554                j < columnStart[iColumn] + columnLength[iColumn]; j++) {
4555             iRow = row[j];
4556             usedRow[iRow] = 1;
4557             double value = fabs(elementByColumn[j] * rowScale[iRow]);
4558             largest = CoinMax(largest, value);
4559             smallest = CoinMin(smallest, value);
4560           }
4561           columnScale[iColumn] = overallLargest / largest;
4562           //columnScale[iColumn]=CoinMax(1.0e-10,CoinMin(1.0e10,columnScale[iColumn]));
4563 #ifdef RANDOMIZE
4564           double value = 0.5 - randomNumberGenerator_.randomDouble(); //between -0.5 to + 0.5
4565           columnScale[iColumn] *= (1.0 + 0.1 * value);
4566 #endif
4567           double difference = columnUpper[iColumn] - columnLower[iColumn];
4568           if (difference < 1.0e-5 * columnScale[iColumn]) {
4569             // make gap larger
4570             columnScale[iColumn] = difference / 1.0e-5;
4571             //printf("Column %d difference %g scaled diff %g => %g\n",iColumn,difference,
4572             // scaledDifference,difference*columnScale[iColumn]);
4573           }
4574           double value = smallest * columnScale[iColumn];
4575           if (overallSmallest > value)
4576             overallSmallest = value;
4577           //overallSmallest = CoinMin(overallSmallest,smallest*columnScale[iColumn]);
4578         } else {
4579           //assert(columnScale[iColumn] == 1.0);
4580           columnScale[iColumn] = 1.0;
4581         }
4582       }
4583       for (iRow = 0; iRow < numberRows; iRow++) {
4584         if (!usedRow[iRow]) {
4585           rowScale[iRow] = 1.0;
4586         }
4587       }
4588     }
4589     model->messageHandler()->message(CLP_PACKEDSCALE_FINAL, *model->messagesPointer())
4590       << overallSmallest
4591       << overallLargest
4592       << CoinMessageEol;
4593 #if 0
4594           {
4595                for (iRow = 0; iRow < numberRows; iRow++) {
4596                     assert (rowScale[iRow] == rowScale2[iRow]);
4597                }
4598                delete [] rowScale2;
4599                for (iColumn = 0; iColumn < numberColumns; iColumn++) {
4600                     assert (columnScale[iColumn] == columnScale2[iColumn]);
4601                }
4602                delete [] columnScale2;
4603           }
4604 #endif
4605     if (overallSmallest < 1.0e-13) {
4606       // Change factorization zero tolerance
4607       double newTolerance = CoinMax(1.0e-15 * (overallSmallest / 1.0e-13),
4608         1.0e-18);
4609       ClpSimplex *simplex = static_cast< ClpSimplex * >(model);
4610       if (simplex->factorization()->zeroTolerance() > newTolerance)
4611         simplex->factorization()->zeroTolerance(newTolerance);
4612       newTolerance = CoinMax(overallSmallest * 0.5, 1.0e-18);
4613       simplex->setZeroTolerance(newTolerance);
4614     }
4615     delete[] usefulColumn;
4616 #ifndef SLIM_CLP
4617     // If quadratic then make symmetric
4618     ClpObjective *obj = model->objectiveAsObject();
4619 #ifndef NO_RTTI
4620     ClpQuadraticObjective *quadraticObj = (dynamic_cast< ClpQuadraticObjective * >(obj));
4621 #else
4622     ClpQuadraticObjective *quadraticObj = NULL;
4623     if (obj->type() == 2)
4624       quadraticObj = (static_cast< ClpQuadraticObjective * >(obj));
4625 #endif
4626     if (quadraticObj) {
4627       if (!rowCopyBase) {
4628         // temporary copy
4629         rowCopyBase = reverseOrderedCopy();
4630       }
4631 #ifndef NDEBUG
4632       ClpPackedMatrix *rowCopy = dynamic_cast< ClpPackedMatrix * >(rowCopyBase);
4633       // Make sure it is really a ClpPackedMatrix
4634       assert(rowCopy != NULL);
4635 #else
4636       ClpPackedMatrix *rowCopy = static_cast< ClpPackedMatrix * >(rowCopyBase);
4637 #endif
4638       const int *COIN_RESTRICT column = rowCopy->getIndices();
4639       const CoinBigIndex *COIN_RESTRICT rowStart = rowCopy->getVectorStarts();
4640       CoinPackedMatrix *quadratic = quadraticObj->quadraticObjective();
4641       int numberXColumns = quadratic->getNumCols();
4642       if (numberXColumns < numberColumns) {
4643         // we assume symmetric
4644         int numberQuadraticColumns = 0;
4645         int i;
4646         //const int * columnQuadratic = quadratic->getIndices();
4647         //const int * columnQuadraticStart = quadratic->getVectorStarts();
4648         const int *COIN_RESTRICT columnQuadraticLength = quadratic->getVectorLengths();
4649         for (i = 0; i < numberXColumns; i++) {
4650           int length = columnQuadraticLength[i];
4651 #ifndef CORRECT_COLUMN_COUNTS
4652           length = 1;
4653 #endif
4654           if (length)
4655             numberQuadraticColumns++;
4656         }
4657         int numberXRows = numberRows - numberQuadraticColumns;
4658         numberQuadraticColumns = 0;
4659         for (i = 0; i < numberXColumns; i++) {
4660           int length = columnQuadraticLength[i];
4661 #ifndef CORRECT_COLUMN_COUNTS
4662           length = 1;
4663 #endif
4664           if (length) {
4665             rowScale[numberQuadraticColumns + numberXRows] = columnScale[i];
4666             numberQuadraticColumns++;
4667           }
4668         }
4669         int numberQuadraticRows = 0;
4670         for (i = 0; i < numberXRows; i++) {
4671           // See if any in row quadratic
4672           CoinBigIndex j;
4673           int numberQ = 0;
4674           for (j = rowStart[i]; j < rowStart[i + 1]; j++) {
4675             int iColumn = column[j];
4676             if (columnQuadraticLength[iColumn])
4677               numberQ++;
4678           }
4679 #ifndef CORRECT_ROW_COUNTS
4680           numberQ = 1;
4681 #endif
4682           if (numberQ) {
4683             columnScale[numberQuadraticRows + numberXColumns] = rowScale[i];
4684             numberQuadraticRows++;
4685           }
4686         }
4687         // and make sure Sj okay
4688         for (iColumn = numberQuadraticRows + numberXColumns; iColumn < numberColumns; iColumn++) {
4689           CoinBigIndex j = columnStart[iColumn];
4690           assert(columnLength[iColumn] == 1);
4691           int iRow = row[j];
4692           double value = fabs(elementByColumn[j] * rowScale[iRow]);
4693           columnScale[iColumn] = 1.0 / value;
4694         }
4695       }
4696     }
4697 #endif
4698     // make copy (could do faster by using previous values)
4699     // could just do partial
4700     for (iRow = 0; iRow < numberRows; iRow++)
4701       inverseRowScale[iRow] = 1.0 / rowScale[iRow];
4702     for (iColumn = 0; iColumn < numberColumns; iColumn++)
4703       inverseColumnScale[iColumn] = 1.0 / columnScale[iColumn];
4704     if (!arraysExist) {
4705       model->setRowScale(rowScale);
4706       model->setColumnScale(columnScale);
4707     }
4708     if (model->rowCopy()) {
4709       // need to replace row by row
4710       ClpPackedMatrix *rowCopy = static_cast< ClpPackedMatrix * >(model->rowCopy());
4711       double *COIN_RESTRICT element = rowCopy->getMutableElements();
4712       const int *COIN_RESTRICT column = rowCopy->getIndices();
4713       const CoinBigIndex *COIN_RESTRICT rowStart = rowCopy->getVectorStarts();
4714       // scale row copy
4715       for (iRow = 0; iRow < numberRows; iRow++) {
4716         CoinBigIndex j;
4717         double scale = rowScale[iRow];
4718         double *COIN_RESTRICT elementsInThisRow = element + rowStart[iRow];
4719         const int *COIN_RESTRICT columnsInThisRow = column + rowStart[iRow];
4720         int number = static_cast< int >(rowStart[iRow + 1] - rowStart[iRow]);
4721         assert(number <= numberColumns);
4722         for (j = 0; j < number; j++) {
4723           int iColumn = columnsInThisRow[j];
4724           elementsInThisRow[j] *= scale * columnScale[iColumn];
4725         }
4726       }
4727       if ((model->specialOptions() & 262144) != 0) {
4728         //if ((model->specialOptions()&(COIN_CBC_USING_CLP|16384))!=0) {
4729         //if (model->inCbcBranchAndBound()&&false) {
4730         // copy without gaps
4731         CoinPackedMatrix *scaledMatrix = new CoinPackedMatrix(*matrix_, 0, 0);
4732         ClpPackedMatrix *scaled = new ClpPackedMatrix(scaledMatrix);
4733         model->setClpScaledMatrix(scaled);
4734         // get matrix data pointers
4735         const int *COIN_RESTRICT row = scaledMatrix->getIndices();
4736         const CoinBigIndex *COIN_RESTRICT columnStart = scaledMatrix->getVectorStarts();
4737 #ifndef NDEBUG
4738         const int *COIN_RESTRICT columnLength = scaledMatrix->getVectorLengths();
4739 #endif
4740         double *COIN_RESTRICT elementByColumn = scaledMatrix->getMutableElements();
4741         for (iColumn = 0; iColumn < numberColumns; iColumn++) {
4742           CoinBigIndex j;
4743           double scale = columnScale[iColumn];
4744           assert(columnStart[iColumn + 1] == columnStart[iColumn] + columnLength[iColumn]);
4745           for (j = columnStart[iColumn];
4746                j < columnStart[iColumn + 1]; j++) {
4747             int iRow = row[j];
4748             elementByColumn[j] *= scale * rowScale[iRow];
4749           }
4750         }
4751       } else {
4752         //printf("not in b&b\n");
4753       }
4754     } else {
4755       // no row copy
4756       delete rowCopyBase;
4757     }
4758     return 0;
4759   }
4760 }
4761 // Creates scaled column copy if scales exist
createScaledMatrix(ClpSimplex * model) const4762 void ClpPackedMatrix::createScaledMatrix(ClpSimplex *model) const
4763 {
4764   int numberRows = model->numberRows();
4765   int numberColumns = matrix_->getNumCols();
4766   model->setClpScaledMatrix(NULL); // get rid of any scaled matrix
4767   // If empty - return as sanityCheck will trap
4768   if (!numberRows || !numberColumns) {
4769     model->setRowScale(NULL);
4770     model->setColumnScale(NULL);
4771     return;
4772   }
4773   if (!model->rowScale())
4774     return;
4775   double *COIN_RESTRICT rowScale = model->mutableRowScale();
4776   double *COIN_RESTRICT columnScale = model->mutableColumnScale();
4777   // copy without gaps
4778   CoinPackedMatrix *scaledMatrix = new CoinPackedMatrix(*matrix_, 0, 0);
4779   ClpPackedMatrix *scaled = new ClpPackedMatrix(scaledMatrix);
4780   model->setClpScaledMatrix(scaled);
4781   // get matrix data pointers
4782   const int *COIN_RESTRICT row = scaledMatrix->getIndices();
4783   const CoinBigIndex *COIN_RESTRICT columnStart = scaledMatrix->getVectorStarts();
4784 #ifndef NDEBUG
4785   const int *COIN_RESTRICT columnLength = scaledMatrix->getVectorLengths();
4786 #endif
4787   double *COIN_RESTRICT elementByColumn = scaledMatrix->getMutableElements();
4788   for (int iColumn = 0; iColumn < numberColumns; iColumn++) {
4789     CoinBigIndex j;
4790     double scale = columnScale[iColumn];
4791     assert(columnStart[iColumn + 1] == columnStart[iColumn] + columnLength[iColumn]);
4792     for (j = columnStart[iColumn];
4793          j < columnStart[iColumn + 1]; j++) {
4794       int iRow = row[j];
4795       elementByColumn[j] *= scale * rowScale[iRow];
4796     }
4797   }
4798 #ifdef DO_CHECK_FLAGS
4799   checkFlags(0);
4800 #endif
4801 }
4802 /* Unpacks a column into an CoinIndexedvector
4803  */
unpack(const ClpSimplex * model,CoinIndexedVector * rowArray,int iColumn) const4804 void ClpPackedMatrix::unpack(const ClpSimplex *model, CoinIndexedVector *rowArray,
4805   int iColumn) const
4806 {
4807   const double *COIN_RESTRICT rowScale = model->rowScale();
4808   const int *COIN_RESTRICT row = matrix_->getIndices();
4809   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
4810   const int *COIN_RESTRICT columnLength = matrix_->getVectorLengths();
4811   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
4812   CoinBigIndex i;
4813   if (!rowScale) {
4814     for (i = columnStart[iColumn];
4815          i < columnStart[iColumn] + columnLength[iColumn]; i++) {
4816       rowArray->quickAdd(row[i], elementByColumn[i]);
4817     }
4818   } else {
4819     // apply scaling
4820     double scale = model->columnScale()[iColumn];
4821     for (i = columnStart[iColumn];
4822          i < columnStart[iColumn] + columnLength[iColumn]; i++) {
4823       int iRow = row[i];
4824       rowArray->quickAdd(iRow, elementByColumn[i] * scale * rowScale[iRow]);
4825     }
4826   }
4827 }
4828 /* Unpacks a column into a CoinIndexedVector
4829 ** in packed format
4830 Note that model is NOT const.  Bounds and objective could
4831 be modified if doing column generation (just for this variable) */
unpackPacked(ClpSimplex * model,CoinIndexedVector * rowArray,int iColumn) const4832 void ClpPackedMatrix::unpackPacked(ClpSimplex *model,
4833   CoinIndexedVector *rowArray,
4834   int iColumn) const
4835 {
4836   const double *COIN_RESTRICT rowScale = model->rowScale();
4837   const int *COIN_RESTRICT row = matrix_->getIndices();
4838   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
4839   const int *COIN_RESTRICT columnLength = matrix_->getVectorLengths();
4840   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
4841   CoinBigIndex i;
4842   int *COIN_RESTRICT index = rowArray->getIndices();
4843   double *COIN_RESTRICT array = rowArray->denseVector();
4844   int number = 0;
4845   if (!rowScale) {
4846     for (i = columnStart[iColumn];
4847          i < columnStart[iColumn] + columnLength[iColumn]; i++) {
4848       int iRow = row[i];
4849       double value = elementByColumn[i];
4850       if (value) {
4851         array[number] = value;
4852         index[number++] = iRow;
4853       }
4854     }
4855     rowArray->setNumElements(number);
4856     rowArray->setPackedMode(true);
4857   } else {
4858     // apply scaling
4859     double scale = model->columnScale()[iColumn];
4860     for (i = columnStart[iColumn];
4861          i < columnStart[iColumn] + columnLength[iColumn]; i++) {
4862       int iRow = row[i];
4863       double value = elementByColumn[i] * scale * rowScale[iRow];
4864       if (value) {
4865         array[number] = value;
4866         index[number++] = iRow;
4867       }
4868     }
4869     rowArray->setNumElements(number);
4870     rowArray->setPackedMode(true);
4871   }
4872 }
4873 /* Adds multiple of a column into an CoinIndexedvector
4874       You can use quickAdd to add to vector */
add(const ClpSimplex * model,CoinIndexedVector * rowArray,int iColumn,double multiplier) const4875 void ClpPackedMatrix::add(const ClpSimplex *model, CoinIndexedVector *rowArray,
4876   int iColumn, double multiplier) const
4877 {
4878   const double *COIN_RESTRICT rowScale = model->rowScale();
4879   const int *COIN_RESTRICT row = matrix_->getIndices();
4880   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
4881   const int *COIN_RESTRICT columnLength = matrix_->getVectorLengths();
4882   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
4883   CoinBigIndex i;
4884   if (!rowScale) {
4885     for (i = columnStart[iColumn];
4886          i < columnStart[iColumn] + columnLength[iColumn]; i++) {
4887       int iRow = row[i];
4888       rowArray->quickAdd(iRow, multiplier * elementByColumn[i]);
4889     }
4890   } else {
4891     // apply scaling
4892     double scale = model->columnScale()[iColumn] * multiplier;
4893     for (i = columnStart[iColumn];
4894          i < columnStart[iColumn] + columnLength[iColumn]; i++) {
4895       int iRow = row[i];
4896       rowArray->quickAdd(iRow, elementByColumn[i] * scale * rowScale[iRow]);
4897     }
4898   }
4899 }
4900 /* Adds multiple of a column into an array */
add(const ClpSimplex * model,double * COIN_RESTRICT array,int iColumn,double multiplier) const4901 void ClpPackedMatrix::add(const ClpSimplex *model, double *COIN_RESTRICT array,
4902   int iColumn, double multiplier) const
4903 {
4904   const double *COIN_RESTRICT rowScale = model->rowScale();
4905   const int *COIN_RESTRICT row = matrix_->getIndices();
4906   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
4907   const int *COIN_RESTRICT columnLength = matrix_->getVectorLengths();
4908   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
4909   CoinBigIndex i;
4910   if (!rowScale) {
4911     for (i = columnStart[iColumn];
4912          i < columnStart[iColumn] + columnLength[iColumn]; i++) {
4913       int iRow = row[i];
4914       array[iRow] += multiplier * elementByColumn[i];
4915     }
4916   } else {
4917     // apply scaling
4918     double scale = model->columnScale()[iColumn] * multiplier;
4919     for (i = columnStart[iColumn];
4920          i < columnStart[iColumn] + columnLength[iColumn]; i++) {
4921       int iRow = row[i];
4922       array[iRow] += elementByColumn[i] * scale * rowScale[iRow];
4923     }
4924   }
4925 }
4926 /* Checks if all elements are in valid range.  Can just
4927    return true if you are not paranoid.  For Clp I will
4928    probably expect no zeros.  Code can modify matrix to get rid of
4929    small elements.
4930    check bits (can be turned off to save time) :
4931    1 - check if matrix has gaps
4932    2 - check if zero elements
4933    4 - check and compress duplicates
4934    8 - report on large and small
4935 */
allElementsInRange(ClpModel * model,double smallest,double largest,int check)4936 bool ClpPackedMatrix::allElementsInRange(ClpModel *model,
4937   double smallest, double largest,
4938   int check)
4939 {
4940   int iColumn;
4941   // make sure matrix correct size
4942   assert(matrix_->getNumRows() <= model->numberRows());
4943   if (model->clpScaledMatrix())
4944     assert(model->clpScaledMatrix()->getNumElements() == matrix_->getNumElements());
4945   assert(matrix_->getNumRows() <= model->numberRows());
4946   matrix_->setDimensions(model->numberRows(), model->numberColumns());
4947   CoinBigIndex numberLarge = 0;
4948   ;
4949   CoinBigIndex numberSmall = 0;
4950   ;
4951   CoinBigIndex numberDuplicate = 0;
4952   ;
4953   int firstBadColumn = -1;
4954   int firstBadRow = -1;
4955   double firstBadElement = 0.0;
4956   // get matrix data pointers
4957   const int *COIN_RESTRICT row = matrix_->getIndices();
4958   const CoinBigIndex *COIN_RESTRICT columnStart = matrix_->getVectorStarts();
4959   const int *COIN_RESTRICT columnLength = matrix_->getVectorLengths();
4960   const double *COIN_RESTRICT elementByColumn = matrix_->getElements();
4961   int numberRows = model->numberRows();
4962   int numberColumns = matrix_->getNumCols();
4963   // Say no gaps
4964   flags_ &= ~2;
4965   if (type_ >= 10)
4966     return true; // gub
4967   if (check == 14 || check == 10) {
4968     if (matrix_->getNumElements() < columnStart[numberColumns]) {
4969       // pack down!
4970 #if 0
4971                matrix_->removeGaps();
4972 #else
4973       checkGaps();
4974 #ifdef DO_CHECK_FLAGS
4975       checkFlags(0);
4976 #endif
4977 #endif
4978 #ifdef COIN_DEVELOP
4979       //printf("flags set to 2\n");
4980 #endif
4981     } else if (numberColumns) {
4982       assert(columnStart[numberColumns] == columnStart[numberColumns - 1] + columnLength[numberColumns - 1]);
4983     }
4984     return true;
4985   }
4986   assert(check == 15 || check == 11);
4987   if (check == 15) {
4988     CoinBigIndex *COIN_RESTRICT mark = new CoinBigIndex[numberRows];
4989     int i;
4990     for (i = 0; i < numberRows; i++)
4991       mark[i] = -1;
4992     for (iColumn = 0; iColumn < numberColumns; iColumn++) {
4993       CoinBigIndex j;
4994       CoinBigIndex start = columnStart[iColumn];
4995       CoinBigIndex end = start + columnLength[iColumn];
4996       if (end != columnStart[iColumn + 1])
4997         flags_ |= 2;
4998       for (j = start; j < end; j++) {
4999         double value = fabs(elementByColumn[j]);
5000         int iRow = row[j];
5001         if (iRow < 0 || iRow >= numberRows) {
5002 #ifndef COIN_BIG_INDEX
5003           printf("Out of range %d %d %d %g\n", iColumn, j, row[j], elementByColumn[j]);
5004 #elif COIN_BIG_INDEX == 0
5005           printf("Out of range %d %d %d %g\n", iColumn, j, row[j], elementByColumn[j]);
5006 #elif COIN_BIG_INDEX == 1
5007           printf("Out of range %d %ld %d %g\n", iColumn, j, row[j], elementByColumn[j]);
5008 #else
5009           printf("Out of range %d %lld %d %g\n", iColumn, j, row[j], elementByColumn[j]);
5010 #endif
5011           return false;
5012         }
5013         if (mark[iRow] == -1) {
5014           mark[iRow] = j;
5015         } else {
5016           // duplicate
5017           numberDuplicate++;
5018         }
5019         //printf("%d %d %d %g\n",iColumn,j,row[j],elementByColumn[j]);
5020         if (!value)
5021           flags_ |= 1; // there are zero elements
5022         if (value < smallest) {
5023           numberSmall++;
5024         } else if (!(value <= largest)) {
5025           numberLarge++;
5026           if (firstBadColumn < 0) {
5027             firstBadColumn = iColumn;
5028             firstBadRow = row[j];
5029             firstBadElement = elementByColumn[j];
5030           }
5031         }
5032       }
5033       //clear mark
5034       for (j = columnStart[iColumn];
5035            j < columnStart[iColumn] + columnLength[iColumn]; j++) {
5036         int iRow = row[j];
5037         mark[iRow] = -1;
5038       }
5039     }
5040     delete[] mark;
5041   } else {
5042     // just check for out of range - not for duplicates
5043     for (iColumn = 0; iColumn < numberColumns; iColumn++) {
5044       CoinBigIndex j;
5045       CoinBigIndex start = columnStart[iColumn];
5046       CoinBigIndex end = start + columnLength[iColumn];
5047       if (end != columnStart[iColumn + 1])
5048         flags_ |= 2;
5049       for (j = start; j < end; j++) {
5050         double value = fabs(elementByColumn[j]);
5051         int iRow = row[j];
5052         if (iRow < 0 || iRow >= numberRows) {
5053 #ifndef COIN_BIG_INDEX
5054           printf("Out of range %d %d %d %g\n", iColumn, j, row[j], elementByColumn[j]);
5055 #elif COIN_BIG_INDEX == 0
5056           printf("Out of range %d %d %d %g\n", iColumn, j, row[j], elementByColumn[j]);
5057 #elif COIN_BIG_INDEX == 1
5058           printf("Out of range %d %ld %d %g\n", iColumn, j, row[j], elementByColumn[j]);
5059 #else
5060           printf("Out of range %d %lld %d %g\n", iColumn, j, row[j], elementByColumn[j]);
5061 #endif
5062           return false;
5063         }
5064         if (!value)
5065           flags_ |= 1; // there are zero elements
5066         if (value < smallest) {
5067           numberSmall++;
5068         } else if (!(value <= largest)) {
5069           numberLarge++;
5070           if (firstBadColumn < 0) {
5071             firstBadColumn = iColumn;
5072             firstBadRow = iRow;
5073             firstBadElement = value;
5074           }
5075         }
5076       }
5077     }
5078   }
5079   if (numberLarge) {
5080     model->messageHandler()->message(CLP_BAD_MATRIX, model->messages())
5081       << numberLarge
5082       << firstBadColumn << firstBadRow << firstBadElement
5083       << CoinMessageEol;
5084     return false;
5085   }
5086   if (numberSmall)
5087     model->messageHandler()->message(CLP_SMALLELEMENTS, model->messages())
5088       << numberSmall
5089       << CoinMessageEol;
5090   if (numberDuplicate)
5091     model->messageHandler()->message(CLP_DUPLICATEELEMENTS, model->messages())
5092       << numberDuplicate
5093       << CoinMessageEol;
5094   if (numberDuplicate)
5095     matrix_->eliminateDuplicates(smallest);
5096   else if (numberSmall)
5097     matrix_->compress(smallest);
5098   // If smallest >0.0 then there can't be zero elements
5099   if (smallest > 0.0)
5100     flags_ &= ~1;
5101   ;
5102   if (numberSmall || numberDuplicate)
5103     flags_ |= 2; // will have gaps
5104   return true;
5105 }
gutsOfTransposeTimesByRowGE3a(const CoinIndexedVector * COIN_RESTRICT piVector,int * COIN_RESTRICT index,double * COIN_RESTRICT output,int * COIN_RESTRICT lookup,char * COIN_RESTRICT marked,const double tolerance,const double scalar) const5106 int ClpPackedMatrix::gutsOfTransposeTimesByRowGE3a(const CoinIndexedVector *COIN_RESTRICT piVector,
5107   int *COIN_RESTRICT index,
5108   double *COIN_RESTRICT output,
5109   int *COIN_RESTRICT lookup,
5110   char *COIN_RESTRICT marked,
5111   const double tolerance,
5112   const double scalar) const
5113 {
5114   const double *COIN_RESTRICT pi = piVector->denseVector();
5115   int numberNonZero = 0;
5116   int numberInRowArray = piVector->getNumElements();
5117   const int *COIN_RESTRICT column = matrix_->getIndices();
5118   const CoinBigIndex *COIN_RESTRICT rowStart = matrix_->getVectorStarts();
5119   const double *COIN_RESTRICT element = matrix_->getElements();
5120   const int *COIN_RESTRICT whichRow = piVector->getIndices();
5121   int *COIN_RESTRICT fakeRow = const_cast< int * >(whichRow);
5122   fakeRow[numberInRowArray] = 0; // so can touch
5123 #ifndef NDEBUG
5124   int maxColumn = 0;
5125 #endif
5126   // ** Row copy is already scaled
5127   int nextRow = whichRow[0];
5128   CoinBigIndex nextStart = rowStart[nextRow];
5129   CoinBigIndex nextEnd = rowStart[nextRow + 1];
5130   for (int i = 0; i < numberInRowArray; i++) {
5131     double value = pi[i] * scalar;
5132     CoinBigIndex start = nextStart;
5133     CoinBigIndex end = nextEnd;
5134     nextRow = whichRow[i + 1];
5135     nextStart = rowStart[nextRow];
5136     //coin_prefetch_const(column + nextStart);
5137     //coin_prefetch_const(element + nextStart);
5138     nextEnd = rowStart[nextRow + 1];
5139     CoinBigIndex j;
5140     for (j = start; j < end; j++) {
5141       int iColumn = column[j];
5142 #ifndef NDEBUG
5143       maxColumn = CoinMax(maxColumn, iColumn);
5144 #endif
5145       double elValue = element[j];
5146       elValue *= value;
5147       if (marked[iColumn]) {
5148         int k = lookup[iColumn];
5149         output[k] += elValue;
5150       } else {
5151         output[numberNonZero] = elValue;
5152         marked[iColumn] = 1;
5153         lookup[iColumn] = numberNonZero;
5154         index[numberNonZero++] = iColumn;
5155       }
5156     }
5157   }
5158 #ifndef NDEBUG
5159   int saveN = numberNonZero;
5160 #endif
5161   // get rid of tiny values and zero out lookup
5162   for (int i = 0; i < numberNonZero; i++) {
5163     int iColumn = index[i];
5164     marked[iColumn] = 0;
5165     double value = output[i];
5166     if (fabs(value) <= tolerance) {
5167       while (fabs(value) <= tolerance) {
5168         numberNonZero--;
5169         value = output[numberNonZero];
5170         iColumn = index[numberNonZero];
5171         marked[iColumn] = 0;
5172         if (i < numberNonZero) {
5173           output[numberNonZero] = 0.0;
5174           output[i] = value;
5175           index[i] = iColumn;
5176         } else {
5177           output[i] = 0.0;
5178           value = 1.0; // to force end of while
5179         }
5180       }
5181     }
5182   }
5183 #ifndef NDEBUG
5184   for (int i = numberNonZero; i < saveN; i++)
5185     assert(!output[i]);
5186   for (int i = 0; i <= maxColumn; i++)
5187     assert(!marked[i]);
5188 #endif
5189   return numberNonZero;
5190 }
gutsOfTransposeTimesByRowGE3(const CoinIndexedVector * COIN_RESTRICT piVector,int * COIN_RESTRICT index,double * COIN_RESTRICT output,double * COIN_RESTRICT array,const double tolerance,const double scalar) const5191 int ClpPackedMatrix::gutsOfTransposeTimesByRowGE3(const CoinIndexedVector *COIN_RESTRICT piVector,
5192   int *COIN_RESTRICT index,
5193   double *COIN_RESTRICT output,
5194   double *COIN_RESTRICT array,
5195   const double tolerance,
5196   const double scalar) const
5197 {
5198   const double *COIN_RESTRICT pi = piVector->denseVector();
5199   int numberNonZero = 0;
5200   int numberInRowArray = piVector->getNumElements();
5201   const int *COIN_RESTRICT column = matrix_->getIndices();
5202   const CoinBigIndex *COIN_RESTRICT rowStart = matrix_->getVectorStarts();
5203   const double *COIN_RESTRICT element = matrix_->getElements();
5204   const int *COIN_RESTRICT whichRow = piVector->getIndices();
5205   // ** Row copy is already scaled
5206   for (int i = 0; i < numberInRowArray; i++) {
5207     int iRow = whichRow[i];
5208     double value = pi[i] * scalar;
5209     CoinBigIndex j;
5210     for (j = rowStart[iRow]; j < rowStart[iRow + 1]; j++) {
5211       int iColumn = column[j];
5212       double inValue = array[iColumn];
5213       double elValue = element[j];
5214       elValue *= value;
5215       if (inValue) {
5216         double outValue = inValue + elValue;
5217         if (!outValue)
5218           outValue = COIN_INDEXED_REALLY_TINY_ELEMENT;
5219         array[iColumn] = outValue;
5220       } else {
5221         array[iColumn] = elValue;
5222         assert(elValue);
5223         index[numberNonZero++] = iColumn;
5224       }
5225     }
5226   }
5227   int saveN = numberNonZero;
5228   // get rid of tiny values
5229   numberNonZero = 0;
5230   for (int i = 0; i < saveN; i++) {
5231     int iColumn = index[i];
5232     double value = array[iColumn];
5233     array[iColumn] = 0.0;
5234     if (fabs(value) > tolerance) {
5235       output[numberNonZero] = value;
5236       index[numberNonZero++] = iColumn;
5237     }
5238   }
5239   return numberNonZero;
5240 }
5241 /* Given positive integer weights for each row fills in sum of weights
5242    for each column (and slack).
5243    Returns weights vector
5244 */
5245 CoinBigIndex *
dubiousWeights(const ClpSimplex * model,int * inputWeights) const5246 ClpPackedMatrix::dubiousWeights(const ClpSimplex *model, int *inputWeights) const
5247 {
5248   int numberRows = model->numberRows();
5249   int numberColumns = matrix_->getNumCols();
5250   int number = numberRows + numberColumns;
5251   CoinBigIndex *weights = new CoinBigIndex[number];
5252   // get matrix data pointers
5253   const int *row = matrix_->getIndices();
5254   const CoinBigIndex *columnStart = matrix_->getVectorStarts();
5255   const int *columnLength = matrix_->getVectorLengths();
5256   int i;
5257   for (i = 0; i < numberColumns; i++) {
5258     CoinBigIndex j;
5259     CoinBigIndex count = 0;
5260     for (j = columnStart[i]; j < columnStart[i] + columnLength[i]; j++) {
5261       int iRow = row[j];
5262       count += inputWeights[iRow];
5263     }
5264     weights[i] = count;
5265   }
5266   for (i = 0; i < numberRows; i++) {
5267     weights[i + numberColumns] = inputWeights[i];
5268   }
5269   return weights;
5270 }
5271 /* Returns largest and smallest elements of both signs.
5272    Largest refers to largest absolute value.
5273 */
rangeOfElements(double & smallestNegative,double & largestNegative,double & smallestPositive,double & largestPositive)5274 void ClpPackedMatrix::rangeOfElements(double &smallestNegative, double &largestNegative,
5275   double &smallestPositive, double &largestPositive)
5276 {
5277   smallestNegative = -COIN_DBL_MAX;
5278   largestNegative = 0.0;
5279   smallestPositive = COIN_DBL_MAX;
5280   largestPositive = 0.0;
5281   // get matrix data pointers
5282   const double *elementByColumn = matrix_->getElements();
5283   const CoinBigIndex *columnStart = matrix_->getVectorStarts();
5284   const int *columnLength = matrix_->getVectorLengths();
5285   int numberColumns = matrix_->getNumCols();
5286   int i;
5287   for (i = 0; i < numberColumns; i++) {
5288     CoinBigIndex j;
5289     for (j = columnStart[i]; j < columnStart[i] + columnLength[i]; j++) {
5290       double value = elementByColumn[j];
5291       if (value > 0.0) {
5292         smallestPositive = CoinMin(smallestPositive, value);
5293         largestPositive = CoinMax(largestPositive, value);
5294       } else if (value < 0.0) {
5295         smallestNegative = CoinMax(smallestNegative, value);
5296         largestNegative = CoinMin(largestNegative, value);
5297       }
5298     }
5299   }
5300 }
5301 // Says whether it can do partial pricing
canDoPartialPricing() const5302 bool ClpPackedMatrix::canDoPartialPricing() const
5303 {
5304   return true;
5305 }
5306 // Partial pricing
partialPricing(ClpSimplex * model,double startFraction,double endFraction,int & bestSequence,int & numberWanted)5307 void ClpPackedMatrix::partialPricing(ClpSimplex *model, double startFraction, double endFraction,
5308   int &bestSequence, int &numberWanted)
5309 {
5310   numberWanted = currentWanted_;
5311   int start = static_cast< int >(startFraction * numberActiveColumns_);
5312   int end = CoinMin(static_cast< int >(endFraction * numberActiveColumns_ + 1), numberActiveColumns_);
5313   const double *COIN_RESTRICT element = matrix_->getElements();
5314   const int *COIN_RESTRICT row = matrix_->getIndices();
5315   const CoinBigIndex *COIN_RESTRICT startColumn = matrix_->getVectorStarts();
5316   const int *COIN_RESTRICT length = matrix_->getVectorLengths();
5317   const double *COIN_RESTRICT rowScale = model->rowScale();
5318   const double *COIN_RESTRICT columnScale = model->columnScale();
5319   int iSequence;
5320   CoinBigIndex j;
5321   double tolerance = model->currentDualTolerance();
5322   double *COIN_RESTRICT reducedCost = model->djRegion();
5323   const double *COIN_RESTRICT duals = model->dualRowSolution();
5324   const double *COIN_RESTRICT cost = model->costRegion();
5325   double bestDj;
5326   if (bestSequence >= 0)
5327     bestDj = fabs(model->clpMatrix()->reducedCost(model, bestSequence));
5328   else
5329     bestDj = tolerance;
5330   int sequenceOut = model->sequenceOut();
5331   int saveSequence = bestSequence;
5332   int lastScan = minimumObjectsScan_ < 0 ? end : start + minimumObjectsScan_;
5333   int minNeg = minimumGoodReducedCosts_ == -1 ? numberWanted : minimumGoodReducedCosts_;
5334   if (rowScale) {
5335     // scaled
5336     for (iSequence = start; iSequence < end; iSequence++) {
5337       if (iSequence != sequenceOut) {
5338         double value;
5339         ClpSimplex::Status status = model->getStatus(iSequence);
5340 
5341         switch (status) {
5342 
5343         case ClpSimplex::basic:
5344         case ClpSimplex::isFixed:
5345           break;
5346         case ClpSimplex::isFree:
5347         case ClpSimplex::superBasic:
5348           value = 0.0;
5349           // scaled
5350           for (j = startColumn[iSequence];
5351                j < startColumn[iSequence] + length[iSequence]; j++) {
5352             int jRow = row[j];
5353             value -= duals[jRow] * element[j] * rowScale[jRow];
5354           }
5355           value = fabs(cost[iSequence] + value * columnScale[iSequence]);
5356           if (value > FREE_ACCEPT * tolerance) {
5357             numberWanted--;
5358             // we are going to bias towards free (but only if reasonable)
5359             value *= FREE_BIAS;
5360             if (value > bestDj) {
5361               // check flagged variable and correct dj
5362               if (!model->flagged(iSequence)) {
5363                 bestDj = value;
5364                 bestSequence = iSequence;
5365               } else {
5366                 // just to make sure we don't exit before got something
5367                 numberWanted++;
5368               }
5369             }
5370           }
5371           break;
5372         case ClpSimplex::atUpperBound:
5373           value = 0.0;
5374           // scaled
5375           for (j = startColumn[iSequence];
5376                j < startColumn[iSequence] + length[iSequence]; j++) {
5377             int jRow = row[j];
5378             value -= duals[jRow] * element[j] * rowScale[jRow];
5379           }
5380           value = cost[iSequence] + value * columnScale[iSequence];
5381           if (value > tolerance) {
5382             numberWanted--;
5383             if (value > bestDj) {
5384               // check flagged variable and correct dj
5385               if (!model->flagged(iSequence)) {
5386                 bestDj = value;
5387                 bestSequence = iSequence;
5388               } else {
5389                 // just to make sure we don't exit before got something
5390                 numberWanted++;
5391               }
5392             }
5393           }
5394           break;
5395         case ClpSimplex::atLowerBound:
5396           value = 0.0;
5397           // scaled
5398           for (j = startColumn[iSequence];
5399                j < startColumn[iSequence] + length[iSequence]; j++) {
5400             int jRow = row[j];
5401             value -= duals[jRow] * element[j] * rowScale[jRow];
5402           }
5403           value = -(cost[iSequence] + value * columnScale[iSequence]);
5404           if (value > tolerance) {
5405             numberWanted--;
5406             if (value > bestDj) {
5407               // check flagged variable and correct dj
5408               if (!model->flagged(iSequence)) {
5409                 bestDj = value;
5410                 bestSequence = iSequence;
5411               } else {
5412                 // just to make sure we don't exit before got something
5413                 numberWanted++;
5414               }
5415             }
5416           }
5417           break;
5418         }
5419       }
5420       if (numberWanted + minNeg < originalWanted_ && iSequence > lastScan) {
5421         // give up
5422         break;
5423       }
5424       if (!numberWanted)
5425         break;
5426     }
5427     if (bestSequence != saveSequence) {
5428       // recompute dj
5429       double value = 0.0;
5430       // scaled
5431       for (j = startColumn[bestSequence];
5432            j < startColumn[bestSequence] + length[bestSequence]; j++) {
5433         int jRow = row[j];
5434         value -= duals[jRow] * element[j] * rowScale[jRow];
5435       }
5436       reducedCost[bestSequence] = cost[bestSequence] + value * columnScale[bestSequence];
5437       savedBestSequence_ = bestSequence;
5438       savedBestDj_ = reducedCost[savedBestSequence_];
5439     }
5440   } else {
5441     // not scaled
5442     for (iSequence = start; iSequence < end; iSequence++) {
5443       if (iSequence != sequenceOut) {
5444         double value;
5445         ClpSimplex::Status status = model->getStatus(iSequence);
5446 
5447         switch (status) {
5448 
5449         case ClpSimplex::basic:
5450         case ClpSimplex::isFixed:
5451           break;
5452         case ClpSimplex::isFree:
5453         case ClpSimplex::superBasic:
5454           value = cost[iSequence];
5455           for (j = startColumn[iSequence];
5456                j < startColumn[iSequence] + length[iSequence]; j++) {
5457             int jRow = row[j];
5458             value -= duals[jRow] * element[j];
5459           }
5460           value = fabs(value);
5461           if (value > FREE_ACCEPT * tolerance) {
5462             numberWanted--;
5463             // we are going to bias towards free (but only if reasonable)
5464             value *= FREE_BIAS;
5465             if (value > bestDj) {
5466               // check flagged variable and correct dj
5467               if (!model->flagged(iSequence)) {
5468                 bestDj = value;
5469                 bestSequence = iSequence;
5470               } else {
5471                 // just to make sure we don't exit before got something
5472                 numberWanted++;
5473               }
5474             }
5475           }
5476           break;
5477         case ClpSimplex::atUpperBound:
5478           value = cost[iSequence];
5479           // scaled
5480           for (j = startColumn[iSequence];
5481                j < startColumn[iSequence] + length[iSequence]; j++) {
5482             int jRow = row[j];
5483             value -= duals[jRow] * element[j];
5484           }
5485           if (value > tolerance) {
5486             numberWanted--;
5487             if (value > bestDj) {
5488               // check flagged variable and correct dj
5489               if (!model->flagged(iSequence)) {
5490                 bestDj = value;
5491                 bestSequence = iSequence;
5492               } else {
5493                 // just to make sure we don't exit before got something
5494                 numberWanted++;
5495               }
5496             }
5497           }
5498           break;
5499         case ClpSimplex::atLowerBound:
5500           value = cost[iSequence];
5501           for (j = startColumn[iSequence];
5502                j < startColumn[iSequence] + length[iSequence]; j++) {
5503             int jRow = row[j];
5504             value -= duals[jRow] * element[j];
5505           }
5506           value = -value;
5507           if (value > tolerance) {
5508             numberWanted--;
5509             if (value > bestDj) {
5510               // check flagged variable and correct dj
5511               if (!model->flagged(iSequence)) {
5512                 bestDj = value;
5513                 bestSequence = iSequence;
5514               } else {
5515                 // just to make sure we don't exit before got something
5516                 numberWanted++;
5517               }
5518             }
5519           }
5520           break;
5521         }
5522       }
5523       if (numberWanted + minNeg < originalWanted_ && iSequence > lastScan) {
5524         // give up
5525         break;
5526       }
5527       if (!numberWanted)
5528         break;
5529     }
5530     if (bestSequence != saveSequence) {
5531       // recompute dj
5532       double value = cost[bestSequence];
5533       for (j = startColumn[bestSequence];
5534            j < startColumn[bestSequence] + length[bestSequence]; j++) {
5535         int jRow = row[j];
5536         value -= duals[jRow] * element[j];
5537       }
5538       reducedCost[bestSequence] = value;
5539       savedBestSequence_ = bestSequence;
5540       savedBestDj_ = reducedCost[savedBestSequence_];
5541     }
5542   }
5543   currentWanted_ = numberWanted;
5544 }
5545 // Sets up an effective RHS
useEffectiveRhs(ClpSimplex * model)5546 void ClpPackedMatrix::useEffectiveRhs(ClpSimplex *model)
5547 {
5548   delete[] rhsOffset_;
5549   int numberRows = model->numberRows();
5550   rhsOffset_ = new double[numberRows];
5551   rhsOffset(model, true);
5552 }
5553 // Gets rid of special copies
clearCopies()5554 void ClpPackedMatrix::clearCopies()
5555 {
5556   delete rowCopy_;
5557   delete columnCopy_;
5558   rowCopy_ = NULL;
5559   columnCopy_ = NULL;
5560   flags_ &= ~(4 + 8);
5561   checkGaps();
5562 #ifdef DO_CHECK_FLAGS
5563   checkFlags(0);
5564 #endif
5565 }
5566 // makes sure active columns correct
refresh(ClpSimplex *)5567 int ClpPackedMatrix::refresh(ClpSimplex *)
5568 {
5569   numberActiveColumns_ = matrix_->getNumCols();
5570 #if 0
5571      ClpMatrixBase * rowCopyBase = reverseOrderedCopy();
5572      ClpPackedMatrix* rowCopy =
5573           dynamic_cast< ClpPackedMatrix*>(rowCopyBase);
5574      // Make sure it is really a ClpPackedMatrix
5575      assert (rowCopy != NULL);
5576 
5577      const int * column = rowCopy->matrix_->getIndices();
5578      const CoinBigIndex * rowStart = rowCopy->matrix_->getVectorStarts();
5579      const int * rowLength = rowCopy->matrix_->getVectorLengths();
5580      const double * element = rowCopy->matrix_->getElements();
5581      int numberRows = rowCopy->matrix_->getNumRows();
5582      for (int i = 0; i < numberRows; i++) {
5583           if (!rowLength[i])
5584                printf("zero row %d\n", i);
5585      }
5586      delete rowCopy;
5587 #endif
5588   checkGaps();
5589 #ifdef DO_CHECK_FLAGS
5590   checkFlags(0);
5591 #endif
5592   return 0;
5593 }
5594 
5595 /* Scales rowCopy if column copy scaled
5596    Only called if scales already exist */
scaleRowCopy(ClpModel * model) const5597 void ClpPackedMatrix::scaleRowCopy(ClpModel *model) const
5598 {
5599   if (model->rowCopy()) {
5600     // need to replace row by row
5601     int numberRows = model->numberRows();
5602 #ifndef NDEBUG
5603     int numberColumns = matrix_->getNumCols();
5604 #endif
5605     ClpMatrixBase *rowCopyBase = model->rowCopy();
5606 #ifndef NDEBUG
5607     ClpPackedMatrix *rowCopy = dynamic_cast< ClpPackedMatrix * >(rowCopyBase);
5608     // Make sure it is really a ClpPackedMatrix
5609     assert(rowCopy != NULL);
5610 #else
5611     ClpPackedMatrix *rowCopy = static_cast< ClpPackedMatrix * >(rowCopyBase);
5612 #endif
5613 
5614     const int *COIN_RESTRICT column = rowCopy->getIndices();
5615     const CoinBigIndex *COIN_RESTRICT rowStart = rowCopy->getVectorStarts();
5616     double *COIN_RESTRICT element = rowCopy->getMutableElements();
5617     const double *COIN_RESTRICT rowScale = model->rowScale();
5618     const double *COIN_RESTRICT columnScale = model->columnScale();
5619     // scale row copy
5620     for (int iRow = 0; iRow < numberRows; iRow++) {
5621       CoinBigIndex j;
5622       double scale = rowScale[iRow];
5623       double *COIN_RESTRICT elementsInThisRow = element + rowStart[iRow];
5624       const int *COIN_RESTRICT columnsInThisRow = column + rowStart[iRow];
5625       int number = static_cast< int >(rowStart[iRow + 1] - rowStart[iRow]);
5626       assert(number <= numberColumns);
5627       for (j = 0; j < number; j++) {
5628         int iColumn = columnsInThisRow[j];
5629         elementsInThisRow[j] *= scale * columnScale[iColumn];
5630       }
5631     }
5632   }
5633 }
5634 /* Realy really scales column copy
5635    Only called if scales already exist.
5636    Up to user ro delete */
5637 ClpMatrixBase *
scaledColumnCopy(ClpModel * model) const5638 ClpPackedMatrix::scaledColumnCopy(ClpModel *model) const
5639 {
5640   // need to replace column by column
5641 #ifndef NDEBUG
5642   int numberRows = model->numberRows();
5643 #endif
5644   int numberColumns = matrix_->getNumCols();
5645   ClpPackedMatrix *copy = new ClpPackedMatrix(*this);
5646   const int *COIN_RESTRICT row = copy->getIndices();
5647   const CoinBigIndex *COIN_RESTRICT columnStart = copy->getVectorStarts();
5648   const int *COIN_RESTRICT length = copy->getVectorLengths();
5649   double *COIN_RESTRICT element = copy->getMutableElements();
5650   const double *COIN_RESTRICT rowScale = model->rowScale();
5651   const double *COIN_RESTRICT columnScale = model->columnScale();
5652   // scale column copy
5653   for (int iColumn = 0; iColumn < numberColumns; iColumn++) {
5654     CoinBigIndex j;
5655     double scale = columnScale[iColumn];
5656     double *COIN_RESTRICT elementsInThisColumn = element + columnStart[iColumn];
5657     const int *COIN_RESTRICT rowsInThisColumn = row + columnStart[iColumn];
5658     int number = length[iColumn];
5659     assert(number <= numberRows);
5660     for (j = 0; j < number; j++) {
5661       int iRow = rowsInThisColumn[j];
5662       elementsInThisColumn[j] *= scale * rowScale[iRow];
5663     }
5664   }
5665   return copy;
5666 }
5667 // Really scale matrix
reallyScale(const double * rowScale,const double * columnScale)5668 void ClpPackedMatrix::reallyScale(const double *rowScale, const double *columnScale)
5669 {
5670   clearCopies();
5671   int numberColumns = matrix_->getNumCols();
5672   const int *row = matrix_->getIndices();
5673   const CoinBigIndex *columnStart = matrix_->getVectorStarts();
5674   const int *length = matrix_->getVectorLengths();
5675   double *element = matrix_->getMutableElements();
5676   // scale
5677   for (int iColumn = 0; iColumn < numberColumns; iColumn++) {
5678     CoinBigIndex j;
5679     double scale = columnScale[iColumn];
5680     for (j = columnStart[iColumn]; j < columnStart[iColumn] + length[iColumn]; j++) {
5681       int iRow = row[j];
5682       element[j] *= scale * rowScale[iRow];
5683     }
5684   }
5685 }
5686 /* Delete the columns whose indices are listed in <code>indDel</code>. */
deleteCols(const int numDel,const int * indDel)5687 void ClpPackedMatrix::deleteCols(const int numDel, const int *indDel)
5688 {
5689   if (matrix_->getNumCols())
5690     matrix_->deleteCols(numDel, indDel);
5691   clearCopies();
5692   numberActiveColumns_ = matrix_->getNumCols();
5693   // may now have gaps
5694   checkGaps();
5695 #ifdef DO_CHECK_FLAGS
5696   checkFlags(0);
5697 #endif
5698   matrix_->setExtraGap(0.0);
5699 }
5700 /* Delete the rows whose indices are listed in <code>indDel</code>. */
deleteRows(const int numDel,const int * indDel)5701 void ClpPackedMatrix::deleteRows(const int numDel, const int *indDel)
5702 {
5703   if (matrix_->getNumRows())
5704     matrix_->deleteRows(numDel, indDel);
5705   clearCopies();
5706   numberActiveColumns_ = matrix_->getNumCols();
5707   // may now have gaps
5708   checkGaps();
5709 #ifdef DO_CHECK_FLAGS
5710   checkFlags(0);
5711 #endif
5712   matrix_->setExtraGap(0.0);
5713 }
5714 #ifndef CLP_NO_VECTOR
5715 // Append Columns
appendCols(int number,const CoinPackedVectorBase * const * columns)5716 void ClpPackedMatrix::appendCols(int number, const CoinPackedVectorBase *const *columns)
5717 {
5718   matrix_->appendCols(number, columns);
5719   numberActiveColumns_ = matrix_->getNumCols();
5720   clearCopies();
5721 }
5722 // Append Rows
appendRows(int number,const CoinPackedVectorBase * const * rows)5723 void ClpPackedMatrix::appendRows(int number, const CoinPackedVectorBase *const *rows)
5724 {
5725   matrix_->appendRows(number, rows);
5726   numberActiveColumns_ = matrix_->getNumCols();
5727   // may now have gaps
5728   checkGaps();
5729 #ifdef DO_CHECK_FLAGS
5730   checkFlags(0);
5731 #endif
5732   clearCopies();
5733 }
5734 #endif
5735 /* Set the dimensions of the matrix. In effect, append new empty
5736    columns/rows to the matrix. A negative number for either dimension
5737    means that that dimension doesn't change. Otherwise the new dimensions
5738    MUST be at least as large as the current ones otherwise an exception
5739    is thrown. */
setDimensions(int numrows,int numcols)5740 void ClpPackedMatrix::setDimensions(int numrows, int numcols)
5741 {
5742   matrix_->setDimensions(numrows, numcols);
5743 #ifdef DO_CHECK_FLAGS
5744   checkFlags(0);
5745 #endif
5746 }
5747 /* Append a set of rows/columns to the end of the matrix. Returns number of errors
5748    i.e. if any of the new rows/columns contain an index that's larger than the
5749    number of columns-1/rows-1 (if numberOther>0) or duplicates
5750    If 0 then rows, 1 if columns */
appendMatrix(int number,int type,const CoinBigIndex * starts,const int * index,const double * element,int numberOther)5751 int ClpPackedMatrix::appendMatrix(int number, int type,
5752   const CoinBigIndex *starts, const int *index,
5753   const double *element, int numberOther)
5754 {
5755   int numberErrors = 0;
5756   // make sure other dimension is big enough
5757   if (type == 0) {
5758     // rows
5759     if (matrix_->isColOrdered() && numberOther > matrix_->getNumCols())
5760       matrix_->setDimensions(-1, numberOther);
5761     if (!matrix_->isColOrdered() || numberOther >= 0 || matrix_->getExtraGap()) {
5762       numberErrors = matrix_->appendRows(number, starts, index, element, numberOther);
5763     } else {
5764       //CoinPackedMatrix mm(*matrix_);
5765       matrix_->appendMinorFast(number, starts, index, element);
5766       //mm.appendRows(number,starts,index,element,numberOther);
5767       //if (!mm.isEquivalent(*matrix_)) {
5768       //printf("bad append\n");
5769       //abort();
5770       //}
5771     }
5772   } else {
5773     // columns
5774     if (!matrix_->isColOrdered() && numberOther > matrix_->getNumRows())
5775       matrix_->setDimensions(numberOther, -1);
5776     if (element)
5777       numberErrors = matrix_->appendCols(number, starts, index, element, numberOther);
5778     else
5779       matrix_->setDimensions(-1, matrix_->getNumCols() + number); // resize
5780   }
5781   clearCopies();
5782   numberActiveColumns_ = matrix_->getNumCols();
5783   return numberErrors;
5784 }
specialRowCopy(ClpSimplex * model,const ClpMatrixBase * rowCopy)5785 void ClpPackedMatrix::specialRowCopy(ClpSimplex *model, const ClpMatrixBase *rowCopy)
5786 {
5787   delete rowCopy_;
5788   rowCopy_ = new ClpPackedMatrix2(model, rowCopy->getPackedMatrix());
5789   // See if anything in it
5790   if (!rowCopy_->usefulInfo()) {
5791     delete rowCopy_;
5792     rowCopy_ = NULL;
5793     flags_ &= ~4;
5794   } else {
5795     flags_ |= 4;
5796   }
5797 }
specialColumnCopy(ClpSimplex * model)5798 void ClpPackedMatrix::specialColumnCopy(ClpSimplex *model)
5799 {
5800   delete columnCopy_;
5801   if (model->vectorMode() == 1) {
5802     flags_ |= 16;
5803     // go to exact devex (unless full steepest)
5804     ClpPrimalColumnSteepest *pricing = dynamic_cast< ClpPrimalColumnSteepest * >(model->primalColumnPivot());
5805     if (pricing && pricing->mode() > 1)
5806       pricing->setMode(0);
5807   }
5808   if ((flags_ & 16) != 0 && model->numberRows() > 200 && model->numberColumns() > 500) {
5809     columnCopy_ = new ClpPackedMatrix3(model, matrix_);
5810     flags_ |= 8;
5811   } else {
5812     columnCopy_ = NULL;
5813   }
5814 }
5815 // Say we don't want special column copy
releaseSpecialColumnCopy()5816 void ClpPackedMatrix::releaseSpecialColumnCopy()
5817 {
5818   flags_ &= ~(8 + 16);
5819   delete columnCopy_;
5820   columnCopy_ = NULL;
5821 }
5822 // Correct sequence in and out to give true value
correctSequence(const ClpSimplex * model,int & sequenceIn,int & sequenceOut)5823 void ClpPackedMatrix::correctSequence(const ClpSimplex *model, int &sequenceIn, int &sequenceOut)
5824 {
5825   if (columnCopy_) {
5826     if (sequenceIn != -999) {
5827       columnCopy_->swapOne(model, this, sequenceIn);
5828       if (sequenceIn != sequenceOut)
5829         columnCopy_->swapOne(model, this, sequenceOut);
5830     } else {
5831       // do all
5832       columnCopy_->sortBlocks(model);
5833     }
5834 #ifndef NDEBUG
5835     columnCopy_->checkBlocks(model);
5836 #endif
5837   }
5838 }
5839 // Check validity
checkFlags(int type) const5840 void ClpPackedMatrix::checkFlags(int type) const
5841 {
5842   int iColumn;
5843   // get matrix data pointers
5844   //const int * row = matrix_->getIndices();
5845   const CoinBigIndex *columnStart = matrix_->getVectorStarts();
5846   const int *columnLength = matrix_->getVectorLengths();
5847   const double *elementByColumn = matrix_->getElements();
5848   if (!zeros()) {
5849     for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
5850       CoinBigIndex j;
5851       for (j = columnStart[iColumn]; j < columnStart[iColumn] + columnLength[iColumn]; j++) {
5852         if (!elementByColumn[j])
5853           abort();
5854       }
5855     }
5856   }
5857   if ((flags_ & 2) == 0) {
5858     for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
5859       if (columnStart[iColumn + 1] != columnStart[iColumn] + columnLength[iColumn]) {
5860         abort();
5861       }
5862     }
5863   }
5864   if (type) {
5865     if ((flags_ & 2) != 0) {
5866       bool ok = true;
5867       for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
5868         if (columnStart[iColumn + 1] != columnStart[iColumn] + columnLength[iColumn]) {
5869           ok = false;
5870           break;
5871         }
5872       }
5873       if (ok)
5874         COIN_DETAIL_PRINT(printf("flags_ could be 0\n"));
5875     }
5876   }
5877 }
5878 //#############################################################################
5879 // Constructors / Destructor / Assignment
5880 //#############################################################################
5881 
5882 //-------------------------------------------------------------------
5883 // Default Constructor
5884 //-------------------------------------------------------------------
ClpPackedMatrix2()5885 ClpPackedMatrix2::ClpPackedMatrix2()
5886   : numberBlocks_(0)
5887   , numberRows_(0)
5888   , offset_(NULL)
5889   , count_(NULL)
5890   , rowStart_(NULL)
5891   , column_(NULL)
5892   , work_(NULL)
5893 {
5894 #ifdef THREAD
5895   threadId_ = NULL;
5896   info_ = NULL;
5897 #endif
5898 }
5899 //-------------------------------------------------------------------
5900 // Useful Constructor
5901 //-------------------------------------------------------------------
ClpPackedMatrix2(ClpSimplex *,const CoinPackedMatrix * rowCopy)5902 ClpPackedMatrix2::ClpPackedMatrix2(ClpSimplex *, const CoinPackedMatrix *rowCopy)
5903   : numberBlocks_(0)
5904   , numberRows_(0)
5905   , offset_(NULL)
5906   , count_(NULL)
5907   , rowStart_(NULL)
5908   , column_(NULL)
5909   , work_(NULL)
5910 {
5911 #ifdef THREAD
5912   threadId_ = NULL;
5913   info_ = NULL;
5914 #endif
5915   numberRows_ = rowCopy->getNumRows();
5916   if (!numberRows_)
5917     return;
5918   int numberColumns = rowCopy->getNumCols();
5919   const int *column = rowCopy->getIndices();
5920   const CoinBigIndex *rowStart = rowCopy->getVectorStarts();
5921   const int *length = rowCopy->getVectorLengths();
5922   const double *element = rowCopy->getElements();
5923   int chunk = 32768; // tune
5924   //chunk=100;
5925   // tune
5926 #if 0
5927      int chunkY[7] = {1024, 2048, 4096, 8192, 16384, 32768, 65535};
5928      int its = model->maximumIterations();
5929      if (its >= 1000000 && its < 1000999) {
5930           its -= 1000000;
5931           its = its / 10;
5932           if (its >= 7) abort();
5933           chunk = chunkY[its];
5934           printf("chunk size %d\n", chunk);
5935 #define cpuid(func, ax, bx, cx, dx)                             \
5936   __asm__ __volatile__("cpuid"                                  \
5937                        : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
5938                        : "a"(func));
5939           unsigned int a, b, c, d;
5940           int func = 0;
5941           cpuid(func, a, b, c, d);
5942           {
5943                int i;
5944                unsigned int value;
5945                value = b;
5946                for (i = 0; i < 4; i++) {
5947                     printf("%c", (value & 0xff));
5948                     value = value >> 8;
5949                }
5950                value = d;
5951                for (i = 0; i < 4; i++) {
5952                     printf("%c", (value & 0xff));
5953                     value = value >> 8;
5954                }
5955                value = c;
5956                for (i = 0; i < 4; i++) {
5957                     printf("%c", (value & 0xff));
5958                     value = value >> 8;
5959                }
5960                printf("\n");
5961                int maxfunc = a;
5962                if (maxfunc > 10) {
5963                     printf("not intel?\n");
5964                     abort();
5965                }
5966                for (func = 1; func <= maxfunc; func++) {
5967                     cpuid(func, a, b, c, d);
5968                     printf("func %d, %x %x %x %x\n", func, a, b, c, d);
5969                }
5970           }
5971 #else
5972   if (numberColumns > 10000 || chunk == 100) {
5973 #endif
5974 }
5975 else
5976 {
5977   //printf("no chunk\n");
5978   return;
5979 }
5980 // Could also analyze matrix to get natural breaks
5981 numberBlocks_ = (numberColumns + chunk - 1) / chunk;
5982 #ifdef THREAD
5983 // Get work areas
5984 threadId_ = new pthread_t[numberBlocks_];
5985 info_ = new dualColumn0Struct[numberBlocks_];
5986 #endif
5987 // Even out
5988 chunk = (numberColumns + numberBlocks_ - 1) / numberBlocks_;
5989 offset_ = new int[numberBlocks_ + 1];
5990 offset_[numberBlocks_] = numberColumns;
5991 int nRow = numberBlocks_ * numberRows_;
5992 count_ = new unsigned short[nRow];
5993 memset(count_, 0, nRow * sizeof(unsigned short));
5994 rowStart_ = new CoinBigIndex[nRow + numberRows_ + 1];
5995 CoinBigIndex nElement = rowStart[numberRows_];
5996 rowStart_[nRow + numberRows_] = nElement;
5997 column_ = new unsigned short[nElement];
5998 // assumes int <= double
5999 int sizeWork = 6 * numberBlocks_;
6000 work_ = new double[sizeWork];
6001 ;
6002 int iBlock;
6003 int nZero = 0;
6004 for (iBlock = 0; iBlock < numberBlocks_; iBlock++) {
6005   int start = iBlock * chunk;
6006   offset_[iBlock] = start;
6007   int end = start + chunk;
6008   for (int iRow = 0; iRow < numberRows_; iRow++) {
6009     if (rowStart[iRow + 1] != rowStart[iRow] + length[iRow]) {
6010       printf("not packed correctly - gaps\n");
6011       abort();
6012     }
6013     bool lastFound = false;
6014     int nFound = 0;
6015     for (CoinBigIndex j = rowStart[iRow];
6016          j < rowStart[iRow] + length[iRow]; j++) {
6017       int iColumn = column[j];
6018       if (iColumn >= start) {
6019         if (iColumn < end) {
6020           if (!element[j]) {
6021             printf("not packed correctly - zero element\n");
6022             abort();
6023           }
6024           column_[j] = static_cast< unsigned short >(iColumn - start);
6025           nFound++;
6026           if (lastFound) {
6027             printf("not packed correctly - out of order\n");
6028             abort();
6029           }
6030         } else {
6031           //can't find any more
6032           lastFound = true;
6033         }
6034       }
6035     }
6036     count_[iRow * numberBlocks_ + iBlock] = static_cast< unsigned short >(nFound);
6037     if (!nFound)
6038       nZero++;
6039   }
6040 }
6041 //double fraction = ((double) nZero)/((double) (numberBlocks_*numberRows_));
6042 //printf("%d empty blocks, %g%%\n",nZero,100.0*fraction);
6043 }
6044 
6045 //-------------------------------------------------------------------
6046 // Copy constructor
6047 //-------------------------------------------------------------------
6048 ClpPackedMatrix2::ClpPackedMatrix2(const ClpPackedMatrix2 &rhs)
6049   : numberBlocks_(rhs.numberBlocks_)
6050   , numberRows_(rhs.numberRows_)
6051 {
6052   if (numberBlocks_) {
6053     offset_ = CoinCopyOfArray(rhs.offset_, numberBlocks_ + 1);
6054     int nRow = numberBlocks_ * numberRows_;
6055     count_ = CoinCopyOfArray(rhs.count_, nRow);
6056     rowStart_ = CoinCopyOfArray(rhs.rowStart_, nRow + numberRows_ + 1);
6057     CoinBigIndex nElement = rowStart_[nRow + numberRows_];
6058     column_ = CoinCopyOfArray(rhs.column_, nElement);
6059     int sizeWork = 6 * numberBlocks_;
6060     work_ = CoinCopyOfArray(rhs.work_, sizeWork);
6061 #ifdef THREAD
6062     threadId_ = new pthread_t[numberBlocks_];
6063     info_ = new dualColumn0Struct[numberBlocks_];
6064 #endif
6065   } else {
6066     offset_ = NULL;
6067     count_ = NULL;
6068     rowStart_ = NULL;
6069     column_ = NULL;
6070     work_ = NULL;
6071 #ifdef THREAD
6072     threadId_ = NULL;
6073     info_ = NULL;
6074 #endif
6075   }
6076 }
6077 //-------------------------------------------------------------------
6078 // Destructor
6079 //-------------------------------------------------------------------
6080 ClpPackedMatrix2::~ClpPackedMatrix2()
6081 {
6082   delete[] offset_;
6083   delete[] count_;
6084   delete[] rowStart_;
6085   delete[] column_;
6086   delete[] work_;
6087 #ifdef THREAD
6088   delete[] threadId_;
6089   delete[] info_;
6090 #endif
6091 }
6092 
6093 //----------------------------------------------------------------
6094 // Assignment operator
6095 //-------------------------------------------------------------------
6096 ClpPackedMatrix2 &
6097 ClpPackedMatrix2::operator=(const ClpPackedMatrix2 &rhs)
6098 {
6099   if (this != &rhs) {
6100     numberBlocks_ = rhs.numberBlocks_;
6101     numberRows_ = rhs.numberRows_;
6102     delete[] offset_;
6103     delete[] count_;
6104     delete[] rowStart_;
6105     delete[] column_;
6106     delete[] work_;
6107 #ifdef THREAD
6108     delete[] threadId_;
6109     delete[] info_;
6110 #endif
6111     if (numberBlocks_) {
6112       offset_ = CoinCopyOfArray(rhs.offset_, numberBlocks_ + 1);
6113       int nRow = numberBlocks_ * numberRows_;
6114       count_ = CoinCopyOfArray(rhs.count_, nRow);
6115       rowStart_ = CoinCopyOfArray(rhs.rowStart_, nRow + numberRows_ + 1);
6116       CoinBigIndex nElement = rowStart_[nRow + numberRows_];
6117       column_ = CoinCopyOfArray(rhs.column_, nElement);
6118       int sizeWork = 6 * numberBlocks_;
6119       work_ = CoinCopyOfArray(rhs.work_, sizeWork);
6120 #ifdef THREAD
6121       threadId_ = new pthread_t[numberBlocks_];
6122       info_ = new dualColumn0Struct[numberBlocks_];
6123 #endif
6124     } else {
6125       offset_ = NULL;
6126       count_ = NULL;
6127       rowStart_ = NULL;
6128       column_ = NULL;
6129       work_ = NULL;
6130 #ifdef THREAD
6131       threadId_ = NULL;
6132       info_ = NULL;
6133 #endif
6134     }
6135   }
6136   return *this;
6137 }
6138 static int dualColumn0(const ClpSimplex *model, double *spare,
6139   int *spareIndex, const double *arrayTemp,
6140   const int *indexTemp, int numberIn,
6141   int offset, double acceptablePivot,
6142   double *upperThetaPtr, int *posFreePtr, double *freePivotPtr)
6143 {
6144   // do dualColumn0
6145   int i;
6146   int numberRemaining = 0;
6147   double upperTheta = 1.0e31;
6148   double freePivot = acceptablePivot;
6149   int posFree = -1;
6150   const double *reducedCost = model->djRegion(1);
6151   double dualTolerance = model->dualTolerance();
6152   // We can also see if infeasible or pivoting on free
6153   double tentativeTheta = 1.0e25;
6154   for (i = 0; i < numberIn; i++) {
6155     double alpha = arrayTemp[i];
6156     int iSequence = indexTemp[i] + offset;
6157     double oldValue;
6158     double value;
6159     bool keep;
6160 
6161     switch (model->getStatus(iSequence)) {
6162 
6163     case ClpSimplex::basic:
6164     case ClpSimplex::isFixed:
6165       break;
6166     case ClpSimplex::isFree:
6167     case ClpSimplex::superBasic:
6168       oldValue = reducedCost[iSequence];
6169       // If free has to be very large - should come in via dualRow
6170       if (model->getStatus(iSequence) == ClpSimplex::isFree && fabs(alpha) < 1.0e-3)
6171         break;
6172       if (oldValue > dualTolerance) {
6173         keep = true;
6174       } else if (oldValue < -dualTolerance) {
6175         keep = true;
6176       } else {
6177         if (fabs(alpha) > CoinMax(10.0 * acceptablePivot, 1.0e-5))
6178           keep = true;
6179         else
6180           keep = false;
6181       }
6182       if (keep) {
6183         // free - choose largest
6184         if (fabs(alpha) > freePivot) {
6185           freePivot = fabs(alpha);
6186           posFree = i;
6187         }
6188       }
6189       break;
6190     case ClpSimplex::atUpperBound:
6191       oldValue = reducedCost[iSequence];
6192       value = oldValue - tentativeTheta * alpha;
6193       //assert (oldValue<=dualTolerance*1.0001);
6194       if (value > dualTolerance) {
6195         value = oldValue - upperTheta * alpha;
6196         if (value > dualTolerance && -alpha >= acceptablePivot)
6197           upperTheta = (oldValue - dualTolerance) / alpha;
6198         // add to list
6199         spare[numberRemaining] = alpha;
6200         spareIndex[numberRemaining++] = iSequence;
6201       }
6202       break;
6203     case ClpSimplex::atLowerBound:
6204       oldValue = reducedCost[iSequence];
6205       value = oldValue - tentativeTheta * alpha;
6206       //assert (oldValue>=-dualTolerance*1.0001);
6207       if (value < -dualTolerance) {
6208         value = oldValue - upperTheta * alpha;
6209         if (value < -dualTolerance && alpha >= acceptablePivot)
6210           upperTheta = (oldValue + dualTolerance) / alpha;
6211         // add to list
6212         spare[numberRemaining] = alpha;
6213         spareIndex[numberRemaining++] = iSequence;
6214       }
6215       break;
6216     }
6217   }
6218   *upperThetaPtr = upperTheta;
6219   *freePivotPtr = freePivot;
6220   *posFreePtr = posFree;
6221   return numberRemaining;
6222 }
6223 static int doOneBlock(double *array, int *index,
6224   const double *pi, const CoinBigIndex *rowStart, const double *element,
6225   const unsigned short *column, int numberInRowArray, int numberLook)
6226 {
6227   int iWhich = 0;
6228   CoinBigIndex nextN = 0;
6229   CoinBigIndex nextStart = 0;
6230   double nextPi = 0.0;
6231   for (; iWhich < numberInRowArray; iWhich++) {
6232     nextStart = rowStart[0];
6233     nextN = rowStart[numberInRowArray] - nextStart;
6234     rowStart++;
6235     if (nextN) {
6236       nextPi = pi[iWhich];
6237       break;
6238     }
6239   }
6240   int i;
6241   while (iWhich < numberInRowArray) {
6242     double value = nextPi;
6243     CoinBigIndex j = nextStart;
6244     CoinBigIndex n = nextN;
6245     // get next
6246     iWhich++;
6247     for (; iWhich < numberInRowArray; iWhich++) {
6248       nextStart = rowStart[0];
6249       nextN = rowStart[numberInRowArray] - nextStart;
6250       rowStart++;
6251       if (nextN) {
6252         //coin_prefetch_const(element + nextStart);
6253         nextPi = pi[iWhich];
6254         break;
6255       }
6256     }
6257     CoinBigIndex end = j + n;
6258     //coin_prefetch_const(element+rowStart_[i+1]);
6259     //coin_prefetch_const(column_+rowStart_[i+1]);
6260     if (n < 100) {
6261       if ((n & 1) != 0) {
6262         unsigned int jColumn = column[j];
6263         array[jColumn] -= value * element[j];
6264         j++;
6265       }
6266       //coin_prefetch_const(column + nextStart);
6267       for (; j < end; j += 2) {
6268         unsigned int jColumn0 = column[j];
6269         double value0 = value * element[j];
6270         unsigned int jColumn1 = column[j + 1];
6271         double value1 = value * element[j + 1];
6272         array[jColumn0] -= value0;
6273         array[jColumn1] -= value1;
6274       }
6275     } else {
6276       if ((n & 1) != 0) {
6277         unsigned int jColumn = column[j];
6278         array[jColumn] -= value * element[j];
6279         j++;
6280       }
6281       if ((n & 2) != 0) {
6282         unsigned int jColumn0 = column[j];
6283         double value0 = value * element[j];
6284         unsigned int jColumn1 = column[j + 1];
6285         double value1 = value * element[j + 1];
6286         array[jColumn0] -= value0;
6287         array[jColumn1] -= value1;
6288         j += 2;
6289       }
6290       if ((n & 4) != 0) {
6291         unsigned int jColumn0 = column[j];
6292         double value0 = value * element[j];
6293         unsigned int jColumn1 = column[j + 1];
6294         double value1 = value * element[j + 1];
6295         unsigned int jColumn2 = column[j + 2];
6296         double value2 = value * element[j + 2];
6297         unsigned int jColumn3 = column[j + 3];
6298         double value3 = value * element[j + 3];
6299         array[jColumn0] -= value0;
6300         array[jColumn1] -= value1;
6301         array[jColumn2] -= value2;
6302         array[jColumn3] -= value3;
6303         j += 4;
6304       }
6305       //coin_prefetch_const(column+nextStart);
6306       for (; j < end; j += 8) {
6307         //coin_prefetch_const(element + j + 16);
6308         unsigned int jColumn0 = column[j];
6309         double value0 = value * element[j];
6310         unsigned int jColumn1 = column[j + 1];
6311         double value1 = value * element[j + 1];
6312         unsigned int jColumn2 = column[j + 2];
6313         double value2 = value * element[j + 2];
6314         unsigned int jColumn3 = column[j + 3];
6315         double value3 = value * element[j + 3];
6316         array[jColumn0] -= value0;
6317         array[jColumn1] -= value1;
6318         array[jColumn2] -= value2;
6319         array[jColumn3] -= value3;
6320         //coin_prefetch_const(column + j + 16);
6321         jColumn0 = column[j + 4];
6322         value0 = value * element[j + 4];
6323         jColumn1 = column[j + 5];
6324         value1 = value * element[j + 5];
6325         jColumn2 = column[j + 6];
6326         value2 = value * element[j + 6];
6327         jColumn3 = column[j + 7];
6328         value3 = value * element[j + 7];
6329         array[jColumn0] -= value0;
6330         array[jColumn1] -= value1;
6331         array[jColumn2] -= value2;
6332         array[jColumn3] -= value3;
6333       }
6334     }
6335   }
6336   // get rid of tiny values
6337   int nSmall = numberLook;
6338   int numberNonZero = 0;
6339   for (i = 0; i < nSmall; i++) {
6340     double value = array[i];
6341     array[i] = 0.0;
6342     if (fabs(value) > 1.0e-12) {
6343       array[numberNonZero] = value;
6344       index[numberNonZero++] = i;
6345     }
6346   }
6347   for (; i < numberLook; i += 4) {
6348     double value0 = array[i + 0];
6349     double value1 = array[i + 1];
6350     double value2 = array[i + 2];
6351     double value3 = array[i + 3];
6352     array[i + 0] = 0.0;
6353     array[i + 1] = 0.0;
6354     array[i + 2] = 0.0;
6355     array[i + 3] = 0.0;
6356     if (fabs(value0) > 1.0e-12) {
6357       array[numberNonZero] = value0;
6358       index[numberNonZero++] = i + 0;
6359     }
6360     if (fabs(value1) > 1.0e-12) {
6361       array[numberNonZero] = value1;
6362       index[numberNonZero++] = i + 1;
6363     }
6364     if (fabs(value2) > 1.0e-12) {
6365       array[numberNonZero] = value2;
6366       index[numberNonZero++] = i + 2;
6367     }
6368     if (fabs(value3) > 1.0e-12) {
6369       array[numberNonZero] = value3;
6370       index[numberNonZero++] = i + 3;
6371     }
6372   }
6373   return numberNonZero;
6374 }
6375 #ifdef THREAD
6376 static void *doOneBlockThread(void *voidInfo)
6377 {
6378   dualColumn0Struct *info = (dualColumn0Struct *)voidInfo;
6379   *(info->numberInPtr) = doOneBlock(info->arrayTemp, info->indexTemp, info->pi,
6380     info->rowStart, info->element, info->column,
6381     info->numberInRowArray, info->numberLook);
6382   return NULL;
6383 }
6384 static void *doOneBlockAnd0Thread(void *voidInfo)
6385 {
6386   dualColumn0Struct *info = (dualColumn0Struct *)voidInfo;
6387   *(info->numberInPtr) = doOneBlock(info->arrayTemp, info->indexTemp, info->pi,
6388     info->rowStart, info->element, info->column,
6389     info->numberInRowArray, info->numberLook);
6390   *(info->numberOutPtr) = dualColumn0(info->model, info->spare,
6391     info->spareIndex, (const double *)info->arrayTemp,
6392     (const int *)info->indexTemp, *(info->numberInPtr),
6393     info->offset, info->acceptablePivot,
6394     info->upperThetaPtr, info->posFreePtr, info->freePivotPtr);
6395   return NULL;
6396 }
6397 #endif
6398 /* Return <code>x * scalar * A in <code>z</code>.
6399    Note - x packed and z will be packed mode
6400    Squashes small elements and knows about ClpSimplex */
6401 void ClpPackedMatrix2::transposeTimes(const ClpSimplex *model,
6402   const CoinPackedMatrix *rowCopy,
6403   const CoinIndexedVector *rowArray,
6404   CoinIndexedVector *spareArray,
6405   CoinIndexedVector *columnArray) const
6406 {
6407   // See if dualColumn0 coding wanted
6408   bool dualColumn = model->spareIntArray_[0] == 1;
6409   double acceptablePivot = model->spareDoubleArray_[0];
6410   double upperTheta = 1.0e31;
6411   double freePivot = acceptablePivot;
6412   int posFree = -1;
6413   int numberRemaining = 0;
6414   //if (model->numberIterations()>=200000) {
6415   //printf("time %g\n",CoinCpuTime()-startTime);
6416   //exit(77);
6417   //}
6418   double *pi = rowArray->denseVector();
6419   int numberNonZero = 0;
6420   int *index = columnArray->getIndices();
6421   double *array = columnArray->denseVector();
6422   int numberInRowArray = rowArray->getNumElements();
6423   const int *whichRow = rowArray->getIndices();
6424   double *element = const_cast< double * >(rowCopy->getElements());
6425   const CoinBigIndex *rowStart = rowCopy->getVectorStarts();
6426   int i;
6427   CoinBigIndex *rowStart2 = rowStart_;
6428   if (!dualColumn) {
6429     for (i = 0; i < numberInRowArray; i++) {
6430       int iRow = whichRow[i];
6431       CoinBigIndex start = rowStart[iRow];
6432       *rowStart2 = start;
6433       unsigned short *count1 = count_ + iRow * numberBlocks_;
6434       int put = 0;
6435       for (int j = 0; j < numberBlocks_; j++) {
6436         put += numberInRowArray;
6437         start += count1[j];
6438         rowStart2[put] = start;
6439       }
6440       rowStart2++;
6441     }
6442   } else {
6443     // also do dualColumn stuff
6444     double *spare = spareArray->denseVector();
6445     int *spareIndex = spareArray->getIndices();
6446     const double *reducedCost = model->djRegion(0);
6447     double dualTolerance = model->dualTolerance();
6448     // We can also see if infeasible or pivoting on free
6449     double tentativeTheta = 1.0e25;
6450     int addSequence = model->numberColumns();
6451     for (i = 0; i < numberInRowArray; i++) {
6452       int iRow = whichRow[i];
6453       double alpha = pi[i];
6454       double oldValue;
6455       double value;
6456       bool keep;
6457 
6458       switch (model->getStatus(iRow + addSequence)) {
6459 
6460       case ClpSimplex::basic:
6461       case ClpSimplex::isFixed:
6462         break;
6463       case ClpSimplex::isFree:
6464       case ClpSimplex::superBasic:
6465         oldValue = reducedCost[iRow];
6466         // If free has to be very large - should come in via dualRow
6467         if (model->getStatus(iRow + addSequence) == ClpSimplex::isFree && fabs(alpha) < 1.0e-3)
6468           break;
6469         if (oldValue > dualTolerance) {
6470           keep = true;
6471         } else if (oldValue < -dualTolerance) {
6472           keep = true;
6473         } else {
6474           if (fabs(alpha) > CoinMax(10.0 * acceptablePivot, 1.0e-5))
6475             keep = true;
6476           else
6477             keep = false;
6478         }
6479         if (keep) {
6480           // free - choose largest
6481           if (fabs(alpha) > freePivot) {
6482             freePivot = fabs(alpha);
6483             posFree = i + addSequence;
6484           }
6485         }
6486         break;
6487       case ClpSimplex::atUpperBound:
6488         oldValue = reducedCost[iRow];
6489         value = oldValue - tentativeTheta * alpha;
6490         //assert (oldValue<=dualTolerance*1.0001);
6491         if (value > dualTolerance) {
6492           value = oldValue - upperTheta * alpha;
6493           if (value > dualTolerance && -alpha >= acceptablePivot)
6494             upperTheta = (oldValue - dualTolerance) / alpha;
6495           // add to list
6496           spare[numberRemaining] = alpha;
6497           spareIndex[numberRemaining++] = iRow + addSequence;
6498         }
6499         break;
6500       case ClpSimplex::atLowerBound:
6501         oldValue = reducedCost[iRow];
6502         value = oldValue - tentativeTheta * alpha;
6503         //assert (oldValue>=-dualTolerance*1.0001);
6504         if (value < -dualTolerance) {
6505           value = oldValue - upperTheta * alpha;
6506           if (value < -dualTolerance && alpha >= acceptablePivot)
6507             upperTheta = (oldValue + dualTolerance) / alpha;
6508           // add to list
6509           spare[numberRemaining] = alpha;
6510           spareIndex[numberRemaining++] = iRow + addSequence;
6511         }
6512         break;
6513       }
6514       CoinBigIndex start = rowStart[iRow];
6515       *rowStart2 = start;
6516       unsigned short *count1 = count_ + iRow * numberBlocks_;
6517       int put = 0;
6518       for (int j = 0; j < numberBlocks_; j++) {
6519         put += numberInRowArray;
6520         start += count1[j];
6521         rowStart2[put] = start;
6522       }
6523       rowStart2++;
6524     }
6525   }
6526   double *spare = spareArray->denseVector();
6527   int *spareIndex = spareArray->getIndices();
6528   int saveNumberRemaining = numberRemaining;
6529   int iBlock;
6530   for (iBlock = 0; iBlock < numberBlocks_; iBlock++) {
6531     double *dwork = work_ + 6 * iBlock;
6532     int *iwork = reinterpret_cast< int * >(dwork + 3);
6533     if (!dualColumn) {
6534 #ifndef THREAD
6535       int offset = offset_[iBlock];
6536       int offset3 = offset;
6537       offset = numberNonZero;
6538       double *arrayTemp = array + offset;
6539       int *indexTemp = index + offset;
6540       iwork[0] = doOneBlock(arrayTemp, indexTemp, pi, rowStart_ + numberInRowArray * iBlock,
6541         element, column_, numberInRowArray, offset_[iBlock + 1] - offset);
6542       int number = iwork[0];
6543       for (i = 0; i < number; i++) {
6544         //double value = arrayTemp[i];
6545         //arrayTemp[i]=0.0;
6546         //array[numberNonZero]=value;
6547         index[numberNonZero++] = indexTemp[i] + offset3;
6548       }
6549 #else
6550       int offset = offset_[iBlock];
6551       double *arrayTemp = array + offset;
6552       int *indexTemp = index + offset;
6553       dualColumn0Struct *infoPtr = info_ + iBlock;
6554       infoPtr->arrayTemp = arrayTemp;
6555       infoPtr->indexTemp = indexTemp;
6556       infoPtr->numberInPtr = &iwork[0];
6557       infoPtr->pi = pi;
6558       infoPtr->rowStart = rowStart_ + numberInRowArray * iBlock;
6559       infoPtr->element = element;
6560       infoPtr->column = column_;
6561       infoPtr->numberInRowArray = numberInRowArray;
6562       infoPtr->numberLook = offset_[iBlock + 1] - offset;
6563       pthread_create(&threadId_[iBlock], NULL, doOneBlockThread, infoPtr);
6564 #endif
6565     } else {
6566 #ifndef THREAD
6567       int offset = offset_[iBlock];
6568       // allow for already saved
6569       int offset2 = offset + saveNumberRemaining;
6570       int offset3 = offset;
6571       offset = numberNonZero;
6572       offset2 = numberRemaining;
6573       double *arrayTemp = array + offset;
6574       int *indexTemp = index + offset;
6575       iwork[0] = doOneBlock(arrayTemp, indexTemp, pi, rowStart_ + numberInRowArray * iBlock,
6576         element, column_, numberInRowArray, offset_[iBlock + 1] - offset);
6577       iwork[1] = dualColumn0(model, spare + offset2,
6578         spareIndex + offset2,
6579         arrayTemp, indexTemp,
6580         iwork[0], offset3, acceptablePivot,
6581         &dwork[1], &iwork[2],
6582         &dwork[2]);
6583       int number = iwork[0];
6584       int numberLook = iwork[1];
6585 #if 1
6586       numberRemaining += numberLook;
6587 #else
6588       double *spareTemp = spare + offset2;
6589       const int *spareIndexTemp = spareIndex + offset2;
6590       for (i = 0; i < numberLook; i++) {
6591         double value = spareTemp[i];
6592         spareTemp[i] = 0.0;
6593         spare[numberRemaining] = value;
6594         spareIndex[numberRemaining++] = spareIndexTemp[i];
6595       }
6596 #endif
6597       if (dwork[2] > freePivot) {
6598         freePivot = dwork[2];
6599         posFree = iwork[2] + numberNonZero;
6600       }
6601       upperTheta = CoinMin(dwork[1], upperTheta);
6602       for (i = 0; i < number; i++) {
6603         // double value = arrayTemp[i];
6604         //arrayTemp[i]=0.0;
6605         //array[numberNonZero]=value;
6606         index[numberNonZero++] = indexTemp[i] + offset3;
6607       }
6608 #else
6609       int offset = offset_[iBlock];
6610       // allow for already saved
6611       int offset2 = offset + saveNumberRemaining;
6612       double *arrayTemp = array + offset;
6613       int *indexTemp = index + offset;
6614       dualColumn0Struct *infoPtr = info_ + iBlock;
6615       infoPtr->model = model;
6616       infoPtr->spare = spare + offset2;
6617       infoPtr->spareIndex = spareIndex + offset2;
6618       infoPtr->arrayTemp = arrayTemp;
6619       infoPtr->indexTemp = indexTemp;
6620       infoPtr->numberInPtr = &iwork[0];
6621       infoPtr->offset = offset;
6622       infoPtr->acceptablePivot = acceptablePivot;
6623       infoPtr->upperThetaPtr = &dwork[1];
6624       infoPtr->posFreePtr = &iwork[2];
6625       infoPtr->freePivotPtr = &dwork[2];
6626       infoPtr->numberOutPtr = &iwork[1];
6627       infoPtr->pi = pi;
6628       infoPtr->rowStart = rowStart_ + numberInRowArray * iBlock;
6629       infoPtr->element = element;
6630       infoPtr->column = column_;
6631       infoPtr->numberInRowArray = numberInRowArray;
6632       infoPtr->numberLook = offset_[iBlock + 1] - offset;
6633       if (iBlock >= 2)
6634         pthread_join(threadId_[iBlock - 2], NULL);
6635       pthread_create(threadId_ + iBlock, NULL, doOneBlockAnd0Thread, infoPtr);
6636       //pthread_join(threadId_[iBlock],NULL);
6637 #endif
6638     }
6639   }
6640   for (iBlock = CoinMax(0, numberBlocks_ - 2); iBlock < numberBlocks_; iBlock++) {
6641 #ifdef THREAD
6642     pthread_join(threadId_[iBlock], NULL);
6643 #endif
6644   }
6645 #ifdef THREAD
6646   for (iBlock = 0; iBlock < numberBlocks_; iBlock++) {
6647     //pthread_join(threadId_[iBlock],NULL);
6648     int offset = offset_[iBlock];
6649     double *dwork = work_ + 6 * iBlock;
6650     int *iwork = (int *)(dwork + 3);
6651     int number = iwork[0];
6652     if (dualColumn) {
6653       // allow for already saved
6654       int offset2 = offset + saveNumberRemaining;
6655       int numberLook = iwork[1];
6656       double *spareTemp = spare + offset2;
6657       const int *spareIndexTemp = spareIndex + offset2;
6658       for (i = 0; i < numberLook; i++) {
6659         double value = spareTemp[i];
6660         spareTemp[i] = 0.0;
6661         spare[numberRemaining] = value;
6662         spareIndex[numberRemaining++] = spareIndexTemp[i];
6663       }
6664       if (dwork[2] > freePivot) {
6665         freePivot = dwork[2];
6666         posFree = iwork[2] + numberNonZero;
6667       }
6668       upperTheta = CoinMin(dwork[1], upperTheta);
6669     }
6670     double *arrayTemp = array + offset;
6671     const int *indexTemp = index + offset;
6672     for (i = 0; i < number; i++) {
6673       double value = arrayTemp[i];
6674       arrayTemp[i] = 0.0;
6675       array[numberNonZero] = value;
6676       index[numberNonZero++] = indexTemp[i] + offset;
6677     }
6678   }
6679 #endif
6680   columnArray->setNumElements(numberNonZero);
6681   columnArray->setPackedMode(true);
6682   if (dualColumn) {
6683     model->spareDoubleArray_[0] = upperTheta;
6684     // and theta and alpha and sequence
6685     if (posFree < 0) {
6686       model->spareIntArray_[1] = -1;
6687     } else {
6688       const double *reducedCost = model->djRegion(0);
6689       double alpha;
6690       int numberColumns = model->numberColumns();
6691       if (posFree < numberColumns) {
6692         alpha = columnArray->denseVector()[posFree];
6693         posFree = columnArray->getIndices()[posFree];
6694       } else {
6695         alpha = rowArray->denseVector()[posFree - numberColumns];
6696         posFree = rowArray->getIndices()[posFree - numberColumns] + numberColumns;
6697       }
6698       model->spareDoubleArray_[2] = fabs(reducedCost[posFree] / alpha);
6699       ;
6700       model->spareDoubleArray_[3] = alpha;
6701       model->spareIntArray_[1] = posFree;
6702     }
6703     spareArray->setNumElements(numberRemaining);
6704     // signal done
6705     model->spareIntArray_[0] = -1;
6706   }
6707 }
6708 #define ALIGNMENT 32
6709 static void *clp_align(void *memory)
6710 {
6711   if (sizeof(int) == sizeof(void *) && ALIGNMENT) {
6712     CoinInt64 k = reinterpret_cast< CoinInt64 >(memory);
6713     if ((k & (ALIGNMENT - 1)) != 0) {
6714       k &= ~(ALIGNMENT - 1);
6715       k += ALIGNMENT;
6716       memory = reinterpret_cast< void * >(k);
6717     }
6718     return memory;
6719   } else {
6720     CoinInt64 k = reinterpret_cast< CoinInt64 >(memory);
6721     if ((k & (ALIGNMENT - 1)) != 0) {
6722       k &= ~(ALIGNMENT - 1);
6723       k += ALIGNMENT;
6724       memory = reinterpret_cast< void * >(k);
6725     }
6726     return memory;
6727   }
6728 }
6729 /* Default constructor. */
6730 ClpPackedMatrix3::ClpPackedMatrix3()
6731   : numberBlocks_(0)
6732   , numberColumns_(0)
6733   , numberColumnsWithGaps_(0)
6734   ,
6735 #if ABOCA_LITE
6736   numberChunks_(0)
6737   ,
6738 #endif
6739   numberElements_(0)
6740   , maxBlockSize_(0)
6741   , column_(NULL)
6742   , start_(NULL)
6743   , row_(NULL)
6744   , element_(NULL)
6745   , temporary_(NULL)
6746   , block_(NULL)
6747   , ifActive_(0)
6748 {
6749 }
6750 #ifdef _MSC_VER
6751 #include <intrin.h>
6752 #elif defined(__arm__)
6753 #include <arm_neon.h>
6754 #else
6755 //#include <immintrin.h>
6756 //#include <fmaintrin.h>
6757 #endif
6758 /* Constructor from copy. */
6759 ClpPackedMatrix3::ClpPackedMatrix3(ClpSimplex *model, const CoinPackedMatrix *columnCopy)
6760   : numberBlocks_(0)
6761   , numberColumns_(0)
6762   , numberColumnsWithGaps_(0)
6763   ,
6764 #if ABOCA_LITE
6765   numberChunks_(0)
6766   ,
6767 #endif
6768   numberElements_(0)
6769   , maxBlockSize_(0)
6770   , column_(NULL)
6771   , start_(NULL)
6772   , row_(NULL)
6773   , element_(NULL)
6774   , temporary_(NULL)
6775   , block_(NULL)
6776   , ifActive_(0)
6777 {
6778   //#undef COIN_AVX2
6779   //#define COIN_AVX2 8
6780   //#define NO_AVX_HARDWARE
6781 #ifndef COIN_AVX2
6782 #define COIN_AVX2 4
6783 #else
6784 #if COIN_AVX2 == 4
6785 #ifndef NO_AVX_HARDWARE
6786 #define HASWELL
6787 #endif
6788 #elif COIN_AVX2 == 8
6789 #ifndef NO_AVX_HARDWARE
6790 #define SKYLAKE
6791 #endif
6792 #else
6793   error
6794 #endif
6795 #endif
6796 #if COIN_AVX2 == 1
6797 #define COIN_AVX2_SHIFT 0
6798 #elif COIN_AVX2 == 2
6799 #define COIN_AVX2_SHIFT 1
6800 #elif COIN_AVX2 == 4
6801 #define COIN_AVX2_SHIFT 2
6802 #elif COIN_AVX2 == 8
6803 #define COIN_AVX2_SHIFT 3
6804 #else
6805   error;
6806 #endif
6807 #define COIN_ALIGN 8 * COIN_AVX2 // later
6808 #define COIN_ALIGN_DOUBLE COIN_AVX2
6809 #define COIN_AVX2_CHUNK 128 // do this many at a time
6810 #define MINBLOCK 6
6811 #define MAXBLOCK 100
6812 #define MAXUNROLL 10
6813 #define roundUp(xx) ((xx + COIN_ALIGN_DOUBLE - 1) & (~(COIN_ALIGN_DOUBLE - 1)))
6814 #define roundDown(xx) (xx & (~(COIN_ALIGN_DOUBLE - 1)))
6815   numberColumns_ = model->getNumCols();
6816   int numberColumns = columnCopy->getNumCols();
6817   assert(numberColumns_ >= numberColumns);
6818   int numberRows = columnCopy->getNumRows();
6819   int *counts = new int[numberRows + 1];
6820   CoinZeroN(counts, numberRows + 1);
6821   CoinBigIndex nels = 0;
6822   int iColumn;
6823   // get matrix data pointers
6824   const int *row = columnCopy->getIndices();
6825   const CoinBigIndex *columnStart = columnCopy->getVectorStarts();
6826   const int *columnLength = columnCopy->getVectorLengths();
6827   const double *elementByColumn = columnCopy->getElements();
6828   unsigned char *status = model->statusArray();
6829   const double *lower = model->columnLower();
6830   const double *upper = model->columnUpper();
6831   int nFree = 0; // or superbasic
6832   int nFreeEls = 0;
6833   for (iColumn = 0; iColumn < numberColumns; iColumn++) {
6834     CoinBigIndex start = columnStart[iColumn];
6835     int n = columnLength[iColumn];
6836     CoinBigIndex end = start + n;
6837     int kZero = 0;
6838     for (CoinBigIndex j = start; j < end; j++) {
6839       if (!elementByColumn[j])
6840         kZero++;
6841     }
6842     n -= kZero;
6843     nels += n;
6844     if ((lower[iColumn] == -COIN_DBL_MAX && upper[iColumn] == COIN_DBL_MAX) || (status[iColumn] & 3) == 0) {
6845       nFree++;
6846       nFreeEls += n;
6847       n = 0;
6848       if ((status[iColumn] & 3) != 0) {
6849         status[iColumn] &= ~7;
6850         status[iColumn] |= 4;
6851       }
6852     }
6853     counts[n]++;
6854   }
6855   counts[0] += numberColumns_ - numberColumns;
6856   int nZeroColumns = counts[0]; // also free
6857   counts[0] = -1;
6858   int nOdd = nZeroColumns;
6859   CoinBigIndex nInOdd = nFreeEls;
6860   maxBlockSize_ = 0;
6861   int i;
6862   for (i = 1; i <= numberRows; i++) {
6863     int n = counts[i];
6864     if (n) {
6865       if (n < MINBLOCK || i > MAXBLOCK) {
6866         nOdd += n;
6867         counts[i] = -1;
6868         nInOdd += n * i;
6869       } else {
6870         numberBlocks_++;
6871         maxBlockSize_ = CoinMax(maxBlockSize_, n);
6872         ;
6873       }
6874     } else {
6875       counts[i] = -1;
6876     }
6877   }
6878   // align on boundaries
6879   nels = roundUp(nInOdd);
6880   numberColumnsWithGaps_ = nOdd;
6881   for (int i = 0; i <= CoinMin(MAXBLOCK, numberRows); i++) {
6882     if (counts[i] > 0) {
6883       int n = roundUp(counts[i]);
6884       nels += n * i;
6885       numberColumnsWithGaps_ += n;
6886     }
6887   }
6888   row_ = new int[nels + 15];
6889   element_ = new double[nels + 31];
6890   start_ = new CoinBigIndex[nOdd + 1];
6891   // also allow for rows!
6892   int numberColumnsWithGaps = roundUp(numberColumnsWithGaps_);
6893   numberColumnsWithGaps_ = roundUp(numberColumnsWithGaps + numberRows);
6894   column_ = new int[2 * numberColumnsWithGaps_];
6895   memset(row_, 0, nels * sizeof(int));
6896   memset(element_, 0, nels * sizeof(double));
6897   int *lookup = column_ + numberColumnsWithGaps_;
6898   for (int i = 0; i < numberColumnsWithGaps; i++) {
6899     column_[i] = -1;
6900     lookup[i] = -1;
6901   }
6902   for (int i = 0; i < numberRows; i++) {
6903     column_[i + numberColumnsWithGaps] = i + numberColumns;
6904     lookup[i + numberColumns] = i;
6905   }
6906   for (int i = numberRows + numberColumnsWithGaps;
6907        i < numberColumnsWithGaps_; i++) {
6908     column_[i] = -1;
6909     lookup[i] = -1;
6910   }
6911   // even if no blocks do a dummy one
6912   numberBlocks_ = CoinMax(numberBlocks_, 1);
6913   block_ = new blockStruct[numberBlocks_ + 1]; // +1 for slacks
6914   memset(block_, 0, (numberBlocks_ + 1) * sizeof(blockStruct));
6915   // Fill in what we can
6916   int nTotal = nOdd;
6917   // in case no blocks
6918   block_->startIndices_ = nTotal;
6919   // adjust nels so start on 8 double boundary
6920   double *elements2 = reinterpret_cast< double * >(clp_align(element_ + nInOdd));
6921   nels = elements2 - element_;
6922   int nBlock = 0;
6923   for (i = 0; i <= CoinMin(MAXBLOCK, numberRows); i++) {
6924     if (counts[i] > 0) {
6925       blockStruct *block = block_ + nBlock;
6926       int n = roundUp(counts[i]);
6927       counts[i] = nBlock; // backward pointer
6928       nBlock++;
6929       block->startIndices_ = nTotal;
6930       block->startElements_ = nels;
6931       block->numberElements_ = i;
6932       // up counts
6933       nTotal += n;
6934       nels += n * i;
6935     }
6936   }
6937   numberElements_ = nels;
6938   nBlock = CoinMax(nBlock, 1);
6939   // slacks
6940   block_[nBlock].numberElements_ = 0;
6941   block_[nBlock].numberInBlock_ = numberRows;
6942   block_[nBlock].startIndices_ = numberColumnsWithGaps;
6943   // fill
6944   start_[0] = 0;
6945   nOdd = 0;
6946   nInOdd = 0;
6947   const double *columnScale = model->columnScale();
6948   for (iColumn = 0; iColumn < numberColumns; iColumn++) {
6949     CoinBigIndex start = columnStart[iColumn];
6950     int n = columnLength[iColumn];
6951     CoinBigIndex end = start + n;
6952     int kZero = 0;
6953     for (CoinBigIndex j = start; j < end; j++) {
6954       if (!elementByColumn[j])
6955         kZero++;
6956     }
6957     n -= kZero;
6958     if ((status[iColumn] & 3) == 0)
6959       n = 0;
6960     {
6961       int iBlock = counts[n];
6962       if (iBlock >= 0) {
6963         blockStruct *block = block_ + iBlock;
6964         int k = block->numberInBlock_;
6965         block->numberInBlock_++;
6966         column_[block->startIndices_ + k] = iColumn;
6967         lookup[iColumn] = k;
6968         CoinBigIndex put = block->startElements_
6969           + roundDown(k) * n + (k & (COIN_AVX2 - 1));
6970         for (CoinBigIndex j = start; j < end; j++) {
6971           double value = elementByColumn[j];
6972           if (value) {
6973             if (columnScale)
6974               value *= columnScale[iColumn];
6975             element_[put] = value;
6976             row_[put] = row[j];
6977             put += COIN_AVX2;
6978           }
6979         }
6980       } else {
6981         // odd ones
6982         for (CoinBigIndex j = start; j < end; j++) {
6983           double value = elementByColumn[j];
6984           if (value) {
6985             if (columnScale)
6986               value *= columnScale[iColumn];
6987             element_[nInOdd] = value;
6988             row_[nInOdd++] = row[j];
6989           }
6990         }
6991         column_[nOdd] = iColumn;
6992         lookup[iColumn] = -1;
6993         nOdd++;
6994         start_[nOdd] = nInOdd;
6995       }
6996     }
6997   }
6998   // This much temporary space
6999 #if ABOCA_LITE
7000   temporary_ = new CoinDoubleArrayWithLength(2 * COIN_AVX2_CHUNK * 2 * ABOCA_LITE,
7001     -2 - COIN_ALIGN_DOUBLE);
7002 #else
7003   temporary_ = new CoinDoubleArrayWithLength(2 * COIN_AVX2_CHUNK,
7004     -2 - COIN_ALIGN_DOUBLE);
7005 #endif
7006 #if ABOCA_LITE
7007   // balance work - maybe with 2*cpu chunks
7008 #define COLUMN_WEIGHT 5
7009 #define BLOCK_MULT 2
7010   // If do +1's then need two element weights
7011   double totalWork = 0.0;
7012   for (int iBlock = 0; iBlock < numberBlocks_; iBlock++) {
7013     totalWork += (COLUMN_WEIGHT + block_[iBlock].numberElements_) * block_[iBlock].numberInBlock_;
7014     if (model->logLevel() > 2)
7015       printf("block %d ncol %d nel %d total %g this %d\n",
7016         iBlock, block_[iBlock].numberInBlock_,
7017         block_[iBlock].numberElements_, totalWork,
7018         (COLUMN_WEIGHT + block_[iBlock].numberElements_) * block_[iBlock].numberInBlock_);
7019   }
7020   double eachWork = totalWork / (BLOCK_MULT * ABOCA_LITE);
7021   double thisWork = 0.0;
7022   numberChunks_ = 0;
7023   endChunk_[0] = 0;
7024   for (int iBlock = 0; iBlock < numberBlocks_; iBlock++) {
7025     thisWork += (COLUMN_WEIGHT + block_[iBlock].numberElements_) * block_[iBlock].numberInBlock_;
7026     if (thisWork >= eachWork) {
7027       totalWork -= thisWork;
7028       thisWork = 0.0;
7029       eachWork = totalWork / (BLOCK_MULT * ABOCA_LITE - numberChunks_ - 1);
7030       numberChunks_++;
7031       endChunk_[numberChunks_] = iBlock + 1;
7032       if (numberChunks_ == BLOCK_MULT * ABOCA_LITE)
7033         break;
7034     }
7035   }
7036   if (numberChunks_ == BLOCK_MULT * ABOCA_LITE)
7037     endChunk_[numberChunks_] = numberBlocks_;
7038   else if (endChunk_[numberChunks_] < numberBlocks_)
7039     endChunk_[++numberChunks_] = numberBlocks_;
7040   //printf("%d chunks for %d blocks\n",numberChunks_,numberBlocks_);
7041   assert(numberChunks_ <= 2 * ABOCA_LITE);
7042 #endif
7043   delete[] counts;
7044 }
7045 /* Destructor */
7046 ClpPackedMatrix3::~ClpPackedMatrix3()
7047 {
7048   delete[] column_;
7049   delete[] start_;
7050   delete[] row_;
7051   delete[] element_;
7052   delete temporary_;
7053   delete[] block_;
7054 }
7055 /* The copy constructor. */
7056 ClpPackedMatrix3::ClpPackedMatrix3(const ClpPackedMatrix3 &rhs)
7057   : numberBlocks_(rhs.numberBlocks_)
7058   , numberColumns_(rhs.numberColumns_)
7059   , numberColumnsWithGaps_(rhs.numberColumnsWithGaps_)
7060   ,
7061 #if ABOCA_LITE
7062   numberChunks_(rhs.numberChunks_)
7063   ,
7064 #endif
7065   numberElements_(rhs.numberElements_)
7066   , maxBlockSize_(rhs.maxBlockSize_)
7067   , column_(NULL)
7068   , start_(NULL)
7069   , row_(NULL)
7070   , element_(NULL)
7071   , temporary_(NULL)
7072   , block_(NULL)
7073   , ifActive_(rhs.ifActive_)
7074 {
7075   if (rhs.numberBlocks_) {
7076     block_ = CoinCopyOfArray(rhs.block_, numberBlocks_);
7077     column_ = CoinCopyOfArray(rhs.column_, 2 * numberColumnsWithGaps_);
7078     int numberOdd = block_->startIndices_;
7079     start_ = CoinCopyOfArray(rhs.start_, numberOdd + 1);
7080     row_ = CoinCopyOfArray(rhs.row_, numberElements_);
7081     element_ = CoinCopyOfArray(rhs.element_, numberElements_ + 8);
7082     // This much temporary space
7083 #if ABOCA_LITE
7084     temporary_ = new CoinDoubleArrayWithLength(2 * COIN_AVX2_CHUNK * 2 * ABOCA_LITE,
7085       -2 - COIN_ALIGN_DOUBLE);
7086 #else
7087     temporary_ = new CoinDoubleArrayWithLength(2 * COIN_AVX2_CHUNK,
7088       -2 - COIN_ALIGN_DOUBLE);
7089 #endif
7090 #if ABOCA_LITE
7091     memcpy(endChunk_, rhs.endChunk_, sizeof(endChunk_));
7092 #endif
7093   }
7094 }
7095 ClpPackedMatrix3 &
7096 ClpPackedMatrix3::operator=(const ClpPackedMatrix3 &rhs)
7097 {
7098   if (this != &rhs) {
7099     delete[] column_;
7100     delete[] start_;
7101     delete[] row_;
7102     delete[] element_;
7103     delete temporary_;
7104     temporary_ = NULL;
7105     delete[] block_;
7106     numberBlocks_ = rhs.numberBlocks_;
7107     numberColumns_ = rhs.numberColumns_;
7108     numberColumnsWithGaps_ = rhs.numberColumnsWithGaps_;
7109 #if ABOCA_LITE
7110     numberChunks_ = rhs.numberChunks_;
7111     memcpy(endChunk_, rhs.endChunk_, sizeof(endChunk_));
7112 #endif
7113     numberElements_ = rhs.numberElements_;
7114     maxBlockSize_ = rhs.maxBlockSize_;
7115     ifActive_ = rhs.ifActive_;
7116     if (rhs.numberBlocks_) {
7117       block_ = CoinCopyOfArray(rhs.block_, numberBlocks_);
7118       column_ = CoinCopyOfArray(rhs.column_, 2 * numberColumnsWithGaps_);
7119       int numberOdd = block_->startIndices_;
7120       start_ = CoinCopyOfArray(rhs.start_, numberOdd + 1);
7121       row_ = CoinCopyOfArray(rhs.row_, numberElements_);
7122       element_ = CoinCopyOfArray(rhs.element_, numberElements_ + 8);
7123       // This much temporary space
7124 #if ABOCA_LITE
7125       temporary_ = new CoinDoubleArrayWithLength(2 * COIN_AVX2_CHUNK * 2 * ABOCA_LITE,
7126         -2 - COIN_ALIGN_DOUBLE);
7127 #else
7128       temporary_ = new CoinDoubleArrayWithLength(2 * COIN_AVX2_CHUNK,
7129         -2 - COIN_ALIGN_DOUBLE);
7130 #endif
7131     } else {
7132       column_ = NULL;
7133       start_ = NULL;
7134       row_ = NULL;
7135       element_ = NULL;
7136       block_ = NULL;
7137     }
7138   }
7139   return *this;
7140 }
7141 /* Sort blocks */
7142 void ClpPackedMatrix3::sortBlocks(const ClpSimplex *model)
7143 {
7144   ifActive_ = 1;
7145   int *lookup = column_ + numberColumnsWithGaps_;
7146   for (int iBlock = 0; iBlock < numberBlocks_ + 1; iBlock++) {
7147     blockStruct *block = block_ + iBlock;
7148     int numberInBlock = block->numberInBlock_;
7149     int nel = block->numberElements_;
7150     int *row = row_ + block->startElements_;
7151     double *element = element_ + block->startElements_;
7152     int *column = column_ + block->startIndices_;
7153     int lastPrice = 0;
7154     int firstNotPrice = numberInBlock - 1;
7155     while (lastPrice <= firstNotPrice) {
7156       // find first basic or fixed
7157       int iColumn = numberInBlock;
7158       for (; lastPrice <= firstNotPrice; lastPrice++) {
7159         iColumn = column[lastPrice];
7160         if (model->getColumnStatus(iColumn) == ClpSimplex::basic || model->getColumnStatus(iColumn) == ClpSimplex::isFixed)
7161           break;
7162       }
7163       // find last non basic or fixed
7164       int jColumn = -1;
7165       for (; firstNotPrice > lastPrice; firstNotPrice--) {
7166         jColumn = column[firstNotPrice];
7167         if (model->getColumnStatus(jColumn) != ClpSimplex::basic && model->getColumnStatus(jColumn) != ClpSimplex::isFixed)
7168           break;
7169       }
7170       if (firstNotPrice > lastPrice) {
7171         assert(column[lastPrice] == iColumn);
7172         assert(column[firstNotPrice] == jColumn);
7173         // need to swap
7174         column[firstNotPrice] = iColumn;
7175         lookup[iColumn] = firstNotPrice;
7176         column[lastPrice] = jColumn;
7177         lookup[jColumn] = lastPrice;
7178         int startBit = roundDown(lastPrice);
7179         CoinBigIndex offset = startBit * nel + (lastPrice - startBit);
7180         double *elementA = element + offset;
7181         int *rowA = row + offset;
7182         startBit = roundDown(firstNotPrice);
7183         offset = startBit * nel + (firstNotPrice - startBit);
7184         double *elementB = element + offset;
7185         int *rowB = row + offset;
7186         for (int i = 0; i < nel * COIN_AVX2; i += COIN_AVX2) {
7187           int temp = rowA[i];
7188           double tempE = elementA[i];
7189           rowA[i] = rowB[i];
7190           elementA[i] = elementB[i];
7191           rowB[i] = temp;
7192           elementB[i] = tempE;
7193         }
7194         firstNotPrice--;
7195         lastPrice++;
7196       } else if (lastPrice == firstNotPrice) {
7197         // make sure correct side
7198         iColumn = column[lastPrice];
7199         if (model->getColumnStatus(iColumn) != ClpSimplex::basic && model->getColumnStatus(iColumn) != ClpSimplex::isFixed)
7200           lastPrice++;
7201         break;
7202       }
7203     }
7204     block->firstBasic_ = lastPrice;
7205     // reuse ints even though names wrong
7206     // lower
7207     firstNotPrice = lastPrice - 1;
7208     lastPrice = 0;
7209     while (lastPrice <= firstNotPrice) {
7210       // find first upper
7211       int iColumn = numberInBlock;
7212       for (; lastPrice <= firstNotPrice; lastPrice++) {
7213         iColumn = column[lastPrice];
7214         if (model->getColumnStatus(iColumn) == ClpSimplex::atUpperBound)
7215           break;
7216       }
7217       // find last upper
7218       int jColumn = -1;
7219       for (; firstNotPrice > lastPrice; firstNotPrice--) {
7220         jColumn = column[firstNotPrice];
7221         if (model->getColumnStatus(jColumn) != ClpSimplex::atUpperBound)
7222           break;
7223       }
7224       if (firstNotPrice > lastPrice) {
7225         assert(column[lastPrice] == iColumn);
7226         assert(column[firstNotPrice] == jColumn);
7227         // need to swap
7228         column[firstNotPrice] = iColumn;
7229         lookup[iColumn] = firstNotPrice;
7230         column[lastPrice] = jColumn;
7231         lookup[jColumn] = lastPrice;
7232         int startBit = roundDown(lastPrice);
7233         CoinBigIndex offset = startBit * nel + (lastPrice - startBit);
7234         double *elementA = element + offset;
7235         int *rowA = row + offset;
7236         startBit = roundDown(firstNotPrice);
7237         offset = startBit * nel + (firstNotPrice - startBit);
7238         double *elementB = element + offset;
7239         int *rowB = row + offset;
7240         for (int i = 0; i < nel * COIN_AVX2; i += COIN_AVX2) {
7241           int temp = rowA[i];
7242           double tempE = elementA[i];
7243           rowA[i] = rowB[i];
7244           elementA[i] = elementB[i];
7245           rowB[i] = temp;
7246           elementB[i] = tempE;
7247         }
7248         firstNotPrice--;
7249         lastPrice++;
7250       } else if (lastPrice == firstNotPrice) {
7251         // make sure correct side
7252         iColumn = column[lastPrice];
7253         if (model->getColumnStatus(iColumn) != ClpSimplex::atUpperBound)
7254           lastPrice++;
7255         break;
7256       }
7257     }
7258     block->firstAtUpper_ = lastPrice;
7259     // now lower
7260     firstNotPrice = lastPrice - 1;
7261     lastPrice = 0;
7262     while (lastPrice <= firstNotPrice) {
7263       // find first basic or fixed
7264       int iColumn = numberInBlock;
7265       for (; lastPrice <= firstNotPrice; lastPrice++) {
7266         iColumn = column[lastPrice];
7267         if (model->getColumnStatus(iColumn) == ClpSimplex::atLowerBound)
7268           break;
7269       }
7270       // find last non basic or fixed
7271       int jColumn = -1;
7272       for (; firstNotPrice > lastPrice; firstNotPrice--) {
7273         jColumn = column[firstNotPrice];
7274         if (model->getColumnStatus(jColumn) != ClpSimplex::atLowerBound)
7275           break;
7276       }
7277       if (firstNotPrice > lastPrice) {
7278         assert(column[lastPrice] == iColumn);
7279         assert(column[firstNotPrice] == jColumn);
7280         // need to swap
7281         column[firstNotPrice] = iColumn;
7282         lookup[iColumn] = firstNotPrice;
7283         column[lastPrice] = jColumn;
7284         lookup[jColumn] = lastPrice;
7285         int startBit = roundDown(lastPrice);
7286         CoinBigIndex offset = startBit * nel + (lastPrice - startBit);
7287         double *elementA = element + offset;
7288         int *rowA = row + offset;
7289         startBit = roundDown(firstNotPrice);
7290         offset = startBit * nel + (firstNotPrice - startBit);
7291         double *elementB = element + offset;
7292         int *rowB = row + offset;
7293         for (int i = 0; i < nel * COIN_AVX2; i += COIN_AVX2) {
7294           int temp = rowA[i];
7295           double tempE = elementA[i];
7296           rowA[i] = rowB[i];
7297           elementA[i] = elementB[i];
7298           rowB[i] = temp;
7299           elementB[i] = tempE;
7300         }
7301         firstNotPrice--;
7302         lastPrice++;
7303       } else if (lastPrice == firstNotPrice) {
7304         // make sure correct side
7305         iColumn = column[lastPrice];
7306         if (model->getColumnStatus(iColumn) != ClpSimplex::atLowerBound)
7307           lastPrice++;
7308         break;
7309       }
7310     }
7311     block->firstAtLower_ = lastPrice;
7312 #ifndef NDEBUG
7313     // check
7314     int i;
7315     for (i = 0; i < block->firstBasic_; i++) {
7316       int iColumn = column[i];
7317       assert(model->getColumnStatus(iColumn) != ClpSimplex::basic && model->getColumnStatus(iColumn) != ClpSimplex::isFixed);
7318       assert(lookup[iColumn] == i);
7319       if (i < block->firstAtLower_) {
7320         assert(model->getColumnStatus(iColumn) == ClpSimplex::isFree || model->getColumnStatus(iColumn) == ClpSimplex::superBasic);
7321       } else if (i < block->firstAtUpper_) {
7322         assert(model->getColumnStatus(iColumn) == ClpSimplex::atLowerBound);
7323       } else {
7324         assert(model->getColumnStatus(iColumn) == ClpSimplex::atUpperBound);
7325       }
7326     }
7327     for (; i < numberInBlock; i++) {
7328       int iColumn = column[i];
7329       assert(model->getColumnStatus(iColumn) == ClpSimplex::basic || model->getColumnStatus(iColumn) == ClpSimplex::isFixed);
7330       assert(lookup[iColumn] == i);
7331     }
7332 #endif
7333   }
7334 }
7335 // Swap one variable - when found
7336 void ClpPackedMatrix3::swapOne(int iBlock, int kA, int kB)
7337 {
7338   int *lookup = column_ + numberColumnsWithGaps_;
7339   blockStruct *block = block_ + iBlock;
7340   int nel = block->numberElements_;
7341   int *row = row_ + block->startElements_;
7342   double *element = element_ + block->startElements_;
7343   int *column = column_ + block->startIndices_;
7344   int iColumn = column[kA];
7345   int jColumn = column[kB];
7346   column[kA] = jColumn;
7347   lookup[jColumn] = kA;
7348   column[kB] = iColumn;
7349   lookup[iColumn] = kB;
7350   int startBit = roundDown(kA);
7351   CoinBigIndex offset = startBit * nel + (kA - startBit);
7352   double *elementA = element + offset;
7353   int *rowA = row + offset;
7354   startBit = roundDown(kB);
7355   offset = startBit * nel + (kB - startBit);
7356   double *elementB = element + offset;
7357   int *rowB = row + offset;
7358   int i;
7359   for (i = 0; i < nel * COIN_AVX2; i += COIN_AVX2) {
7360     int temp = rowA[i];
7361     double tempE = elementA[i];
7362     rowA[i] = rowB[i];
7363     elementA[i] = elementB[i];
7364     rowB[i] = temp;
7365     elementB[i] = tempE;
7366   }
7367 }
7368 // Swap one variable
7369 void ClpPackedMatrix3::swapOne(const ClpSimplex *model, const ClpPackedMatrix *matrix,
7370   int iColumn)
7371 {
7372   if (!ifActive_)
7373     return;
7374   int *lookup = column_ + numberColumnsWithGaps_;
7375   // position in block
7376   int kA = lookup[iColumn];
7377   if (kA < 0)
7378     return; // odd one
7379   int iBlock = numberBlocks_;
7380   if (iColumn < model->numberColumns()) {
7381     // get matrix data pointers
7382     const CoinPackedMatrix *columnCopy = matrix->getPackedMatrix();
7383     //const int * row = columnCopy->getIndices();
7384     const CoinBigIndex *columnStart = columnCopy->getVectorStarts();
7385     const int *columnLength = columnCopy->getVectorLengths();
7386     const double *elementByColumn = columnCopy->getElements();
7387     CoinBigIndex start = columnStart[iColumn];
7388     int n = columnLength[iColumn];
7389     if (matrix->zeros()) {
7390       CoinBigIndex end = start + n;
7391       for (CoinBigIndex j = start; j < end; j++) {
7392         if (!elementByColumn[j])
7393           n--;
7394       }
7395     }
7396     // find block - could do binary search
7397     iBlock = CoinMin(n, numberBlocks_) - 1;
7398     while (block_[iBlock].numberElements_ != n)
7399       iBlock--;
7400   }
7401   blockStruct *block = block_ + iBlock;
7402 #ifndef NDEBUG
7403   int *column = column_ + block->startIndices_;
7404   assert(column[kA] == iColumn);
7405 #endif
7406   int from, to;
7407   if (kA < block->firstBasic_) {
7408     if (kA >= block->firstAtUpper_) {
7409       from = 2;
7410     } else if (kA >= block->firstAtLower_) {
7411       from = 1;
7412     } else {
7413       from = 0;
7414     }
7415   } else {
7416     from = 3;
7417   }
7418   if (model->getColumnStatus(iColumn) == ClpSimplex::basic || model->getColumnStatus(iColumn) == ClpSimplex::isFixed) {
7419     to = 3;
7420   } else if (model->getColumnStatus(iColumn) == ClpSimplex::atUpperBound) {
7421     to = 2;
7422   } else if (model->getColumnStatus(iColumn) == ClpSimplex::atLowerBound) {
7423     to = 1;
7424   } else {
7425     to = 0;
7426   }
7427   int *statusCounts = (&block->firstAtLower_) - 1;
7428   if (from < to) {
7429     while (from < to) {
7430       int kB = statusCounts[from + 1] - 1;
7431       statusCounts[from + 1] = kB;
7432       swapOne(iBlock, kA, kB);
7433       kA = kB;
7434       from++;
7435     }
7436   } else if (from > to) {
7437     while (from > to) {
7438       int kB = statusCounts[from];
7439       statusCounts[from] = kB + 1;
7440       swapOne(iBlock, kA, kB);
7441       kA = kB;
7442       from--;
7443     }
7444   }
7445 #ifndef NDEBUG
7446   // check
7447   int i;
7448   for (i = 0; i < block->firstBasic_; i++) {
7449     int iColumn = column[i];
7450     if (iColumn != model->sequenceIn() && iColumn != model->sequenceOut())
7451       assert(model->getColumnStatus(iColumn) != ClpSimplex::basic && model->getColumnStatus(iColumn) != ClpSimplex::isFixed);
7452     assert(lookup[iColumn] == i);
7453     if (model->algorithm() > 0) {
7454       if (i < block->firstAtLower_) {
7455         assert(model->getColumnStatus(iColumn) == ClpSimplex::isFree || model->getColumnStatus(iColumn) == ClpSimplex::superBasic);
7456       } else if (i < block->firstAtUpper_) {
7457         assert(model->getColumnStatus(iColumn) == ClpSimplex::atLowerBound);
7458       } else {
7459         assert(model->getColumnStatus(iColumn) == ClpSimplex::atUpperBound);
7460       }
7461     }
7462   }
7463   int numberInBlock = block->numberInBlock_;
7464   for (; i < numberInBlock; i++) {
7465     int iColumn = column[i];
7466     if (iColumn != model->sequenceIn() && iColumn != model->sequenceOut())
7467       assert(model->getColumnStatus(iColumn) == ClpSimplex::basic || model->getColumnStatus(iColumn) == ClpSimplex::isFixed);
7468     assert(lookup[iColumn] == i);
7469   }
7470 #endif
7471 }
7472 #ifndef NDEBUG
7473 /* Debug - check blocks */
7474 void ClpPackedMatrix3::checkBlocks(const ClpSimplex *model)
7475 {
7476   if (!ifActive_)
7477     return;
7478   for (int iBlock = 0; iBlock < numberBlocks_ + 1; iBlock++) {
7479     blockStruct *block = block_ + iBlock;
7480     int *column = column_ + block->startIndices_;
7481     for (int i = 0; i < block->firstAtLower_; i++) {
7482       int iSequence = column[i];
7483       assert(model->getColumnStatus(iSequence) == ClpSimplex::isFree || model->getColumnStatus(iSequence) == ClpSimplex::superBasic);
7484     }
7485     for (int i = block->firstAtLower_; i < block->firstAtUpper_; i++) {
7486       int iSequence = column[i];
7487       assert(model->getColumnStatus(iSequence) == ClpSimplex::atLowerBound);
7488     }
7489     for (int i = block->firstAtUpper_; i < block->firstBasic_; i++) {
7490       int iSequence = column[i];
7491       assert(model->getColumnStatus(iSequence) == ClpSimplex::atUpperBound);
7492     }
7493     for (int i = block->firstBasic_; i < block->numberInBlock_; i++) {
7494       int iSequence = column[i];
7495       assert(model->getColumnStatus(iSequence) == ClpSimplex::basic || model->getColumnStatus(iSequence) == ClpSimplex::isFixed);
7496     }
7497   }
7498 }
7499 #endif
7500 /* Return <code>x * -1 * A in <code>z</code>.
7501    Note - x packed and z will be packed mode
7502    Squashes small elements and knows about ClpSimplex */
7503 void ClpPackedMatrix3::transposeTimes(const ClpSimplex *model,
7504   const double *pi,
7505   CoinIndexedVector *output) const
7506 {
7507   int numberNonZero = 0;
7508   int *index = output->getIndices();
7509   double *array = output->denseVector();
7510   double zeroTolerance = model->zeroTolerance();
7511   double value = 0.0;
7512   CoinBigIndex j;
7513   int numberOdd = block_->startIndices_;
7514   if (numberOdd) {
7515     // A) as probably long may be worth unrolling
7516     CoinBigIndex end = start_[1];
7517     for (j = start_[0]; j < end; j++) {
7518       int iRow = row_[j];
7519       value += pi[iRow] * element_[j];
7520     }
7521     int iColumn;
7522     // int jColumn=column_[0];
7523 
7524     for (iColumn = 0; iColumn < numberOdd - 1; iColumn++) {
7525       CoinBigIndex start = end;
7526       end = start_[iColumn + 2];
7527       if (fabs(value) > zeroTolerance) {
7528         array[numberNonZero] = value;
7529         index[numberNonZero++] = column_[iColumn];
7530         //index[numberNonZero++]=jColumn;
7531       }
7532       // jColumn = column_[iColumn+1];
7533       value = 0.0;
7534       //if (model->getColumnStatus(jColumn)!=ClpSimplex::basic) {
7535       for (j = start; j < end; j++) {
7536         int iRow = row_[j];
7537         value += pi[iRow] * element_[j];
7538       }
7539       //}
7540     }
7541     if (fabs(value) > zeroTolerance) {
7542       array[numberNonZero] = value;
7543       index[numberNonZero++] = column_[iColumn];
7544       //index[numberNonZero++]=jColumn;
7545     }
7546   }
7547   for (int iBlock = 0; iBlock < numberBlocks_; iBlock++) {
7548     // C) Can do two at a time (if so put odd one into start_)
7549     // D) can use switch
7550     blockStruct *block = block_ + iBlock;
7551     //int numberPrice = block->numberInBlock_;
7552     int numberPrice = block->firstBasic_;
7553     int nel = block->numberElements_;
7554     int *row = row_ + block->startElements_;
7555     double *element = element_ + block->startElements_;
7556     int *column = column_ + block->startIndices_;
7557     int nBlock = numberPrice >> COIN_AVX2_SHIFT;
7558     numberPrice -= roundDown(numberPrice);
7559 #if defined(HASWELL) || defined(SKYLAKE)
7560     double *newValues = roundUpDouble((array + numberNonZero));
7561 #ifdef HASWELL
7562     assert(COIN_AVX2 == 4);
7563     for (int jBlock = 0; jBlock < nBlock; jBlock++) {
7564       __m256d arrayX = _mm256_setzero_pd();
7565       for (int j = 0; j < nel; j++) {
7566         __m128i rows = _mm_loadu_si128((const __m128i *)row);
7567         __m256d elements = _mm256_load_pd(element);
7568         __m256d pis = _mm256_i32gather_pd(pi, rows, 8);
7569         arrayX = _mm256_fmadd_pd(pis, elements, arrayX);
7570         row += 4;
7571         element += 4;
7572       }
7573       _mm256_store_pd(newValues, arrayX);
7574 #else
7575     assert(COIN_AVX2 == 8);
7576     for (int jBlock = 0; jBlock < nBlock; jBlock++) {
7577       __m512d arrayX = _mm512_setzero_pd();
7578       for (int j = 0; j < nel; j++) {
7579         __m256i rows = _mm256_load_si256((const __m256i *)row);
7580         __m512d elements = _mm512_load_pd(element);
7581         __m512d pis = _mm512_i32gather_pd(rows, pi, 8);
7582         arrayX = _mm512_fmadd_pd(pis, elements, arrayX);
7583         row += 4;
7584         element += 4;
7585       }
7586       _mm512_store_pd(newValues, arrayX);
7587 #endif
7588       newValues += COIN_AVX2;
7589       //row += (nel-1)*COIN_AVX2;
7590       //element += (nel-1)*COIN_AVX2;
7591       assert(row == row_ + block->startElements_ + nel * COIN_AVX2 * (jBlock + 1));
7592     }
7593     int n = nBlock * COIN_AVX2;
7594     int nSave = static_cast< int >(newValues - array);
7595     newValues = roundUpDouble((array + numberNonZero));
7596     for (int j = 0; j < n; j++) {
7597       double value = newValues[j];
7598       if (fabs(value) > zeroTolerance) {
7599         array[numberNonZero] = value;
7600         index[numberNonZero++] = *column;
7601       }
7602       column++;
7603     }
7604     for (int j = numberNonZero; j < nSave; j++)
7605       array[j] = 0.0;
7606 #else
7607     for (int jBlock = 0; jBlock < nBlock; jBlock++) {
7608       for (int j = 0; j < COIN_AVX2; j++) {
7609         double value = 0.0;
7610         for (int i = 0; i < nel; i++) {
7611           int iRow = row[i * COIN_AVX2];
7612           value += pi[iRow] * element[i * COIN_AVX2];
7613         }
7614 #if COIN_AVX2 > 1
7615         row++;
7616         element++;
7617 #else
7618         row += nel;
7619         element += nel;
7620 #endif
7621         if (fabs(value) > zeroTolerance) {
7622           array[numberNonZero] = value;
7623           index[numberNonZero++] = *column;
7624         }
7625         column++;
7626       }
7627 #if COIN_AVX2 > 1
7628       row += (nel - 1) * COIN_AVX2;
7629       element += (nel - 1) * COIN_AVX2;
7630       assert(row == row_ + block->startElements_ + nel * COIN_AVX2 * (jBlock + 1));
7631 #endif
7632     }
7633 #endif
7634     // last lot
7635     for (int j = 0; j < numberPrice; j++) {
7636       double value = 0.0;
7637       for (int i = 0; i < nel; i++) {
7638         int iRow = row[i * COIN_AVX2];
7639         value += pi[iRow] * element[i * COIN_AVX2];
7640       }
7641 #if COIN_AVX2 > 1
7642       row++;
7643       element++;
7644 #else
7645       row += nel;
7646       element += nel;
7647 #endif
7648       if (fabs(value) > zeroTolerance) {
7649         array[numberNonZero] = value;
7650         index[numberNonZero++] = *column;
7651       }
7652       column++;
7653     }
7654   }
7655   output->setNumElements(numberNonZero);
7656 }
7657 /* Return <code>x * -1 * A in <code>z</code>.
7658    Note - x packed and z will be packed mode
7659    Squashes small elements and knows about ClpSimplex
7660    - does dualColumn0 */
7661 void ClpPackedMatrix3::transposeTimes(const ClpSimplex *model,
7662   const double *COIN_RESTRICT pi,
7663   CoinIndexedVector *output,
7664   CoinIndexedVector *candidate,
7665   const CoinIndexedVector *rowArray) const
7666 {
7667   int numberNonZero = 0;
7668   int *index = output->getIndices();
7669   double *array = output->denseVector();
7670   double zeroTolerance = model->zeroTolerance();
7671   int numberColumns = model->numberColumns();
7672   const unsigned char *COIN_RESTRICT statusArray = model->statusArray() + numberColumns;
7673   int numberInRowArray = rowArray->getNumElements();
7674   const int *COIN_RESTRICT whichRow = rowArray->getIndices();
7675   const double *COIN_RESTRICT piOld = rowArray->denseVector();
7676   int *COIN_RESTRICT spareIndex = candidate->getIndices();
7677   double *COIN_RESTRICT spareArray = candidate->denseVector();
7678   const double *COIN_RESTRICT reducedCost = model->djRegion(0);
7679   double multiplier[] = { -1.0, 1.0 };
7680   double dualT = -model->currentDualTolerance();
7681   double acceptablePivot = model->spareDoubleArray_[0];
7682   double tentativeTheta = 1.0e15;
7683   double upperTheta = 1.0e31;
7684   int numberRemaining = 0;
7685   // dualColumn0 for slacks
7686   for (int i = 0; i < numberInRowArray; i++) {
7687     int iSequence = whichRow[i];
7688     int iStatus = (statusArray[iSequence] & 3) - 1;
7689     if (iStatus) {
7690       double mult = multiplier[iStatus - 1];
7691       double alpha = piOld[i] * mult;
7692       double oldValue;
7693       double value;
7694       if (alpha > 0.0) {
7695         oldValue = reducedCost[iSequence] * mult;
7696         value = oldValue - tentativeTheta * alpha;
7697         if (value < dualT) {
7698           value = oldValue - upperTheta * alpha;
7699           if (value < dualT && alpha >= acceptablePivot) {
7700             upperTheta = (oldValue - dualT) / alpha;
7701           }
7702           // add to list
7703           spareArray[numberRemaining] = alpha * mult;
7704           spareIndex[numberRemaining++] = iSequence + numberColumns;
7705         }
7706       }
7707     }
7708   }
7709   statusArray -= numberColumns;
7710   reducedCost -= numberColumns;
7711   double value = 0.0;
7712   CoinBigIndex j;
7713   int numberOdd = block_->startIndices_;
7714   if (numberOdd) {
7715     // A) as probably long may be worth unrolling
7716     CoinBigIndex end = start_[1];
7717     for (j = start_[0]; j < end; j++) {
7718       int iRow = row_[j];
7719       value += pi[iRow] * element_[j];
7720     }
7721     int iColumn;
7722     // int jColumn=column_[0];
7723 
7724     for (iColumn = 0; iColumn < numberOdd - 1; iColumn++) {
7725       CoinBigIndex start = end;
7726       end = start_[iColumn + 2];
7727       if (fabs(value) > zeroTolerance) {
7728         array[numberNonZero] = value;
7729         index[numberNonZero++] = column_[iColumn];
7730         //index[numberNonZero++]=jColumn;
7731       }
7732       // jColumn = column_[iColumn+1];
7733       value = 0.0;
7734       //if (model->getColumnStatus(jColumn)!=ClpSimplex::basic) {
7735       for (j = start; j < end; j++) {
7736         int iRow = row_[j];
7737         value += pi[iRow] * element_[j];
7738       }
7739       //}
7740     }
7741     if (fabs(value) > zeroTolerance) {
7742       array[numberNonZero] = value;
7743       index[numberNonZero++] = column_[iColumn];
7744       //index[numberNonZero++]=jColumn;
7745     }
7746   }
7747   // do odd ones
7748   for (int i = 0; i < numberNonZero; i++) {
7749     int iSequence = index[i];
7750     double alpha;
7751     double oldValue;
7752     double value;
7753     int iStatus = (statusArray[iSequence] & 3) - 1;
7754     if (iStatus) {
7755       double mult = multiplier[iStatus - 1];
7756       alpha = array[i] * mult;
7757       if (alpha > 0.0) {
7758         oldValue = reducedCost[iSequence] * mult;
7759         value = oldValue - tentativeTheta * alpha;
7760         if (value < dualT) {
7761           value = oldValue - upperTheta * alpha;
7762           if (value < dualT && alpha >= acceptablePivot) {
7763             upperTheta = (oldValue - dualT) / alpha;
7764           }
7765           // add to list
7766           spareArray[numberRemaining] = alpha * mult;
7767           spareIndex[numberRemaining++] = iSequence;
7768         }
7769       }
7770     }
7771   }
7772   int numberOld = numberNonZero;
7773   int nMax = 0;
7774   for (int iBlock = 0; iBlock < numberBlocks_; iBlock++) {
7775     // C) Can do two at a time (if so put odd one into start_)
7776     // D) can use switch
7777     blockStruct *block = block_ + iBlock;
7778     //int numberPrice = block->numberInBlock_;
7779     int numberPrice = block->firstBasic_;
7780     int nel = block->numberElements_;
7781     const int *COIN_RESTRICT row = row_ + block->startElements_;
7782     const double *COIN_RESTRICT element = element_ + block->startElements_;
7783     const int *COIN_RESTRICT column = column_ + block->startIndices_;
7784     int nBlock = numberPrice >> COIN_AVX2_SHIFT;
7785     numberPrice -= roundDown(numberPrice);
7786 #if defined(HASWELL) || defined(SKYLAKE)
7787     double *COIN_RESTRICT newValues = roundUpDouble((array + numberNonZero));
7788 #ifdef HASWELL
7789     assert(COIN_AVX2 == 4);
7790     for (int jBlock = 0; jBlock < nBlock; jBlock++) {
7791       __m256d arrayX = _mm256_setzero_pd();
7792       for (int j = 0; j < nel; j++) {
7793         __m128i rows = _mm_loadu_si128((const __m128i *)row);
7794         __m256d elements = _mm256_load_pd(element);
7795         __m256d pis = _mm256_i32gather_pd(pi, rows, 8);
7796         arrayX = _mm256_fmadd_pd(pis, elements, arrayX);
7797         row += 4;
7798         element += 4;
7799       }
7800       _mm256_store_pd(newValues, arrayX);
7801 #else
7802     assert(COIN_AVX2 == 8);
7803     for (int jBlock = 0; jBlock < nBlock; jBlock++) {
7804       __m512d arrayX = _mm512_setzero_pd();
7805       for (int j = 0; j < nel; j++) {
7806         __m256i rows = _mm256_loadu_si256((const __m256i *)row);
7807         __m512d elements = _mm512_load_pd(element);
7808         __m512d pis = _mm512_i32gather_pd(rows, pi, 8);
7809         arrayX = _mm512_fmadd_pd(pis, elements, arrayX);
7810         row += 4;
7811         element += 4;
7812       }
7813       _mm512_store_pd(newValues, arrayX);
7814 #endif
7815       newValues += COIN_AVX2;
7816       //row += (nel-1)*COIN_AVX2;
7817       //element += (nel-1)*COIN_AVX2;
7818       assert(row == row_ + block->startElements_ + nel * COIN_AVX2 * (jBlock + 1));
7819     }
7820 #else
7821     double *COIN_RESTRICT newValues = array + numberNonZero;
7822     for (int jBlock = 0; jBlock < nBlock; jBlock++) {
7823       for (int j = 0; j < COIN_AVX2; j++) {
7824         double value = 0.0;
7825         for (int i = 0; i < nel; i++) {
7826           int iRow = row[i * COIN_AVX2];
7827           value += pi[iRow] * element[i * COIN_AVX2];
7828         }
7829 #if COIN_AVX2 > 1
7830         row++;
7831         element++;
7832 #else
7833         row += nel;
7834         element += nel;
7835 #endif
7836         *newValues = value;
7837         newValues++;
7838       }
7839 #if COIN_AVX2 > 1
7840       row += (nel - 1) * COIN_AVX2;
7841       element += (nel - 1) * COIN_AVX2;
7842       assert(row == row_ + block->startElements_ + nel * COIN_AVX2 * (jBlock + 1));
7843 #endif
7844     }
7845 #endif
7846     for (int j = 0; j < numberPrice; j++) {
7847       double value = 0.0;
7848       for (int i = 0; i < nel; i++) {
7849         int iRow = row[i * COIN_AVX2];
7850         value += pi[iRow] * element[i * COIN_AVX2];
7851       }
7852 #if COIN_AVX2 > 1
7853       row++;
7854       element++;
7855 #else
7856       row += nel;
7857       element += nel;
7858 #endif
7859       *newValues = value;
7860       newValues++;
7861     }
7862 #ifdef HASWELL
7863     newValues = roundUpDouble((array + numberNonZero));
7864 #else
7865     newValues = array + numberNonZero;
7866 #endif
7867     int n = block->firstBasic_;
7868     int nL = block->firstAtUpper_;
7869     nMax = static_cast< int >(newValues - array) + n;
7870     for (int j = 0; j < nL; j++) {
7871       double value2 = newValues[j];
7872       if (fabs(value2) > zeroTolerance) {
7873         int iSequence = column[j];
7874         double alpha;
7875         double oldValue;
7876         double value;
7877         alpha = value2;
7878         if (alpha > 0.0) {
7879           oldValue = reducedCost[iSequence];
7880           value = oldValue - tentativeTheta * alpha;
7881           if (value < dualT) {
7882             value = oldValue - upperTheta * alpha;
7883             if (value < dualT && alpha >= acceptablePivot) {
7884               upperTheta = (oldValue - dualT) / alpha;
7885             }
7886             // add to list
7887             spareArray[numberRemaining] = value2;
7888             spareIndex[numberRemaining++] = iSequence;
7889           }
7890         }
7891         array[numberNonZero] = value2;
7892         index[numberNonZero++] = iSequence;
7893       }
7894     }
7895     for (int j = nL; j < n; j++) {
7896       double value2 = newValues[j];
7897       if (fabs(value2) > zeroTolerance) {
7898         int iSequence = column[j];
7899         double alpha;
7900         double oldValue;
7901         double value;
7902         alpha = -value2;
7903         if (alpha > 0.0) {
7904           oldValue = -reducedCost[iSequence];
7905           value = oldValue - tentativeTheta * alpha;
7906           if (value < dualT) {
7907             value = oldValue - upperTheta * alpha;
7908             if (value < dualT && alpha >= acceptablePivot) {
7909               upperTheta = (oldValue - dualT) / alpha;
7910             }
7911             // add to list
7912             spareArray[numberRemaining] = value2;
7913             spareIndex[numberRemaining++] = iSequence;
7914           }
7915         }
7916         array[numberNonZero] = value2;
7917         index[numberNonZero++] = iSequence;
7918       }
7919     }
7920     numberOld = numberNonZero;
7921   }
7922   for (int j = numberNonZero; j < nMax; j++)
7923     array[j] = 0.0;
7924   output->setNumElements(numberNonZero);
7925   candidate->setNumElements(numberRemaining);
7926   model->spareDoubleArray_[0] = upperTheta;
7927 }
7928 static void
7929 transposeTimes3Bit2Odd(clpTempInfo &info)
7930 {
7931   double zeroTolerance = info.tolerance;
7932   double dualTolerance = -info.dualTolerance; // sign swapped
7933   double *COIN_RESTRICT reducedCost = info.reducedCost;
7934   double *COIN_RESTRICT weights = info.solution;
7935   const unsigned int *reference = reinterpret_cast< const unsigned int * >(info.upper);
7936   const unsigned char *status = info.status;
7937   const CoinBigIndex *starts = info.start;
7938   const int *row = info.row;
7939   const int *column = info.which;
7940   const double *element = info.element;
7941   double scaleFactor = info.theta;
7942   const double *pi = info.cost;
7943   const double *piWeight = info.lower;
7944   double referenceIn = info.upperTheta;
7945   double devex = info.changeObj;
7946   assert(scaleFactor);
7947   int bestSequence = info.numberAdded;
7948   double bestRatio = info.bestPossible;
7949 #define NO_CHANGE_MULTIPLIER 1
7950 #define INVERSE_MULTIPLIER 1.0 / NO_CHANGE_MULTIPLIER
7951 #if NO_CHANGE_MULTIPLIER != 1
7952   double bestRatio2 = NO_CHANGE_MULTIPLIER * bestRatio;
7953 #else
7954 #define bestRatio2 bestRatio
7955 #endif
7956   CoinBigIndex end = starts[0];
7957   int numberOdd = info.numberToDo;
7958   for (int jColumn = 0; jColumn < numberOdd; jColumn++) {
7959     CoinBigIndex start = end;
7960     CoinBigIndex j;
7961     int iColumn = column[jColumn];
7962     end = starts[jColumn + 1];
7963     double value = 0.0;
7964     if ((status[iColumn] & 7) != 1) {
7965       for (j = start; j < end; j++) {
7966         int iRow = row[j];
7967         value -= pi[iRow] * element[j];
7968       }
7969       if (fabs(value) > zeroTolerance) {
7970         // and do other array
7971         double modification = 0.0;
7972         for (j = start; j < end; j++) {
7973           int iRow = row[j];
7974           modification += piWeight[iRow] * element[j];
7975         }
7976         double thisWeight = weights[iColumn];
7977         double pivot = value * scaleFactor;
7978         double pivotSquared = pivot * pivot;
7979         thisWeight += pivotSquared * devex + pivot * modification;
7980         if (thisWeight < DEVEX_TRY_NORM) {
7981           if (referenceIn < 0.0) {
7982             // steepest
7983             thisWeight = CoinMax(DEVEX_TRY_NORM, DEVEX_ADD_ONE + pivotSquared);
7984           } else {
7985             // exact
7986             thisWeight = referenceIn * pivotSquared;
7987             if (reference(iColumn))
7988               thisWeight += 1.0;
7989             thisWeight = CoinMax(thisWeight, DEVEX_TRY_NORM);
7990           }
7991         }
7992         weights[iColumn] = thisWeight;
7993         value = reducedCost[iColumn] - value;
7994         reducedCost[iColumn] = value;
7995         unsigned char thisStatus = status[iColumn] & 7;
7996         if (thisStatus == 3) {
7997         } else if ((thisStatus & 1) != 0) {
7998           // basic or fixed
7999           value = 0.0;
8000         } else if (thisStatus == 2) {
8001           value = -value;
8002         } else {
8003           // free or superbasic
8004           if (fabs(value) > -FREE_ACCEPT * dualTolerance) {
8005             // we are going to bias towards free (but only if reasonable)
8006             value = -fabs(value) * FREE_BIAS;
8007           } else {
8008             value = 0.0;
8009           }
8010         }
8011         if (value < dualTolerance) {
8012           value *= value;
8013           if (value > bestRatio * weights[iColumn]) {
8014             bestSequence = iColumn;
8015             bestRatio = value / weights[iColumn];
8016 #if NO_CHANGE_MULTIPLIER != 1
8017             bestRatio2 = bestRatio * NO_CHANGE_MULTIPLIER;
8018 #endif
8019           }
8020         }
8021       } else {
8022         value = reducedCost[iColumn];
8023         unsigned char thisStatus = status[iColumn] & 7;
8024         if (thisStatus == 3) {
8025         } else if ((thisStatus & 1) != 0) {
8026           // basic or fixed
8027           value = 0.0;
8028         } else if (thisStatus == 2) {
8029           value = -value;
8030         } else {
8031           // free or superbasic
8032           if (fabs(value) > -FREE_ACCEPT * dualTolerance) {
8033             // we are going to bias towards free (but only if reasonable)
8034             value = -fabs(value) * FREE_BIAS;
8035           } else {
8036             value = 0.0;
8037           }
8038         }
8039         if (value < dualTolerance) {
8040           value *= value;
8041           if (value > bestRatio2 * weights[iColumn]) {
8042             bestSequence = iColumn;
8043             bestRatio2 = value / weights[iColumn];
8044 #if NO_CHANGE_MULTIPLIER != 1
8045             bestRatio = bestRatio2 * INVERSE_MULTIPLIER;
8046 #endif
8047           }
8048         }
8049       }
8050     }
8051   }
8052   info.numberAdded = bestSequence;
8053 }
8054 
8055 static void
8056 transposeTimes3Bit2(clpTempInfo &info)
8057 {
8058   double zeroTolerance = info.tolerance;
8059   double dualTolerance = -info.dualTolerance; // sign swapped
8060   double *COIN_RESTRICT reducedCost = info.reducedCost;
8061   double *COIN_RESTRICT weights = info.solution;
8062   double *COIN_RESTRICT work = info.work;
8063   double *COIN_RESTRICT work2 = work + COIN_AVX2_CHUNK;
8064   const unsigned int *reference = reinterpret_cast< const unsigned int * >(info.upper);
8065   const blockStruct *blocks = reinterpret_cast< const blockStruct * >(info.pivotVariable);
8066   const unsigned char *status = info.status;
8067   const int *rowBlock = info.row;
8068   const int *columnBlock = info.which;
8069   const double *elementBlock = info.element;
8070   double scaleFactor = info.theta;
8071   const double *pi = info.cost;
8072   const double *piWeight = info.lower;
8073   double referenceIn = info.upperTheta;
8074   double devex = info.changeObj;
8075   assert(scaleFactor);
8076   int bestSequence = info.numberAdded;
8077   double bestRatio = info.bestPossible;
8078 #if NO_CHANGE_MULTIPLIER != 1
8079   double bestRatio2 = NO_CHANGE_MULTIPLIER * bestRatio;
8080 #endif
8081 #define INCLUDE_MATRIX3_PRICING 1
8082   int firstBlock = info.startColumn;
8083   int lastBlock = info.numberToDo;
8084   //double * COIN_RESTRICT tempArray =
8085   //const_cast<double *>(elementBlock+info.numberColumns);
8086   //double * COIN_RESTRICT tempArray2 = tempArray + roundUp(maxBlockSize);
8087   for (int iBlock = firstBlock; iBlock < lastBlock; iBlock++) {
8088     const blockStruct *block = blocks + iBlock;
8089     int numberPrice = block->firstBasic_;
8090     int nel = block->numberElements_;
8091     const int *row = rowBlock + block->startElements_;
8092     const double *element = elementBlock + block->startElements_;
8093     const int *column = columnBlock + block->startIndices_;
8094 #if COIN_AVX2 > 1
8095     int numberDo = roundDown(numberPrice);
8096 #if defined(HASWELL) || defined(SKYLAKE)
8097 #ifdef HASWELL
8098     assert(COIN_AVX2 == 4);
8099     __m256d zero = _mm256_setzero_pd();
8100     for (int kColumn = 0; kColumn < numberDo; kColumn += COIN_AVX2_CHUNK) {
8101       int endBlock = CoinMin(COIN_AVX2_CHUNK, numberDo - kColumn);
8102       for (int i = 0; i < endBlock; i += COIN_AVX2) {
8103         __m256d arrayX = _mm256_setzero_pd();
8104         __m256d tempX = _mm256_setzero_pd();
8105         for (int j = 0; j < nel; j++) {
8106           __m128i rows = _mm_loadu_si128((const __m128i *)row);
8107           __m256d elements = _mm256_load_pd(element);
8108           __m256d pis = _mm256_i32gather_pd(pi, rows, 8);
8109           __m256d piWeights = _mm256_i32gather_pd(piWeight, rows, 8);
8110           // should be better way using sub
8111           arrayX = _mm256_fmadd_pd(pis, elements, arrayX);
8112           tempX = _mm256_fmadd_pd(piWeights, elements, tempX);
8113           row += 4;
8114           element += 4;
8115         }
8116         arrayX = _mm256_sub_pd(zero, arrayX);
8117         _mm256_store_pd(work + i, tempX);
8118         _mm256_store_pd(work2 + i, arrayX);
8119       }
8120 #else
8121     assert(COIN_AVX2 == 8);
8122     __m512d zero = _mm512_setzero_pd();
8123     for (int kColumn = 0; kColumn < numberDo; kColumn += COIN_AVX2_CHUNK) {
8124       int endBlock = CoinMin(COIN_AVX2_CHUNK, numberDo - kColumn);
8125       for (int i = 0; i < endBlock; i += COIN_AVX2) {
8126         __m512d arrayX = _mm512_setzero_pd();
8127         __m512d tempX = _mm512_setzero_pd();
8128         for (int j = 0; j < nel; j++) {
8129           __m256i rows = _mm256_loadu_si256((const __m256i *)row);
8130           __m512d elements = _mm512_load_pd(element);
8131           __m512d pis = _mm512_i32gather_pd(rows, pi, 8);
8132           __m512d piWeights = _mm512_i32gather_pd(rows, piWeight, 8);
8133           // should be better way using sub
8134           arrayX = _mm512_fmadd_pd(pis, elements, arrayX);
8135           tempX = _mm512_fmadd_pd(piWeights, elements, tempX);
8136           row += 4;
8137           element += 4;
8138         }
8139         arrayX = _mm512_sub_pd(zero, arrayX);
8140         _mm512_store_pd(work + i, tempX);
8141         _mm512_store_pd(work2 + i, arrayX);
8142       }
8143 #endif
8144       for (int i = 0; i < endBlock; i++) {
8145         double value = work2[i];
8146         double modification = work[i];
8147         // common coding
8148 #include "ClpPackedMatrix.hpp"
8149       }
8150     }
8151 #else
8152     for (int kColumn = 0; kColumn < numberDo; kColumn += COIN_AVX2_CHUNK) {
8153       int n = 0;
8154       int nBlock = CoinMin(COIN_AVX2_CHUNK, numberPrice - kColumn) >> COIN_AVX2_SHIFT;
8155       //int nBlock = numberPrice>>COIN_AVX2_SHIFT;
8156       for (int jBlock = 0; jBlock < nBlock; jBlock++) {
8157         for (int j = 0; j < COIN_AVX2; j++) {
8158           double value = 0.0;
8159           double modification = 0.0;
8160           for (int i = 0; i < nel; i++) {
8161             //assert (element+i*COIN_AVX2<tempArray);
8162             int iRow = row[i * COIN_AVX2];
8163             value -= pi[iRow] * element[i * COIN_AVX2];
8164             modification += piWeight[iRow] * element[i * COIN_AVX2];
8165           }
8166           work[n] = modification;
8167           work2[n++] = value;
8168           row++;
8169           element++;
8170         }
8171         row += (nel - 1) * COIN_AVX2;
8172         element += (nel - 1) * COIN_AVX2;
8173         //assert (row == rowBlock + block->startElements_ + nel*COIN_AVX2*(jBlock+1));
8174       }
8175       for (int j = 0; j < n; j++) {
8176         double value = work2[j];
8177         double modification = work[j];
8178         // common coding
8179 #include "ClpPackedMatrix.hpp"
8180       }
8181     }
8182 #endif
8183     // last lot
8184     for (int j = numberDo; j < numberPrice; j++) {
8185       double value = 0.0;
8186       double modification = 0.0;
8187       for (int i = 0; i < nel; i++) {
8188         int iRow = row[i * COIN_AVX2];
8189         value -= pi[iRow] * element[i * COIN_AVX2];
8190         modification += piWeight[iRow] * element[i * COIN_AVX2];
8191       }
8192 // common coding
8193 #include "ClpPackedMatrix.hpp"
8194       row++;
8195       element++;
8196     }
8197 #else // COIN_AVX2 == 1
8198     for (int j = 0; j < numberPrice; j++) {
8199       double value = 0.0;
8200       double modification = 0.0;
8201       for (int i = 0; i < nel; i++) {
8202         //assert (element+i*COIN_AVX2<tempArray);
8203         int iRow = row[i * COIN_AVX2];
8204         value -= pi[iRow] * element[i * COIN_AVX2];
8205         modification += piWeight[iRow] * element[i * COIN_AVX2];
8206       }
8207 // common coding
8208 #include "ClpPackedMatrix.hpp"
8209       row += nel;
8210       element += nel;
8211     }
8212     int n = numberPrice;
8213 #endif
8214   }
8215   info.numberAdded = bestSequence;
8216   info.bestPossible = bestRatio;
8217 }
8218 static void
8219 transposeTimes3BitSlacks(clpTempInfo &info)
8220 {
8221   double dualTolerance = info.dualTolerance;
8222   double *COIN_RESTRICT reducedCost = info.reducedCost;
8223   double *COIN_RESTRICT weights = info.solution;
8224   const int *columnBlock = info.which;
8225   const blockStruct *blocks = reinterpret_cast< const blockStruct * >(info.pivotVariable);
8226   //const unsigned char * status = info.status;
8227   int bestSequence = info.numberAdded;
8228   double bestRatio = info.bestPossible;
8229   int iBlock = info.startColumn;
8230   assert(info.numberToDo == iBlock + 1);
8231   const blockStruct *block = blocks + iBlock;
8232   int first = 0;
8233   int last = block->firstAtLower_;
8234   const int *column = columnBlock + block->startIndices_;
8235   for (int j = first; j < last; j++) {
8236     int iColumn = *column;
8237     column++;
8238     double value = reducedCost[iColumn];
8239     // free or superbasic
8240     if (fabs(value) > FREE_ACCEPT * dualTolerance) {
8241       // we are going to bias towards free (but only if reasonable)
8242       value = -fabs(value) * FREE_BIAS;
8243       value *= value;
8244       if (value > bestRatio * weights[iColumn]) {
8245         bestSequence = iColumn;
8246         bestRatio = value / weights[iColumn];
8247       }
8248     }
8249   }
8250   first = last;
8251   last = block->firstAtUpper_;
8252   dualTolerance = -dualTolerance; //flip sign
8253   for (int j = first; j < last; j++) {
8254     int iColumn = *column;
8255     column++;
8256     double value = reducedCost[iColumn];
8257     // at lower
8258     if (value < dualTolerance) {
8259       value *= value;
8260       if (value > bestRatio * weights[iColumn]) {
8261         bestSequence = iColumn;
8262         bestRatio = value / weights[iColumn];
8263       }
8264     }
8265   }
8266   first = last;
8267   last = block->firstBasic_;
8268   dualTolerance = -dualTolerance; //flip sign
8269   for (int j = first; j < last; j++) {
8270     int iColumn = *column;
8271     column++;
8272     double value = reducedCost[iColumn];
8273     // at upper
8274     if (value > dualTolerance) {
8275       value *= value;
8276       if (value > bestRatio * weights[iColumn]) {
8277         bestSequence = iColumn;
8278         bestRatio = value / weights[iColumn];
8279       }
8280     }
8281   }
8282   info.numberAdded = bestSequence;
8283   info.bestPossible = bestRatio;
8284 }
8285 // Updates two arrays for steepest
8286 void ClpPackedMatrix3::transposeTimes2(const ClpSimplex *model,
8287   const double *COIN_RESTRICT pi,
8288   CoinIndexedVector *output,
8289   const double *COIN_RESTRICT piWeight,
8290   double *COIN_RESTRICT infeas,
8291   double *COIN_RESTRICT reducedCost,
8292   double referenceIn, double devex,
8293   // Array for exact devex to say what is in reference framework
8294   unsigned int *COIN_RESTRICT reference,
8295   double *COIN_RESTRICT weights, double scaleFactor)
8296 {
8297   /*
8298     This is designed for meaty problems so probably should also go to exact devex
8299     also could do sequenceIn_ choice here
8300     could do ones at lb separately - ?? masking - ? masking for weights
8301    */
8302   double zeroTolerance = model->zeroTolerance();
8303   double dualTolerance = model->currentDualTolerance();
8304   // we can't really trust infeasibilities if there is dual error
8305   // this coding has to mimic coding in checkDualSolution
8306   double error = CoinMin(1.0e-2, model->largestDualError());
8307   // allow tolerance at least slightly bigger than standard
8308   dualTolerance = dualTolerance + error;
8309   int numberOdd = block_->startIndices_;
8310   assert(scaleFactor);
8311 #if ABOCA_LITE
8312   //int nChunks=numberChunks_;
8313 #define ODD_INFO 2 * ABOCA_LITE
8314 #else
8315 #define ODD_INFO 1
8316   //int nChunks=1;
8317 #endif
8318   clpTempInfo info[ODD_INFO + 2] = {};
8319   unsigned char *status = model->statusArray();
8320   // fill in info
8321   for (int i = 0; i < ODD_INFO + 2; i++) {
8322     //memset(info+i,0,sizeof(clpTempInfo));
8323     info[i].tolerance = zeroTolerance;
8324     info[i].dualTolerance = dualTolerance;
8325     info[i].reducedCost = reducedCost;
8326     info[i].infeas = infeas;
8327     info[i].solution = weights;
8328     info[i].upper = reinterpret_cast< const double * >(reference);
8329     info[i].pivotVariable = reinterpret_cast< const int * >(block_);
8330     info[i].status = status;
8331     info[i].start = start_;
8332     info[i].row = row_;
8333     info[i].which = column_;
8334     info[i].element = element_;
8335     info[i].theta = scaleFactor;
8336     info[i].cost = pi;
8337     info[i].lower = piWeight;
8338     info[i].upperTheta = referenceIn;
8339     info[i].changeObj = devex;
8340     info[i].numberAdded = -1;
8341   }
8342   //assert (ODD_INFO==1);
8343   info[ODD_INFO].startColumn = 0;
8344   info[ODD_INFO].numberToDo = numberOdd;
8345   //
8346   double *temporary = temporary_->array();
8347 #if ABOCA_LITE
8348   for (int iBlock = 0; iBlock < numberChunks_; iBlock++) {
8349     info[iBlock].startColumn = endChunk_[iBlock];
8350     info[iBlock].numberToDo = endChunk_[iBlock + 1];
8351     info[iBlock].work = temporary + 2 * COIN_AVX2_CHUNK * iBlock;
8352   }
8353 #else
8354   info[0].startColumn = 0;
8355   info[0].numberToDo = numberBlocks_;
8356   info[0].work = temporary;
8357 #endif
8358   info[ODD_INFO + 1].startColumn = numberBlocks_;
8359   info[ODD_INFO + 1].numberToDo = numberBlocks_ + 1;
8360 #if ABOCA_LITE
8361   if (abcState() > 1) {
8362     cilk_spawn transposeTimes3Bit2Odd(info[ODD_INFO]);
8363     for (int iBlock = 0; iBlock < numberChunks_; iBlock++) {
8364       cilk_spawn transposeTimes3Bit2(info[iBlock]);
8365     }
8366     transposeTimes3BitSlacks(info[ODD_INFO + 1]);
8367     cilk_sync;
8368   } else {
8369 #endif
8370     transposeTimes3Bit2Odd(info[ODD_INFO]);
8371 #if ABOCA_LITE
8372     for (int iBlock = 0; iBlock < numberChunks_; iBlock++) {
8373       transposeTimes3Bit2(info[iBlock]);
8374     }
8375 #else
8376   transposeTimes3Bit2(info[0]);
8377 #endif
8378     transposeTimes3BitSlacks(info[ODD_INFO + 1]);
8379 #if ABOCA_LITE
8380   }
8381 #endif
8382   int sequenceIn = -1;
8383   double best = 0.0;
8384   for (int iBlock = 0; iBlock < ODD_INFO + 2; iBlock++) {
8385     if (info[iBlock].bestPossible > best) {
8386       best = info[iBlock].bestPossible;
8387       sequenceIn = info[iBlock].numberAdded;
8388     }
8389   }
8390   // Make sure sequenceOut not eligible
8391   int sequenceOut = model->sequenceOut();
8392   double saveOutDj;
8393   if (sequenceOut >= 0) {
8394     saveOutDj = reducedCost[sequenceOut];
8395     unsigned char thisStatus = status[sequenceOut] & 7;
8396     assert(thisStatus != 0 && thisStatus != 4);
8397     if (thisStatus != 2)
8398       reducedCost[sequenceOut] = COIN_DBL_MAX;
8399     else
8400       reducedCost[sequenceOut] = -COIN_DBL_MAX;
8401   }
8402   if (sequenceIn >= 0) {
8403     // check not flagged
8404     if (model->flagged(sequenceIn) || sequenceIn == sequenceOut) {
8405       int number = model->numberRows() + model->numberColumns();
8406       const unsigned char *COIN_RESTRICT status = model->statusArray();
8407       sequenceIn = -2;
8408       double bestRatio = 0.0;
8409       for (int iSequence = 0; iSequence < number; iSequence++) {
8410         unsigned char thisStatus = status[iSequence] & 7;
8411         double value = reducedCost[iSequence];
8412         if (thisStatus == 3) {
8413         } else if ((thisStatus & 1) != 0) {
8414           // basic or fixed
8415           value = 0.0;
8416         } else if (thisStatus == 2) {
8417           value = -value;
8418         } else {
8419           // free or superbasic
8420           if (fabs(value) > FREE_ACCEPT * -dualTolerance) {
8421             // we are going to bias towards free (but only if reasonable)
8422             value = -fabs(value) * FREE_BIAS;
8423           } else {
8424             value = 0.0;
8425           }
8426         }
8427         if (value < dualTolerance) {
8428           value *= value;
8429           if (value > bestRatio * weights[iSequence]) {
8430             if (!model->flagged(iSequence)) {
8431               sequenceIn = iSequence;
8432               bestRatio = value / weights[iSequence];
8433             }
8434           }
8435         }
8436       }
8437     }
8438   }
8439   if (sequenceOut >= 0)
8440     reducedCost[sequenceOut] = saveOutDj;
8441   model->spareIntArray_[3] = sequenceIn;
8442 }
8443 /*
8444   type - 1 redo infeasible, 2 choose sequenceIn, 3 both
8445   returns sequenceIn (or -1) for type 2
8446  */
8447 int ClpPackedMatrix3::redoInfeasibilities(const ClpSimplex *model,
8448   ClpPrimalColumnSteepest *pivotChoose,
8449   int type)
8450 {
8451   CoinIndexedVector *infeasible = pivotChoose->infeasible();
8452   // change to use blocks
8453   double tolerance = model->currentDualTolerance();
8454   // we can't really trust infeasibilities if there is dual error
8455   // this coding has to mimic coding in checkDualSolution
8456   double error = CoinMin(1.0e-2, model->largestDualError());
8457   // allow tolerance at least slightly bigger than standard
8458   tolerance = tolerance + error;
8459   // reverse sign so test is cleaner
8460   tolerance = -tolerance;
8461   int number = model->numberRows() + model->numberColumns();
8462 
8463   const double *COIN_RESTRICT reducedCost = model->djRegion();
8464   const unsigned char *COIN_RESTRICT status = model->statusArray();
8465   const double *COIN_RESTRICT weights = pivotChoose->weights();
8466   int sequenceIn = -1;
8467   double bestRatio = 0.0;
8468   switch (type) {
8469   case 1:
8470     infeasible->clear();
8471     for (int iSequence = 0; iSequence < number; iSequence++) {
8472       unsigned char thisStatus = status[iSequence] & 7;
8473       double value = reducedCost[iSequence];
8474       if (thisStatus == 3) {
8475       } else if ((thisStatus & 1) != 0) {
8476         // basic or fixed
8477         value = 0.0;
8478       } else if (thisStatus == 2) {
8479         value = -value;
8480       } else {
8481         // free or superbasic
8482         if (fabs(value) > FREE_ACCEPT * -tolerance) {
8483           // we are going to bias towards free (but only if reasonable)
8484           value = -fabs(value) * FREE_BIAS;
8485         } else {
8486           value = 0.0;
8487         }
8488       }
8489       if (value < tolerance) {
8490         // store square in list
8491         infeasible->quickAdd(iSequence, value * value);
8492       }
8493     }
8494     break;
8495   case 2:
8496     infeasible->clear(); // temp for debug
8497     for (int iSequence = 0; iSequence < number; iSequence++) {
8498       unsigned char thisStatus = status[iSequence] & 7;
8499       double value = reducedCost[iSequence];
8500       if (thisStatus == 3) {
8501       } else if ((thisStatus & 1) != 0) {
8502         // basic or fixed
8503         value = 0.0;
8504       } else if (thisStatus == 2) {
8505         value = -value;
8506       } else {
8507         // free or superbasic
8508         if (fabs(value) > FREE_ACCEPT * -tolerance) {
8509           // we are going to bias towards free (but only if reasonable)
8510           value = -fabs(value) * FREE_BIAS;
8511         } else {
8512           value = 0.0;
8513         }
8514       }
8515       if (value < tolerance) {
8516         value *= value;
8517         if (value > bestRatio * weights[iSequence]) {
8518           sequenceIn = iSequence;
8519           bestRatio = value / weights[iSequence];
8520         }
8521       }
8522     }
8523     break;
8524   case 3:
8525     infeasible->clear();
8526     for (int iSequence = 0; iSequence < number; iSequence++) {
8527       unsigned char thisStatus = status[iSequence] & 7;
8528       double value = reducedCost[iSequence];
8529       if (thisStatus == 3) {
8530       } else if ((thisStatus & 1) != 0) {
8531         // basic or fixed
8532         value = 0.0;
8533       } else if (thisStatus == 2) {
8534         value = -value;
8535       } else {
8536         // free or superbasic
8537         if (fabs(value) > FREE_ACCEPT * -tolerance) {
8538           // we are going to bias towards free (but only if reasonable)
8539           value = -fabs(value) * FREE_BIAS;
8540         } else {
8541           value = 0.0;
8542         }
8543       }
8544       if (value < tolerance) {
8545         value *= value;
8546         // store square in list
8547         infeasible->quickAdd(iSequence, value);
8548         if (value > bestRatio * weights[iSequence]) {
8549           sequenceIn = iSequence;
8550           bestRatio = value / weights[iSequence];
8551         }
8552       }
8553     }
8554     break;
8555   }
8556   if (sequenceIn >= 0) {
8557     // check not flagged
8558     if (model->flagged(sequenceIn)) {
8559       sequenceIn = -1;
8560       for (int iSequence = 0; iSequence < number; iSequence++) {
8561         unsigned char thisStatus = status[iSequence] & 7;
8562         double value = reducedCost[iSequence];
8563         if (thisStatus == 3) {
8564         } else if ((thisStatus & 1) != 0) {
8565           // basic or fixed
8566           value = 0.0;
8567         } else if (thisStatus == 2) {
8568           value = -value;
8569         } else {
8570           // free or superbasic
8571           if (fabs(value) > FREE_ACCEPT * -tolerance) {
8572             // we are going to bias towards free (but only if reasonable)
8573             value = -fabs(value) * FREE_BIAS;
8574           } else {
8575             value = 0.0;
8576           }
8577         }
8578         if (value < tolerance) {
8579           value *= value;
8580           if (value > bestRatio * weights[iSequence]) {
8581             if (!model->flagged(iSequence)) {
8582               sequenceIn = iSequence;
8583               bestRatio = value / weights[iSequence];
8584             }
8585           }
8586         }
8587       }
8588     }
8589   }
8590   return sequenceIn;
8591 }
8592 #if COIN_LONG_WORK
8593 // For long double versions
8594 void ClpPackedMatrix::times(CoinWorkDouble scalar,
8595   const CoinWorkDouble *x, CoinWorkDouble *y) const
8596 {
8597   int iRow, iColumn;
8598   // get matrix data pointers
8599   const int *row = matrix_->getIndices();
8600   const CoinBigIndex *columnStart = matrix_->getVectorStarts();
8601   const double *elementByColumn = matrix_->getElements();
8602   //memset(y,0,matrix_->getNumRows()*sizeof(double));
8603   assert(((flags_ & 2) != 0) == matrix_->hasGaps());
8604   if (!(flags_ & 2)) {
8605     for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
8606       CoinBigIndex j;
8607       CoinWorkDouble value = x[iColumn];
8608       if (value) {
8609         CoinBigIndex start = columnStart[iColumn];
8610         CoinBigIndex end = columnStart[iColumn + 1];
8611         value *= scalar;
8612         for (j = start; j < end; j++) {
8613           iRow = row[j];
8614           y[iRow] += value * elementByColumn[j];
8615         }
8616       }
8617     }
8618   } else {
8619     const int *columnLength = matrix_->getVectorLengths();
8620     for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
8621       CoinBigIndex j;
8622       CoinWorkDouble value = x[iColumn];
8623       if (value) {
8624         CoinBigIndex start = columnStart[iColumn];
8625         CoinBigIndex end = start + columnLength[iColumn];
8626         value *= scalar;
8627         for (j = start; j < end; j++) {
8628           iRow = row[j];
8629           y[iRow] += value * elementByColumn[j];
8630         }
8631       }
8632     }
8633   }
8634 }
8635 void ClpPackedMatrix::transposeTimes(CoinWorkDouble scalar,
8636   const CoinWorkDouble *x, CoinWorkDouble *y) const
8637 {
8638   int iColumn;
8639   // get matrix data pointers
8640   const int *row = matrix_->getIndices();
8641   const CoinBigIndex *columnStart = matrix_->getVectorStarts();
8642   const double *elementByColumn = matrix_->getElements();
8643   if (!(flags_ & 2)) {
8644     if (scalar == -1.0) {
8645       CoinBigIndex start = columnStart[0];
8646       for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
8647         CoinBigIndex j;
8648         CoinBigIndex next = columnStart[iColumn + 1];
8649         CoinWorkDouble value = y[iColumn];
8650         for (j = start; j < next; j++) {
8651           int jRow = row[j];
8652           value -= x[jRow] * elementByColumn[j];
8653         }
8654         start = next;
8655         y[iColumn] = value;
8656       }
8657     } else {
8658       CoinBigIndex start = columnStart[0];
8659       for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
8660         CoinBigIndex j;
8661         CoinBigIndex next = columnStart[iColumn + 1];
8662         CoinWorkDouble value = 0.0;
8663         for (j = start; j < next; j++) {
8664           int jRow = row[j];
8665           value += x[jRow] * elementByColumn[j];
8666         }
8667         start = next;
8668         y[iColumn] += value * scalar;
8669       }
8670     }
8671   } else {
8672     const int *columnLength = matrix_->getVectorLengths();
8673     for (iColumn = 0; iColumn < numberActiveColumns_; iColumn++) {
8674       CoinBigIndex j;
8675       CoinWorkDouble value = 0.0;
8676       CoinBigIndex start = columnStart[iColumn];
8677       CoinBigIndex end = start + columnLength[iColumn];
8678       for (j = start; j < end; j++) {
8679         int jRow = row[j];
8680         value += x[jRow] * elementByColumn[j];
8681       }
8682       y[iColumn] += value * scalar;
8683     }
8684   }
8685 }
8686 #endif
8687 #ifdef CLP_ALL_ONE_FILE
8688 #undef reference
8689 #endif
8690 #if 0
8691 #undef ABCSTATE_LITE
8692 #if ABOCA_LITE
8693 /* Meat of transposeTimes by column when not scaled and skipping
8694    and doing part of dualColumn */
8695 static void
8696 dualColumn00(clpTempInfo & info)
8697 {
8698   const int * COIN_RESTRICT which = info.which;
8699   const double * COIN_RESTRICT work = info.work;
8700   int * COIN_RESTRICT index = info.index;
8701   double * COIN_RESTRICT spare = info.spare;
8702   const unsigned char * COIN_RESTRICT status = info.status;
8703   const double * COIN_RESTRICT reducedCost = info.reducedCost;
8704   double upperTheta = info.upperTheta;
8705   double acceptablePivot = info.acceptablePivot;
8706   double dualTolerance = info.tolerance;
8707   int numberToDo=info.numberToDo;
8708   double tentativeTheta = 1.0e15;
8709   int numberRemaining = 0;
8710   double multiplier[] = { -1.0, 1.0};
8711   double dualT = - dualTolerance;
8712   for (int i = 0; i < numberToDo; i++) {
8713     int iSequence = which[i];
8714     int wanted = (status[iSequence] & 3) - 1;
8715     if (wanted) {
8716       double mult = multiplier[wanted-1];
8717       double alpha = work[i] * mult;
8718       if (alpha > 0.0) {
8719 	double oldValue = reducedCost[iSequence] * mult;
8720 	double value = oldValue - tentativeTheta * alpha;
8721 	if (value < dualT) {
8722 	  value = oldValue - upperTheta * alpha;
8723 	  if (value < dualT && alpha >= acceptablePivot) {
8724 	    upperTheta = (oldValue - dualT) / alpha;
8725 	  }
8726 	  // add to list
8727 	  spare[numberRemaining] = alpha * mult;
8728 	  index[numberRemaining++] = iSequence;
8729 	}
8730       }
8731     }
8732   }
8733   info.numberRemaining = numberRemaining;
8734   info.upperTheta = upperTheta;
8735 }
8736 #endif
8737 int
8738 ClpSimplexDual::dualColumn0(const CoinIndexedVector * rowArray,
8739                             const CoinIndexedVector * columnArray,
8740                             CoinIndexedVector * spareArray,
8741                             double acceptablePivot,
8742                             double & upperReturn, double &bestReturn, double & badFree)
8743 {
8744      // do first pass to get possibles
8745      double * spare = spareArray->denseVector();
8746      int * index = spareArray->getIndices();
8747      const double * work;
8748      int number;
8749      const int * which;
8750      const double * reducedCost;
8751      // We can also see if infeasible or pivoting on free
8752      double tentativeTheta = 1.0e15;
8753      double upperTheta = 1.0e31;
8754      double freePivot = acceptablePivot;
8755      double bestPossible = 0.0;
8756      int numberRemaining = 0;
8757      int i;
8758      badFree = 0.0;
8759      if ((moreSpecialOptions_ & 8) != 0) {
8760           // No free or super basic
8761        // bestPossible will re recomputed if necessary
8762        bestPossible=1.0;
8763        //#define COIN_AVX7
8764 #ifndef COIN_AVX2
8765           double multiplier[] = { -1.0, 1.0};
8766 #else
8767 	  double multiplier[4]={0.0,0.0,-1.0,1.0};
8768 #endif
8769           double dualT = - dualTolerance_;
8770 #if ABOCA_LITE == 0
8771 	  int nSections=2;
8772 #else
8773 	  int numberThreads=abcState();
8774 	  int nSections=numberThreads ? 1 : 2;
8775 #endif
8776           for (int iSection = 0; iSection < nSections; iSection++) {
8777 
8778                int addSequence;
8779                unsigned char * statusArray;
8780                if (!iSection) {
8781                     work = rowArray->denseVector();
8782                     number = rowArray->getNumElements();
8783                     which = rowArray->getIndices();
8784                     reducedCost = rowReducedCost_;
8785                     addSequence = numberColumns_;
8786                     statusArray = status_ + numberColumns_;
8787                } else {
8788                     work = columnArray->denseVector();
8789                     number = columnArray->getNumElements();
8790                     which = columnArray->getIndices();
8791                     reducedCost = reducedCostWork_;
8792                     addSequence = 0;
8793                     statusArray = status_;
8794                }
8795 #ifndef COIN_AVX2
8796                for (i = 0; i < number; i++) {
8797                     int iSequence = which[i];
8798                     double alpha;
8799                     double oldValue;
8800                     double value;
8801 
8802                     assert (getStatus(iSequence + addSequence) != isFree
8803                             && getStatus(iSequence + addSequence) != superBasic);
8804                     int iStatus = (statusArray[iSequence] & 3) - 1;
8805                     if (iStatus) {
8806                          double mult = multiplier[iStatus-1];
8807                          alpha = work[i] * mult;
8808                          if (alpha > 0.0) {
8809                               oldValue = reducedCost[iSequence] * mult;
8810                               value = oldValue - tentativeTheta * alpha;
8811                               if (value < dualT) {
8812                                    value = oldValue - upperTheta * alpha;
8813                                    if (value < dualT && alpha >= acceptablePivot) {
8814                                         upperTheta = (oldValue - dualT) / alpha;
8815                                         //tentativeTheta = CoinMin(2.0*upperTheta,tentativeTheta);
8816                                    }
8817                                    // add to list
8818                                    spare[numberRemaining] = alpha * mult;
8819                                    index[numberRemaining++] = iSequence + addSequence;
8820                               }
8821                          }
8822                     }
8823                }
8824 	       //
8825 #else
8826 	       //#define COIN_AVX2 4 // temp
8827 #if COIN_AVX2 == 1
8828 #define COIN_AVX2_SHIFT 0
8829 #elif COIN_AVX2 == 2
8830 #define COIN_AVX2_SHIFT 1
8831 #elif COIN_AVX2 == 4
8832 #define COIN_AVX2_SHIFT 2
8833 #elif COIN_AVX2 == 8
8834 #define COIN_AVX2_SHIFT 3
8835 #else
8836   error;
8837 #endif
8838   //#define COIN_ALIGN 8*COIN_AVX2 // later
8839   //#define COIN_ALIGN_DOUBLE COIN_AVX2
8840 #define CHECK_CHUNK 4
8841 	       // round up
8842 	       int * whichX = const_cast<int *>(which);
8843 	       int nBlocks = (number+CHECK_CHUNK-1)/CHECK_CHUNK;
8844 	       int n=nBlocks*CHECK_CHUNK+1;
8845 	       for (int i=number;i<n;i++)
8846 		 whichX[i]=0; // alpha will be zero so not chosen
8847 	       bool acceptableX[CHECK_CHUNK+1];
8848 	       double oldValueX[CHECK_CHUNK+1];
8849 	       double newValueX[CHECK_CHUNK+1];
8850 	       double alphaX[CHECK_CHUNK+1];
8851 	       newValueX[CHECK_CHUNK]=0.0;
8852 #define USE_USE_AVX
8853 	       //#define CHECK_H 1
8854 #ifdef USE_USE_AVX
8855 #define NEED_AVX
8856 #elif CHECK_H
8857 #define NEED_AVX
8858 #endif
8859 #ifdef NEED_AVX
8860 	       double mult2[CHECK_CHUNK];
8861 	       CoinInt64 acceptableY[CHECK_CHUNK];
8862 	       CoinInt64 goodDj[CHECK_CHUNK];
8863 	       double oldValueY[CHECK_CHUNK];
8864 	       double alphaY[CHECK_CHUNK+1];
8865 	       memset(acceptableY,0,sizeof(acceptableY));
8866 	       memset(goodDj,0,sizeof(goodDj));
8867 	       memset(oldValueY,0,sizeof(oldValueY));
8868 	       memset(alphaY,0,sizeof(alphaY));
8869 	       __m256d tentative2 = _mm256_set1_pd(-tentativeTheta);
8870 	       __m256d dualT2 = _mm256_set1_pd(dualT);
8871 	       __m256d acceptable2 = _mm256_set1_pd(acceptablePivot);
8872 #endif
8873 	       for (int iBlock=0;iBlock<nBlocks;iBlock++) {
8874 		 bool store=false;
8875 		 double alpha=0.0;
8876 		 double oldValue=0.0;
8877 		 double newValue=0.0;
8878 		 double trueAlpha=0.0;
8879 		 int jSequence=0;
8880 #ifndef USE_USE_AVX
8881 		 for (int i = 0; i < CHECK_CHUNK+1; i++) {
8882 		   int iSequence = which[i];
8883 		   int iStatus = (statusArray[iSequence] & 3);
8884 		   double mult = multiplier[iStatus];
8885 		   double newAlpha = work[i]*mult;
8886 		   double oldDj = reducedCost[iSequence]*mult;
8887 		   newValue = (oldDj - tentativeTheta * newAlpha)-dualT;
8888 		   acceptableX[i]=newAlpha>= acceptablePivot;
8889 		   oldValueX[i]=oldDj;
8890 		   newValueX[i]=newValue;
8891 		   alphaX[i]=newAlpha;
8892 		 }
8893 #endif
8894 #ifdef NEED_AVX
8895 		 __m128i columns = _mm_load_si128((const __m128i *)which);
8896 		 // what do we get - this must be wrong
8897 		 // probably only 1 and 2 - can we be clever
8898 		 // fix
8899 		 //__m128i status; // = _mm256_i32gather_ps(statusArray,columns,1);
8900 		 //status.m128i_i32[0]=statusArray[columns.m128i_i32[0]];
8901 		 for (int i=0;i<CHECK_CHUNK;i++) {
8902 		   int iSequence = which[i];
8903 		   int iStatus = (statusArray[iSequence] & 3);
8904 		   mult2[i] = multiplier[iStatus];
8905 		 }
8906 		 //__m256d newAlpha2 = _mm256_i32gather_pd(multiplier,status,1); // mult here
8907 		 __m256d newAlpha2 = _mm256_load_pd(mult2);
8908 		 __m256d oldDj = _mm256_i32gather_pd(reducedCost,columns,8);
8909 		 oldDj=_mm256_mul_pd(oldDj,newAlpha2); // remember newAlpha==mult
8910 		 _mm256_store_pd(oldValueY,oldDj); // redo later
8911 		 __m256d work2 = _mm256_load_pd(work);
8912 		 newAlpha2=_mm256_mul_pd(newAlpha2,work2); // now really newAlpha
8913 		 //__m256d newValue2 = _mm256_fmadd_pd(tentative2,newAlpha2,oldDj);
8914 		 oldDj=_mm256_fmadd_pd(newAlpha2,tentative2,oldDj);
8915 		 __v4df bitsDj=_mm256_cmp_pd(oldDj,dualT2,_CMP_LT_OS);
8916 		 __v4df bitsAcceptable = _mm256_cmp_pd(newAlpha2,acceptable2,_CMP_GE_OS);
8917 		 _mm256_store_pd(reinterpret_cast<double *>(goodDj),bitsDj);
8918 		 _mm256_store_pd(reinterpret_cast<double *>(acceptableY),bitsAcceptable);
8919 		 _mm256_store_pd(alphaY,newAlpha2);
8920 #ifndef USE_USE_AVX
8921 #undef NDEBUG
8922 		 for (int i = 0; i < CHECK_CHUNK; i++) {
8923 		   assert(newValueX[i]>0.0==(goodDj[i]));
8924 		   //assert(acceptableX[i]==(acceptableY[i]));
8925 		   assert(oldValueX[i]==oldValueY[i]);
8926 		   assert(alphaX[i]==alphaY[i]);
8927 		 }
8928 		 for (int i = 0; i < CHECK_CHUNK; i++) {
8929 		   bool g1=newValueX[i]<0.0;
8930 		   bool g2=goodDj[i]!=0;
8931 		   if(g1!=g2)abort();
8932 		   //if(acceptableX[i]!=(acceptableY[i]))abort();
8933 		   if(fabs(oldValueX[i]-oldValueY[i])>1.0e-5+
8934 		      +(1.0e-10*fabs(oldValueX[i])))abort();
8935 		   if(alphaX[i]!=alphaY[i])abort();
8936 		 }
8937 #endif
8938 #endif
8939 		 for (int i = 0; i < CHECK_CHUNK+1; i++) {
8940 #ifndef USE_USE_AVX
8941 		   double newValue=newValueX[i];
8942 		   bool newStore = newValue < 0.0;
8943 		   if (store) {
8944 		     // add to list
8945 		     bool acceptable = acceptableX[i-1];
8946 		     spare[numberRemaining] = work[i-1];
8947 		     index[numberRemaining++] = which[i-1] + addSequence;
8948 		     double value = oldValueX[i-1] - upperTheta * alphaX[i-1];
8949 		     if (value < dualT && acceptable) {
8950 		       upperTheta = (oldValueX[i-1] - dualT) / alphaX[i-1];
8951 		     }
8952 		   }
8953 #else
8954 		   bool newStore = goodDj[i]!=0;
8955 		   if (store) {
8956 		     // add to list
8957 		     bool acceptable = acceptableY[i-1];
8958 		     spare[numberRemaining] = work[i-1];
8959 		     index[numberRemaining++] = which[i-1] + addSequence;
8960 		     double value = oldValueY[i-1] - upperTheta * alphaY[i-1];
8961 		     if (value < dualT && acceptable) {
8962 		       upperTheta = (oldValueY[i-1] - dualT) / alphaY[i-1];
8963 		     }
8964 		   }
8965 #endif
8966 		   store=newStore;
8967 		 }
8968 		 which += CHECK_CHUNK;
8969 		 work += CHECK_CHUNK;
8970 	       }
8971 #endif
8972           }
8973 #if ABOCA_LITE
8974 	  if (numberThreads) {
8975 	  work = columnArray->denseVector();
8976 	  number = columnArray->getNumElements();
8977 	  which = columnArray->getIndices();
8978 	  reducedCost = reducedCostWork_;
8979 	  unsigned char * statusArray = status_;
8980 
8981 	  clpTempInfo info[ABOCA_LITE];
8982 	  int chunk = (number+numberThreads-1)/numberThreads;
8983 	  int n=0;
8984 	  int nR=numberRemaining;
8985 	  for (int i=0;i<numberThreads;i++) {
8986 	    info[i].which=const_cast<int *>(which+n);
8987 	    info[i].work=const_cast<double *>(work+n);
8988 	    info[i].numberToDo=CoinMin(chunk,number-n);
8989 	    n += chunk;
8990 	    info[i].index = index+nR;
8991 	    info[i].spare = spare+nR;
8992 	    nR += chunk;
8993 	    info[i].reducedCost = const_cast<double *>(reducedCost);
8994 	    info[i].upperTheta = upperTheta;
8995 	    info[i].acceptablePivot = acceptablePivot;
8996 	    info[i].status = statusArray;
8997 	    info[i].tolerance=dualTolerance_;
8998 	  }
8999 	  for (int i=0;i<numberThreads;i++) {
9000 	    cilk_spawn dualColumn00(info[i]);
9001 	  }
9002 	  cilk_sync;
9003 	  moveAndZero(info,1,NULL);
9004 	  for (int i=0;i<numberThreads;i++) {
9005 	    numberRemaining += info[i].numberRemaining;
9006 	    upperTheta = CoinMin(upperTheta,static_cast<double>(info[i].upperTheta));
9007 	  }
9008 	  }
9009 #endif
9010      } else {
9011           // some free or super basic
9012           for (int iSection = 0; iSection < 2; iSection++) {
9013 
9014                int addSequence;
9015 
9016                if (!iSection) {
9017                     work = rowArray->denseVector();
9018                     number = rowArray->getNumElements();
9019                     which = rowArray->getIndices();
9020                     reducedCost = rowReducedCost_;
9021                     addSequence = numberColumns_;
9022                } else {
9023                     work = columnArray->denseVector();
9024                     number = columnArray->getNumElements();
9025                     which = columnArray->getIndices();
9026                     reducedCost = reducedCostWork_;
9027                     addSequence = 0;
9028                }
9029 
9030                for (i = 0; i < number; i++) {
9031                     int iSequence = which[i];
9032                     double alpha;
9033                     double oldValue;
9034                     double value;
9035                     bool keep;
9036 
9037                     switch(getStatus(iSequence + addSequence)) {
9038 
9039                     case basic:
9040                     case ClpSimplex::isFixed:
9041                          break;
9042                     case isFree:
9043                     case superBasic:
9044                          alpha = work[i];
9045                          bestPossible = CoinMax(bestPossible, fabs(alpha));
9046                          oldValue = reducedCost[iSequence];
9047                          // If free has to be very large - should come in via dualRow
9048                          //if (getStatus(iSequence+addSequence)==isFree&&fabs(alpha)<1.0e-3)
9049                          //break;
9050                          if (oldValue > dualTolerance_) {
9051                               keep = true;
9052                          } else if (oldValue < -dualTolerance_) {
9053                               keep = true;
9054                          } else {
9055                               if (fabs(alpha) > CoinMax(10.0 * acceptablePivot, 1.0e-5)) {
9056                                    keep = true;
9057                               } else {
9058                                    keep = false;
9059                                    badFree = CoinMax(badFree, fabs(alpha));
9060                               }
9061                          }
9062                          if (keep) {
9063                               // free - choose largest
9064                               if (fabs(alpha) > freePivot) {
9065                                    freePivot = fabs(alpha);
9066                                    sequenceIn_ = iSequence + addSequence;
9067                                    theta_ = oldValue / alpha;
9068                                    alpha_ = alpha;
9069                               }
9070 			      // give fake bounds if possible
9071 			      int jSequence=iSequence+addSequence;
9072 			      if (2.0*fabs(solution_[jSequence])<
9073 				  dualBound_) {
9074 				FakeBound bound = getFakeBound(jSequence);
9075 				assert (bound == ClpSimplexDual::noFake);
9076 				setFakeBound(jSequence,ClpSimplexDual::bothFake);
9077 				numberFake_++;
9078 				value = oldValue - tentativeTheta * alpha;
9079 				if (value > dualTolerance_) {
9080 				  // pretend coming in from upper bound
9081 				  upper_[jSequence] = solution_[jSequence];
9082 				  lower_[jSequence] = upper_[jSequence] - dualBound_;
9083 				  setColumnStatus(jSequence,ClpSimplex::atUpperBound);
9084 				} else {
9085 				  // pretend coming in from lower bound
9086 				  lower_[jSequence] = solution_[jSequence];
9087 				  upper_[jSequence] = lower_[jSequence] + dualBound_;
9088 				  setColumnStatus(jSequence,ClpSimplex::atLowerBound);
9089 				}
9090 			      }
9091                          }
9092                          break;
9093                     case atUpperBound:
9094                          alpha = work[i];
9095                          oldValue = reducedCost[iSequence];
9096                          value = oldValue - tentativeTheta * alpha;
9097                          //assert (oldValue<=dualTolerance_*1.0001);
9098                          if (value > dualTolerance_) {
9099                               bestPossible = CoinMax(bestPossible, -alpha);
9100                               value = oldValue - upperTheta * alpha;
9101                               if (value > dualTolerance_ && -alpha >= acceptablePivot) {
9102                                    upperTheta = (oldValue - dualTolerance_) / alpha;
9103                                    //tentativeTheta = CoinMin(2.0*upperTheta,tentativeTheta);
9104                               }
9105                               // add to list
9106                               spare[numberRemaining] = alpha;
9107                               index[numberRemaining++] = iSequence + addSequence;
9108                          }
9109                          break;
9110                     case atLowerBound:
9111                          alpha = work[i];
9112                          oldValue = reducedCost[iSequence];
9113                          value = oldValue - tentativeTheta * alpha;
9114                          //assert (oldValue>=-dualTolerance_*1.0001);
9115                          if (value < -dualTolerance_) {
9116                               bestPossible = CoinMax(bestPossible, alpha);
9117                               value = oldValue - upperTheta * alpha;
9118                               if (value < -dualTolerance_ && alpha >= acceptablePivot) {
9119                                    upperTheta = (oldValue + dualTolerance_) / alpha;
9120                                    //tentativeTheta = CoinMin(2.0*upperTheta,tentativeTheta);
9121                               }
9122                               // add to list
9123                               spare[numberRemaining] = alpha;
9124                               index[numberRemaining++] = iSequence + addSequence;
9125                          }
9126                          break;
9127                     }
9128                }
9129           }
9130      }
9131      upperReturn = upperTheta;
9132      bestReturn = bestPossible;
9133      return numberRemaining;
9134 }
9135 #endif
9136 
9137 /* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
9138 */
9139