1 /* $Id: CoinAbcHelperFunctions.cpp 2385 2019-01-06 19:43:06Z unxusr $ */
2 // Copyright (C) 2003, International Business Machines
3 // Corporation and others, Copyright (C) 2012, FasterCoin.  All Rights Reserved.
4 // This code is licensed under the terms of the Eclipse Public License (EPL).
5 
6 #include <cfloat>
7 #include <cstdlib>
8 #include <cmath>
9 #include "CoinHelperFunctions.hpp"
10 #include "CoinAbcHelperFunctions.hpp"
11 #include "CoinTypes.hpp"
12 #include "CoinFinite.hpp"
13 #include "CoinAbcCommon.hpp"
14 //#define AVX2 1
15 #if AVX2 == 1
16 #define set_const_v2df(bb, b)                         \
17   {                                                   \
18     double *temp = reinterpret_cast< double * >(&bb); \
19     temp[0] = b;                                      \
20     temp[1] = b;                                      \
21   }
22 #endif
23 #if INLINE_SCATTER == 0
CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)24 void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
25   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
26   const int *COIN_RESTRICT thisIndex,
27   CoinFactorizationDouble *COIN_RESTRICT region)
28 {
29 #if UNROLL_SCATTER == 0
30   for (CoinBigIndex j = number - 1; j >= 0; j--) {
31     CoinSimplexInt iRow = thisIndex[j];
32     CoinFactorizationDouble regionValue = region[iRow];
33     CoinFactorizationDouble value = thisElement[j];
34     assert(value);
35     region[iRow] = regionValue - value * pivotValue;
36   }
37 #elif UNROLL_SCATTER == 1
38   if ((number & 1) != 0) {
39     number--;
40     CoinSimplexInt iRow = thisIndex[number];
41     CoinFactorizationDouble regionValue = region[iRow];
42     CoinFactorizationDouble value = thisElement[number];
43     region[iRow] = regionValue - value * pivotValue;
44   }
45   for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
46     CoinSimplexInt iRow0 = thisIndex[j];
47     CoinSimplexInt iRow1 = thisIndex[j - 1];
48     CoinFactorizationDouble regionValue0 = region[iRow0];
49     CoinFactorizationDouble regionValue1 = region[iRow1];
50     region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
51     region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
52   }
53 #elif UNROLL_SCATTER == 2
54   if ((number & 1) != 0) {
55     number--;
56     CoinSimplexInt iRow = thisIndex[number];
57     CoinFactorizationDouble regionValue = region[iRow];
58     CoinFactorizationDouble value = thisElement[number];
59     region[iRow] = regionValue - value * pivotValue;
60   }
61   if ((number & 2) != 0) {
62     CoinSimplexInt iRow0 = thisIndex[number - 1];
63     CoinFactorizationDouble regionValue0 = region[iRow0];
64     CoinFactorizationDouble value0 = thisElement[number - 1];
65     CoinSimplexInt iRow1 = thisIndex[number - 2];
66     CoinFactorizationDouble regionValue1 = region[iRow1];
67     CoinFactorizationDouble value1 = thisElement[number - 2];
68     region[iRow0] = regionValue0 - value0 * pivotValue;
69     region[iRow1] = regionValue1 - value1 * pivotValue;
70     number -= 2;
71   }
72   for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
73     CoinSimplexInt iRow0 = thisIndex[j];
74     CoinSimplexInt iRow1 = thisIndex[j - 1];
75     CoinFactorizationDouble regionValue0 = region[iRow0];
76     CoinFactorizationDouble regionValue1 = region[iRow1];
77     region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
78     region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
79     CoinSimplexInt iRow2 = thisIndex[j - 2];
80     CoinSimplexInt iRow3 = thisIndex[j - 3];
81     CoinFactorizationDouble regionValue2 = region[iRow2];
82     CoinFactorizationDouble regionValue3 = region[iRow3];
83     region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
84     region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
85   }
86 #elif UNROLL_SCATTER == 3
87   CoinSimplexInt iRow0;
88   CoinSimplexInt iRow1;
89   CoinFactorizationDouble regionValue0;
90   CoinFactorizationDouble regionValue1;
91   switch (static_cast< unsigned int >(number)) {
92   case 0:
93     break;
94   case 1:
95     iRow0 = thisIndex[0];
96     regionValue0 = region[iRow0];
97     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
98     break;
99   case 2:
100     iRow0 = thisIndex[0];
101     iRow1 = thisIndex[1];
102     regionValue0 = region[iRow0];
103     regionValue1 = region[iRow1];
104     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
105     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
106     break;
107   case 3:
108     iRow0 = thisIndex[0];
109     iRow1 = thisIndex[1];
110     regionValue0 = region[iRow0];
111     regionValue1 = region[iRow1];
112     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
113     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
114     iRow0 = thisIndex[2];
115     regionValue0 = region[iRow0];
116     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
117     break;
118   case 4:
119     iRow0 = thisIndex[0];
120     iRow1 = thisIndex[1];
121     regionValue0 = region[iRow0];
122     regionValue1 = region[iRow1];
123     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
124     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
125     iRow0 = thisIndex[2];
126     iRow1 = thisIndex[3];
127     regionValue0 = region[iRow0];
128     regionValue1 = region[iRow1];
129     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
130     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
131     break;
132   case 5:
133     iRow0 = thisIndex[0];
134     iRow1 = thisIndex[1];
135     regionValue0 = region[iRow0];
136     regionValue1 = region[iRow1];
137     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
138     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
139     iRow0 = thisIndex[2];
140     iRow1 = thisIndex[3];
141     regionValue0 = region[iRow0];
142     regionValue1 = region[iRow1];
143     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
144     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
145     iRow0 = thisIndex[4];
146     regionValue0 = region[iRow0];
147     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
148     break;
149   case 6:
150     iRow0 = thisIndex[0];
151     iRow1 = thisIndex[1];
152     regionValue0 = region[iRow0];
153     regionValue1 = region[iRow1];
154     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
155     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
156     iRow0 = thisIndex[2];
157     iRow1 = thisIndex[3];
158     regionValue0 = region[iRow0];
159     regionValue1 = region[iRow1];
160     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
161     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
162     iRow0 = thisIndex[4];
163     iRow1 = thisIndex[5];
164     regionValue0 = region[iRow0];
165     regionValue1 = region[iRow1];
166     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
167     region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
168     break;
169   case 7:
170     iRow0 = thisIndex[0];
171     iRow1 = thisIndex[1];
172     regionValue0 = region[iRow0];
173     regionValue1 = region[iRow1];
174     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
175     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
176     iRow0 = thisIndex[2];
177     iRow1 = thisIndex[3];
178     regionValue0 = region[iRow0];
179     regionValue1 = region[iRow1];
180     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
181     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
182     iRow0 = thisIndex[4];
183     iRow1 = thisIndex[5];
184     regionValue0 = region[iRow0];
185     regionValue1 = region[iRow1];
186     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
187     region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
188     iRow0 = thisIndex[6];
189     regionValue0 = region[iRow0];
190     region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
191     break;
192   case 8:
193     iRow0 = thisIndex[0];
194     iRow1 = thisIndex[1];
195     regionValue0 = region[iRow0];
196     regionValue1 = region[iRow1];
197     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
198     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
199     iRow0 = thisIndex[2];
200     iRow1 = thisIndex[3];
201     regionValue0 = region[iRow0];
202     regionValue1 = region[iRow1];
203     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
204     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
205     iRow0 = thisIndex[4];
206     iRow1 = thisIndex[5];
207     regionValue0 = region[iRow0];
208     regionValue1 = region[iRow1];
209     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
210     region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
211     iRow0 = thisIndex[6];
212     iRow1 = thisIndex[7];
213     regionValue0 = region[iRow0];
214     regionValue1 = region[iRow1];
215     region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
216     region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
217     break;
218   default:
219     if ((number & 1) != 0) {
220       number--;
221       CoinSimplexInt iRow = thisIndex[number];
222       CoinFactorizationDouble regionValue = region[iRow];
223       CoinFactorizationDouble value = thisElement[number];
224       region[iRow] = regionValue - value * pivotValue;
225     }
226     for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
227       CoinSimplexInt iRow0 = thisIndex[j];
228       CoinSimplexInt iRow1 = thisIndex[j - 1];
229       CoinFactorizationDouble regionValue0 = region[iRow0];
230       CoinFactorizationDouble regionValue1 = region[iRow1];
231       region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
232       region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
233     }
234     break;
235   }
236 #endif
237 }
238 #endif
239 #if INLINE_GATHER == 0
240 CoinFactorizationDouble
CoinAbcGatherUpdate(CoinSimplexInt number,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)241 CoinAbcGatherUpdate(CoinSimplexInt number,
242   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
243   const int *COIN_RESTRICT thisIndex,
244   CoinFactorizationDouble *COIN_RESTRICT region)
245 {
246 #if UNROLL_GATHER == 0
247   CoinFactorizationDouble pivotValue = 0.0;
248   for (CoinBigIndex j = 0; j < number; j++) {
249     CoinFactorizationDouble value = thisElement[j];
250     CoinSimplexInt jRow = thisIndex[j];
251     value *= region[jRow];
252     pivotValue -= value;
253   }
254   return pivotValue;
255 #else
256 #error code
257 #endif
258 }
259 #endif
260 #if INLINE_MULTIPLY_ADD == 0
CoinAbcMultiplyIndexed(int number,const CoinFactorizationDouble * COIN_RESTRICT multiplier,const int * COIN_RESTRICT thisIndex,CoinSimplexDouble * COIN_RESTRICT region)261 void CoinAbcMultiplyIndexed(int number,
262   const CoinFactorizationDouble *COIN_RESTRICT multiplier,
263   const int *COIN_RESTRICT thisIndex,
264   CoinSimplexDouble *COIN_RESTRICT region)
265 {
266 #ifndef INTEL_COMPILER
267 // was #pragma simd
268 #endif
269 #if UNROLL_MULTIPLY_ADD == 0
270   for (CoinSimplexInt i = 0; i < number; i++) {
271     CoinSimplexInt iRow = thisIndex[i];
272     CoinSimplexDouble value = region[iRow];
273     value *= multiplier[iRow];
274     region[iRow] = value;
275   }
276 #else
277 #error code
278 #endif
279 }
CoinAbcMultiplyIndexed(int number,const long double * COIN_RESTRICT multiplier,const int * COIN_RESTRICT thisIndex,long double * COIN_RESTRICT region)280 void CoinAbcMultiplyIndexed(int number,
281   const long double *COIN_RESTRICT multiplier,
282   const int *COIN_RESTRICT thisIndex,
283   long double *COIN_RESTRICT region)
284 {
285 #ifndef INTEL_COMPILER
286 // was #pragma simd
287 #endif
288 #if UNROLL_MULTIPLY_ADD == 0
289   for (CoinSimplexInt i = 0; i < number; i++) {
290     CoinSimplexInt iRow = thisIndex[i];
291     long double value = region[iRow];
292     value *= multiplier[iRow];
293     region[iRow] = value;
294   }
295 #else
296 #error code
297 #endif
298 }
299 #endif
300 double
CoinAbcMaximumAbsElement(const double * region,int sizeIn)301 CoinAbcMaximumAbsElement(const double *region, int sizeIn)
302 {
303   int size = sizeIn;
304   double maxValue = 0.0;
305   //printf("a\n");
306 #ifndef INTEL_COMPILER
307 // was #pragma simd
308 #endif
309   /*cilk_*/ for (int i = 0; i < size; i++)
310     maxValue = CoinMax(maxValue, fabs(region[i]));
311   return maxValue;
312 }
CoinAbcMinMaxAbsElement(const double * region,int sizeIn,double & minimum,double & maximum)313 void CoinAbcMinMaxAbsElement(const double *region, int sizeIn, double &minimum, double &maximum)
314 {
315   double minValue = minimum;
316   double maxValue = maximum;
317   int size = sizeIn;
318   //printf("b\n");
319 #ifndef INTEL_COMPILER
320 // was #pragma simd
321 #endif
322   /*cilk_*/ for (int i = 0; i < size; i++) {
323     double value = fabs(region[i]);
324     minValue = CoinMin(value, minValue);
325     maxValue = CoinMax(value, maxValue);
326   }
327   minimum = minValue;
328   maximum = maxValue;
329 }
CoinAbcMinMaxAbsNormalValues(const double * region,int sizeIn,double & minimum,double & maximum)330 void CoinAbcMinMaxAbsNormalValues(const double *region, int sizeIn, double &minimum, double &maximum)
331 {
332   int size = sizeIn;
333   double minValue = minimum;
334   double maxValue = maximum;
335 #define NORMAL_LOW_VALUE 1.0e-8
336 #define NORMAL_HIGH_VALUE 1.0e8
337   //printf("c\n");
338 #ifndef INTEL_COMPILER
339 // was #pragma simd
340 #endif
341   /*cilk_*/ for (int i = 0; i < size; i++) {
342     double value = fabs(region[i]);
343     if (value > NORMAL_LOW_VALUE && value < NORMAL_HIGH_VALUE) {
344       minValue = CoinMin(value, minValue);
345       maxValue = CoinMax(value, maxValue);
346     }
347   }
348   minimum = minValue;
349   maximum = maxValue;
350 }
CoinAbcScale(double * region,double multiplier,int sizeIn)351 void CoinAbcScale(double *region, double multiplier, int sizeIn)
352 {
353   int size = sizeIn;
354   // used printf("d\n");
355 #ifndef INTEL_COMPILER
356 // was #pragma simd
357 #endif
358 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
359   cilk_for(int i = 0; i < size; i++)
360   {
361     region[i] *= multiplier;
362   }
363 }
CoinAbcScaleNormalValues(double * region,double multiplier,double killIfLessThanThis,int sizeIn)364 void CoinAbcScaleNormalValues(double *region, double multiplier, double killIfLessThanThis, int sizeIn)
365 {
366   int size = sizeIn;
367   // used printf("e\n");
368 #ifndef INTEL_COMPILER
369 // was #pragma simd
370 #endif
371 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
372   cilk_for(int i = 0; i < size; i++)
373   {
374     double value = fabs(region[i]);
375     if (value > killIfLessThanThis) {
376       if (value != COIN_DBL_MAX) {
377         value *= multiplier;
378         if (value > killIfLessThanThis)
379           region[i] *= multiplier;
380         else
381           region[i] = 0.0;
382       }
383     } else {
384       region[i] = 0.0;
385     }
386   }
387 }
388 // maximum fabs(region[i]) and then region[i]*=multiplier
389 double
CoinAbcMaximumAbsElementAndScale(double * region,double multiplier,int sizeIn)390 CoinAbcMaximumAbsElementAndScale(double *region, double multiplier, int sizeIn)
391 {
392   double maxValue = 0.0;
393   int size = sizeIn;
394   //printf("f\n");
395 #ifndef INTEL_COMPILER
396 // was #pragma simd
397 #endif
398   /*cilk_*/ for (int i = 0; i < size; i++) {
399     double value = fabs(region[i]);
400     region[i] *= multiplier;
401     maxValue = CoinMax(value, maxValue);
402   }
403   return maxValue;
404 }
CoinAbcSetElements(double * region,int sizeIn,double value)405 void CoinAbcSetElements(double *region, int sizeIn, double value)
406 {
407   int size = sizeIn;
408   printf("g\n");
409 #ifndef INTEL_COMPILER
410 // was #pragma simd
411 #endif
412 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
413   cilk_for(int i = 0; i < size; i++)
414     region[i]
415     = value;
416 }
CoinAbcMultiplyAdd(const double * region1,int sizeIn,double multiplier1,double * regionChanged,double multiplier2)417 void CoinAbcMultiplyAdd(const double *region1, int sizeIn, double multiplier1,
418   double *regionChanged, double multiplier2)
419 {
420   //printf("h\n");
421   int size = sizeIn;
422   if (multiplier1 == 1.0) {
423     if (multiplier2 == 1.0) {
424 #ifndef INTEL_COMPILER
425 // was #pragma simd
426 #endif
427       /*cilk_*/ for (int i = 0; i < size; i++)
428         regionChanged[i] = region1[i] + regionChanged[i];
429     } else if (multiplier2 == -1.0) {
430 #ifndef INTEL_COMPILER
431 // was #pragma simd
432 #endif
433       /*cilk_*/ for (int i = 0; i < size; i++)
434         regionChanged[i] = region1[i] - regionChanged[i];
435     } else if (multiplier2 == 0.0) {
436 #ifndef INTEL_COMPILER
437 // was #pragma simd
438 #endif
439       /*cilk_*/ for (int i = 0; i < size; i++)
440         regionChanged[i] = region1[i];
441     } else {
442 #ifndef INTEL_COMPILER
443 // was #pragma simd
444 #endif
445       /*cilk_*/ for (int i = 0; i < size; i++)
446         regionChanged[i] = region1[i] + multiplier2 * regionChanged[i];
447     }
448   } else if (multiplier1 == -1.0) {
449     if (multiplier2 == 1.0) {
450 #ifndef INTEL_COMPILER
451 // was #pragma simd
452 #endif
453       /*cilk_*/ for (int i = 0; i < size; i++)
454         regionChanged[i] = -region1[i] + regionChanged[i];
455     } else if (multiplier2 == -1.0) {
456 #ifndef INTEL_COMPILER
457 // was #pragma simd
458 #endif
459       /*cilk_*/ for (int i = 0; i < size; i++)
460         regionChanged[i] = -region1[i] - regionChanged[i];
461     } else if (multiplier2 == 0.0) {
462 #ifndef INTEL_COMPILER
463 // was #pragma simd
464 #endif
465       /*cilk_*/ for (int i = 0; i < size; i++)
466         regionChanged[i] = -region1[i];
467     } else {
468 #ifndef INTEL_COMPILER
469 // was #pragma simd
470 #endif
471       /*cilk_*/ for (int i = 0; i < size; i++)
472         regionChanged[i] = -region1[i] + multiplier2 * regionChanged[i];
473     }
474   } else if (multiplier1 == 0.0) {
475     if (multiplier2 == 1.0) {
476       // nothing to do
477     } else if (multiplier2 == -1.0) {
478 #ifndef INTEL_COMPILER
479 // was #pragma simd
480 #endif
481       /*cilk_*/ for (int i = 0; i < size; i++)
482         regionChanged[i] = -regionChanged[i];
483     } else if (multiplier2 == 0.0) {
484 #ifndef INTEL_COMPILER
485 // was #pragma simd
486 #endif
487       /*cilk_*/ for (int i = 0; i < size; i++)
488         regionChanged[i] = 0.0;
489     } else {
490 #ifndef INTEL_COMPILER
491 // was #pragma simd
492 #endif
493       /*cilk_*/ for (int i = 0; i < size; i++)
494         regionChanged[i] = multiplier2 * regionChanged[i];
495     }
496   } else {
497     if (multiplier2 == 1.0) {
498 #ifndef INTEL_COMPILER
499 // was #pragma simd
500 #endif
501       /*cilk_*/ for (int i = 0; i < size; i++)
502         regionChanged[i] = multiplier1 * region1[i] + regionChanged[i];
503     } else if (multiplier2 == -1.0) {
504 #ifndef INTEL_COMPILER
505 // was #pragma simd
506 #endif
507       /*cilk_*/ for (int i = 0; i < size; i++)
508         regionChanged[i] = multiplier1 * region1[i] - regionChanged[i];
509     } else if (multiplier2 == 0.0) {
510 #ifndef INTEL_COMPILER
511 // was #pragma simd
512 #endif
513       /*cilk_*/ for (int i = 0; i < size; i++)
514         regionChanged[i] = multiplier1 * region1[i];
515     } else {
516 #ifndef INTEL_COMPILER
517 // was #pragma simd
518 #endif
519       /*cilk_*/ for (int i = 0; i < size; i++)
520         regionChanged[i] = multiplier1 * region1[i] + multiplier2 * regionChanged[i];
521     }
522   }
523 }
524 double
CoinAbcInnerProduct(const double * region1,int sizeIn,const double * region2)525 CoinAbcInnerProduct(const double *region1, int sizeIn, const double *region2)
526 {
527   int size = sizeIn;
528   //printf("i\n");
529   double value = 0.0;
530 #ifndef INTEL_COMPILER
531 // was #pragma simd
532 #endif
533   /*cilk_*/ for (int i = 0; i < size; i++)
534     value += region1[i] * region2[i];
535   return value;
536 }
CoinAbcGetNorms(const double * region,int sizeIn,double & norm1,double & norm2)537 void CoinAbcGetNorms(const double *region, int sizeIn, double &norm1, double &norm2)
538 {
539   int size = sizeIn;
540   //printf("j\n");
541   norm1 = 0.0;
542   norm2 = 0.0;
543 #ifndef INTEL_COMPILER
544 // was #pragma simd
545 #endif
546   /*cilk_*/ for (int i = 0; i < size; i++) {
547     norm2 += region[i] * region[i];
548     norm1 = CoinMax(norm1, fabs(region[i]));
549   }
550 }
551 // regionTo[index[i]]=regionFrom[i]
CoinAbcScatterTo(const double * regionFrom,double * regionTo,const int * index,int numberIn)552 void CoinAbcScatterTo(const double *regionFrom, double *regionTo, const int *index, int numberIn)
553 {
554   int number = numberIn;
555   // used printf("k\n");
556 #ifndef INTEL_COMPILER
557 // was #pragma simd
558 #endif
559 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
560   cilk_for(int i = 0; i < number; i++)
561   {
562     int k = index[i];
563     regionTo[k] = regionFrom[i];
564   }
565 }
566 // regionTo[i]=regionFrom[index[i]]
CoinAbcGatherFrom(const double * regionFrom,double * regionTo,const int * index,int numberIn)567 void CoinAbcGatherFrom(const double *regionFrom, double *regionTo, const int *index, int numberIn)
568 {
569   int number = numberIn;
570   // used printf("l\n");
571 #ifndef INTEL_COMPILER
572 // was #pragma simd
573 #endif
574 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
575   cilk_for(int i = 0; i < number; i++)
576   {
577     int k = index[i];
578     regionTo[i] = regionFrom[k];
579   }
580 }
581 // regionTo[index[i]]=0.0
CoinAbcScatterZeroTo(double * regionTo,const int * index,int numberIn)582 void CoinAbcScatterZeroTo(double *regionTo, const int *index, int numberIn)
583 {
584   int number = numberIn;
585   // used printf("m\n");
586 #ifndef INTEL_COMPILER
587 // was #pragma simd
588 #endif
589 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
590   cilk_for(int i = 0; i < number; i++)
591   {
592     int k = index[i];
593     regionTo[k] = 0.0;
594   }
595 }
596 // regionTo[indexScatter[indexList[i]]]=regionFrom[indexList[i]]
CoinAbcScatterToList(const double * regionFrom,double * regionTo,const int * indexList,const int * indexScatter,int numberIn)597 void CoinAbcScatterToList(const double *regionFrom, double *regionTo,
598   const int *indexList, const int *indexScatter, int numberIn)
599 {
600   int number = numberIn;
601   //printf("n\n");
602 #ifndef INTEL_COMPILER
603 // was #pragma simd
604 #endif
605   /*cilk_*/ for (int i = 0; i < number; i++) {
606     int j = indexList[i];
607     double value = regionFrom[j];
608     int k = indexScatter[j];
609     regionTo[k] = value;
610   }
611 }
CoinAbcInverseSqrts(double * array,int nIn)612 void CoinAbcInverseSqrts(double *array, int nIn)
613 {
614   int n = nIn;
615   // used printf("o\n");
616 #ifndef INTEL_COMPILER
617 // was #pragma simd
618 #endif
619 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
620   cilk_for(int i = 0; i < n; i++)
621     array[i]
622     = 1.0 / sqrt(array[i]);
623 }
CoinAbcReciprocal(double * array,int nIn,const double * input)624 void CoinAbcReciprocal(double *array, int nIn, const double *input)
625 {
626   int n = nIn;
627   // used printf("p\n");
628 #ifndef INTEL_COMPILER
629 // was #pragma simd
630 #endif
631 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
632   cilk_for(int i = 0; i < n; i++)
633     array[i]
634     = 1.0 / input[i];
635 }
CoinAbcMemcpyLong(double * array,const double * arrayFrom,int size)636 void CoinAbcMemcpyLong(double *array, const double *arrayFrom, int size)
637 {
638   memcpy(array, arrayFrom, size * sizeof(double));
639 }
CoinAbcMemcpyLong(int * array,const int * arrayFrom,int size)640 void CoinAbcMemcpyLong(int *array, const int *arrayFrom, int size)
641 {
642   memcpy(array, arrayFrom, size * sizeof(int));
643 }
CoinAbcMemcpyLong(unsigned char * array,const unsigned char * arrayFrom,int size)644 void CoinAbcMemcpyLong(unsigned char *array, const unsigned char *arrayFrom, int size)
645 {
646   memcpy(array, arrayFrom, size * sizeof(unsigned char));
647 }
CoinAbcMemset0Long(double * array,int size)648 void CoinAbcMemset0Long(double *array, int size)
649 {
650   memset(array, 0, size * sizeof(double));
651 }
CoinAbcMemset0Long(int * array,int size)652 void CoinAbcMemset0Long(int *array, int size)
653 {
654   memset(array, 0, size * sizeof(int));
655 }
CoinAbcMemset0Long(unsigned char * array,int size)656 void CoinAbcMemset0Long(unsigned char *array, int size)
657 {
658   memset(array, 0, size * sizeof(unsigned char));
659 }
CoinAbcMemmove(double * array,const double * arrayFrom,int size)660 void CoinAbcMemmove(double *array, const double *arrayFrom, int size)
661 {
662   memmove(array, arrayFrom, size * sizeof(double));
663 }
CoinAbcMemmove(int * array,const int * arrayFrom,int size)664 void CoinAbcMemmove(int *array, const int *arrayFrom, int size)
665 {
666   memmove(array, arrayFrom, size * sizeof(int));
667 }
CoinAbcMemmove(unsigned char * array,const unsigned char * arrayFrom,int size)668 void CoinAbcMemmove(unsigned char *array, const unsigned char *arrayFrom, int size)
669 {
670   memmove(array, arrayFrom, size * sizeof(unsigned char));
671 }
672 // This moves down and zeroes out end
CoinAbcMemmoveAndZero(double * array,double * arrayFrom,int size)673 void CoinAbcMemmoveAndZero(double *array, double *arrayFrom, int size)
674 {
675   assert(arrayFrom - array >= 0);
676   memmove(array, arrayFrom, size * sizeof(double));
677   double *endArray = array + size;
678   double *endArrayFrom = arrayFrom + size;
679   double *startZero = CoinMax(arrayFrom, endArray);
680   memset(startZero, 0, (endArrayFrom - startZero) * sizeof(double));
681 }
682 // This compacts several sections and zeroes out end
CoinAbcCompact(int numberSections,int alreadyDone,double * array,const int * starts,const int * lengths)683 int CoinAbcCompact(int numberSections, int alreadyDone, double *array, const int *starts, const int *lengths)
684 {
685   int totalLength = alreadyDone;
686   for (int i = 0; i < numberSections; i++) {
687     totalLength += lengths[i];
688   }
689   for (int i = 0; i < numberSections; i++) {
690     int start = starts[i];
691     int length = lengths[i];
692     int end = start + length;
693     memmove(array + alreadyDone, array + start, length * sizeof(double));
694     if (end > totalLength) {
695       start = CoinMax(start, totalLength);
696       memset(array + start, 0, (end - start) * sizeof(double));
697     }
698     alreadyDone += length;
699   }
700   return totalLength;
701 }
702 // This compacts several sections
CoinAbcCompact(int numberSections,int alreadyDone,int * array,const int * starts,const int * lengths)703 int CoinAbcCompact(int numberSections, int alreadyDone, int *array, const int *starts, const int *lengths)
704 {
705   for (int i = 0; i < numberSections; i++) {
706     memmove(array + alreadyDone, array + starts[i], lengths[i] * sizeof(int));
707     alreadyDone += lengths[i];
708   }
709   return alreadyDone;
710 }
CoinAbcScatterUpdate0(int number,CoinFactorizationDouble,const CoinFactorizationDouble * COIN_RESTRICT,const int * COIN_RESTRICT,CoinFactorizationDouble * COIN_RESTRICT)711 void CoinAbcScatterUpdate0(int number, CoinFactorizationDouble /*pivotValue*/,
712   const CoinFactorizationDouble *COIN_RESTRICT /*thisElement*/,
713   const int *COIN_RESTRICT /*thisIndex*/,
714   CoinFactorizationDouble *COIN_RESTRICT /*region*/)
715 {
716   assert(number == 0);
717 }
CoinAbcScatterUpdate1(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)718 void CoinAbcScatterUpdate1(int number, CoinFactorizationDouble pivotValue,
719   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
720   const int *COIN_RESTRICT thisIndex,
721   CoinFactorizationDouble *COIN_RESTRICT region)
722 {
723   assert(number == 1);
724   int iRow0 = thisIndex[0];
725   CoinFactorizationDouble regionValue0 = region[iRow0];
726   region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
727 }
CoinAbcScatterUpdate2(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)728 void CoinAbcScatterUpdate2(int number, CoinFactorizationDouble pivotValue,
729   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
730   const int *COIN_RESTRICT thisIndex,
731   CoinFactorizationDouble *COIN_RESTRICT region)
732 {
733   assert(number == 2);
734   int iRow0 = thisIndex[0];
735   int iRow1 = thisIndex[1];
736   CoinFactorizationDouble regionValue0 = region[iRow0];
737   CoinFactorizationDouble regionValue1 = region[iRow1];
738   region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
739   region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
740 }
CoinAbcScatterUpdate3(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)741 void CoinAbcScatterUpdate3(int number, CoinFactorizationDouble pivotValue,
742   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
743   const int *COIN_RESTRICT thisIndex,
744   CoinFactorizationDouble *COIN_RESTRICT region)
745 {
746   assert(number == 3);
747   int iRow0 = thisIndex[0];
748   int iRow1 = thisIndex[1];
749   CoinFactorizationDouble regionValue0 = region[iRow0];
750   CoinFactorizationDouble regionValue1 = region[iRow1];
751   region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
752   region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
753   iRow0 = thisIndex[2];
754   regionValue0 = region[iRow0];
755   region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
756 }
CoinAbcScatterUpdate4(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)757 void CoinAbcScatterUpdate4(int number, CoinFactorizationDouble pivotValue,
758   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
759   const int *COIN_RESTRICT thisIndex,
760   CoinFactorizationDouble *COIN_RESTRICT region)
761 {
762   assert(number == 4);
763   int iRow0 = thisIndex[0];
764   int iRow1 = thisIndex[1];
765   CoinFactorizationDouble regionValue0 = region[iRow0];
766   CoinFactorizationDouble regionValue1 = region[iRow1];
767   region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
768   region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
769   iRow0 = thisIndex[2];
770   iRow1 = thisIndex[3];
771   regionValue0 = region[iRow0];
772   regionValue1 = region[iRow1];
773   region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
774   region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
775 }
CoinAbcScatterUpdate5(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)776 void CoinAbcScatterUpdate5(int number, CoinFactorizationDouble pivotValue,
777   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
778   const int *COIN_RESTRICT thisIndex,
779   CoinFactorizationDouble *COIN_RESTRICT region)
780 {
781   assert(number == 5);
782   int iRow0 = thisIndex[0];
783   int iRow1 = thisIndex[1];
784   CoinFactorizationDouble regionValue0 = region[iRow0];
785   CoinFactorizationDouble regionValue1 = region[iRow1];
786   region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
787   region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
788   iRow0 = thisIndex[2];
789   iRow1 = thisIndex[3];
790   regionValue0 = region[iRow0];
791   regionValue1 = region[iRow1];
792   region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
793   region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
794   iRow0 = thisIndex[4];
795   regionValue0 = region[iRow0];
796   region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
797 }
CoinAbcScatterUpdate6(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)798 void CoinAbcScatterUpdate6(int number, CoinFactorizationDouble pivotValue,
799   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
800   const int *COIN_RESTRICT thisIndex,
801   CoinFactorizationDouble *COIN_RESTRICT region)
802 {
803   assert(number == 6);
804   int iRow0 = thisIndex[0];
805   int iRow1 = thisIndex[1];
806   CoinFactorizationDouble regionValue0 = region[iRow0];
807   CoinFactorizationDouble regionValue1 = region[iRow1];
808   region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
809   region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
810   iRow0 = thisIndex[2];
811   iRow1 = thisIndex[3];
812   regionValue0 = region[iRow0];
813   regionValue1 = region[iRow1];
814   region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
815   region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
816   iRow0 = thisIndex[4];
817   iRow1 = thisIndex[5];
818   regionValue0 = region[iRow0];
819   regionValue1 = region[iRow1];
820   region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
821   region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
822 }
CoinAbcScatterUpdate7(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)823 void CoinAbcScatterUpdate7(int number, CoinFactorizationDouble pivotValue,
824   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
825   const int *COIN_RESTRICT thisIndex,
826   CoinFactorizationDouble *COIN_RESTRICT region)
827 {
828   assert(number == 7);
829   int iRow0 = thisIndex[0];
830   int iRow1 = thisIndex[1];
831   CoinFactorizationDouble regionValue0 = region[iRow0];
832   CoinFactorizationDouble regionValue1 = region[iRow1];
833   region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
834   region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
835   iRow0 = thisIndex[2];
836   iRow1 = thisIndex[3];
837   regionValue0 = region[iRow0];
838   regionValue1 = region[iRow1];
839   region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
840   region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
841   iRow0 = thisIndex[4];
842   iRow1 = thisIndex[5];
843   regionValue0 = region[iRow0];
844   regionValue1 = region[iRow1];
845   region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
846   region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
847   iRow0 = thisIndex[6];
848   regionValue0 = region[iRow0];
849   region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
850 }
CoinAbcScatterUpdate8(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)851 void CoinAbcScatterUpdate8(int number, CoinFactorizationDouble pivotValue,
852   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
853   const int *COIN_RESTRICT thisIndex,
854   CoinFactorizationDouble *COIN_RESTRICT region)
855 {
856   assert(number == 8);
857   int iRow0 = thisIndex[0];
858   int iRow1 = thisIndex[1];
859   CoinFactorizationDouble regionValue0 = region[iRow0];
860   CoinFactorizationDouble regionValue1 = region[iRow1];
861   region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
862   region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
863   iRow0 = thisIndex[2];
864   iRow1 = thisIndex[3];
865   regionValue0 = region[iRow0];
866   regionValue1 = region[iRow1];
867   region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
868   region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
869   iRow0 = thisIndex[4];
870   iRow1 = thisIndex[5];
871   regionValue0 = region[iRow0];
872   regionValue1 = region[iRow1];
873   region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
874   region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
875   iRow0 = thisIndex[6];
876   iRow1 = thisIndex[7];
877   regionValue0 = region[iRow0];
878   regionValue1 = region[iRow1];
879   region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
880   region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
881 }
CoinAbcScatterUpdate4N(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)882 void CoinAbcScatterUpdate4N(int number, CoinFactorizationDouble pivotValue,
883   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
884   const int *COIN_RESTRICT thisIndex,
885   CoinFactorizationDouble *COIN_RESTRICT region)
886 {
887   assert((number & 3) == 0);
888   for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
889     CoinSimplexInt iRow0 = thisIndex[j];
890     CoinSimplexInt iRow1 = thisIndex[j - 1];
891     CoinFactorizationDouble regionValue0 = region[iRow0];
892     CoinFactorizationDouble regionValue1 = region[iRow1];
893     region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
894     region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
895     iRow0 = thisIndex[j - 2];
896     iRow1 = thisIndex[j - 3];
897     regionValue0 = region[iRow0];
898     regionValue1 = region[iRow1];
899     region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
900     region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
901   }
902 }
CoinAbcScatterUpdate4NPlus1(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)903 void CoinAbcScatterUpdate4NPlus1(int number, CoinFactorizationDouble pivotValue,
904   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
905   const int *COIN_RESTRICT thisIndex,
906   CoinFactorizationDouble *COIN_RESTRICT region)
907 {
908   assert((number & 3) == 1);
909   int iRow0 = thisIndex[number - 1];
910   CoinFactorizationDouble regionValue0 = region[iRow0];
911   region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
912   number -= 1;
913   for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
914     CoinSimplexInt iRow0 = thisIndex[j];
915     CoinSimplexInt iRow1 = thisIndex[j - 1];
916     CoinFactorizationDouble regionValue0 = region[iRow0];
917     CoinFactorizationDouble regionValue1 = region[iRow1];
918     region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
919     region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
920     iRow0 = thisIndex[j - 2];
921     iRow1 = thisIndex[j - 3];
922     regionValue0 = region[iRow0];
923     regionValue1 = region[iRow1];
924     region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
925     region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
926   }
927 }
CoinAbcScatterUpdate4NPlus2(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)928 void CoinAbcScatterUpdate4NPlus2(int number, CoinFactorizationDouble pivotValue,
929   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
930   const int *COIN_RESTRICT thisIndex,
931   CoinFactorizationDouble *COIN_RESTRICT region)
932 {
933   assert((number & 3) == 2);
934   int iRow0 = thisIndex[number - 1];
935   int iRow1 = thisIndex[number - 2];
936   CoinFactorizationDouble regionValue0 = region[iRow0];
937   CoinFactorizationDouble regionValue1 = region[iRow1];
938   region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
939   region[iRow1] = regionValue1 - thisElement[number - 2] * pivotValue;
940   number -= 2;
941   for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
942     CoinSimplexInt iRow0 = thisIndex[j];
943     CoinSimplexInt iRow1 = thisIndex[j - 1];
944     CoinFactorizationDouble regionValue0 = region[iRow0];
945     CoinFactorizationDouble regionValue1 = region[iRow1];
946     region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
947     region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
948     iRow0 = thisIndex[j - 2];
949     iRow1 = thisIndex[j - 3];
950     regionValue0 = region[iRow0];
951     regionValue1 = region[iRow1];
952     region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
953     region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
954   }
955 }
CoinAbcScatterUpdate4NPlus3(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)956 void CoinAbcScatterUpdate4NPlus3(int number, CoinFactorizationDouble pivotValue,
957   const CoinFactorizationDouble *COIN_RESTRICT thisElement,
958   const int *COIN_RESTRICT thisIndex,
959   CoinFactorizationDouble *COIN_RESTRICT region)
960 {
961   assert((number & 3) == 3);
962   int iRow0 = thisIndex[number - 1];
963   int iRow1 = thisIndex[number - 2];
964   CoinFactorizationDouble regionValue0 = region[iRow0];
965   CoinFactorizationDouble regionValue1 = region[iRow1];
966   region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
967   region[iRow1] = regionValue1 - thisElement[number - 2] * pivotValue;
968   iRow0 = thisIndex[number - 3];
969   regionValue0 = region[iRow0];
970   region[iRow0] = regionValue0 - thisElement[number - 3] * pivotValue;
971   number -= 3;
972   for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
973     CoinSimplexInt iRow0 = thisIndex[j];
974     CoinSimplexInt iRow1 = thisIndex[j - 1];
975     CoinFactorizationDouble regionValue0 = region[iRow0];
976     CoinFactorizationDouble regionValue1 = region[iRow1];
977     region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
978     region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
979     iRow0 = thisIndex[j - 2];
980     iRow1 = thisIndex[j - 3];
981     regionValue0 = region[iRow0];
982     regionValue1 = region[iRow1];
983     region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
984     region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
985   }
986 }
CoinAbcScatterUpdate0(int numberIn,CoinFactorizationDouble,const CoinFactorizationDouble * COIN_RESTRICT element,CoinFactorizationDouble * COIN_RESTRICT)987 void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble /*multiplier*/,
988   const CoinFactorizationDouble *COIN_RESTRICT element,
989   CoinFactorizationDouble *COIN_RESTRICT /*region*/)
990 {
991 #ifndef NDEBUG
992   assert(numberIn == 0);
993 #endif
994 }
995 // start alternatives
996 #if 0
997 void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier,
998 			  const CoinFactorizationDouble *  COIN_RESTRICT element,
999 			  CoinFactorizationDouble * COIN_RESTRICT region)
1000 {
1001 #ifndef NDEBUG
1002   assert (numberIn==1);
1003 #endif
1004   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1005   element++;
1006   int iColumn0=thisColumn[0];
1007   CoinFactorizationDouble value0=region[iColumn0];
1008   value0 += multiplier*element[0];
1009   region[iColumn0]=value0;
1010 }
1011 void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier,
1012 			  const CoinFactorizationDouble *  COIN_RESTRICT element,
1013 			  CoinFactorizationDouble * COIN_RESTRICT region)
1014 {
1015 #ifndef NDEBUG
1016   assert (numberIn==2);
1017 #endif
1018   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1019 #if NEW_CHUNK_SIZE == 2
1020   int nFull=2&(~(NEW_CHUNK_SIZE-1));
1021   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1022     coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1023     int iColumn0=thisColumn[0];
1024     int iColumn1=thisColumn[1];
1025     CoinFactorizationDouble value0=region[iColumn0];
1026     CoinFactorizationDouble value1=region[iColumn1];
1027     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1028     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1029     region[iColumn0]=value0;
1030     region[iColumn1]=value1;
1031     element+=NEW_CHUNK_SIZE_INCREMENT;
1032     thisColumn = reinterpret_cast<const int *>(element);
1033   }
1034 #endif
1035 #if NEW_CHUNK_SIZE == 4
1036   element++;
1037   int iColumn0=thisColumn[0];
1038   int iColumn1=thisColumn[1];
1039   CoinFactorizationDouble value0=region[iColumn0];
1040   CoinFactorizationDouble value1=region[iColumn1];
1041   value0 += multiplier*element[0];
1042   value1 += multiplier*element[1];
1043   region[iColumn0]=value0;
1044   region[iColumn1]=value1;
1045 #endif
1046 }
1047 void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier,
1048 			  const CoinFactorizationDouble *  COIN_RESTRICT element,
1049 			  CoinFactorizationDouble * COIN_RESTRICT region)
1050 {
1051 #ifndef NDEBUG
1052   assert (numberIn==3);
1053 #endif
1054   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1055 #if NEW_CHUNK_SIZE == 2
1056   int nFull=3&(~(NEW_CHUNK_SIZE-1));
1057   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1058     coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1059     int iColumn0=thisColumn[0];
1060     int iColumn1=thisColumn[1];
1061     CoinFactorizationDouble value0=region[iColumn0];
1062     CoinFactorizationDouble value1=region[iColumn1];
1063     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1064     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1065     region[iColumn0]=value0;
1066     region[iColumn1]=value1;
1067     element+=NEW_CHUNK_SIZE_INCREMENT;
1068     thisColumn = reinterpret_cast<const int *>(element);
1069   }
1070 #endif
1071 #if NEW_CHUNK_SIZE == 2
1072   element++;
1073   int iColumn0=thisColumn[0];
1074   CoinFactorizationDouble value0=region[iColumn0];
1075   value0 += multiplier*element[0];
1076   region[iColumn0]=value0;
1077 #else
1078   element+=2;
1079   int iColumn0=thisColumn[0];
1080   int iColumn1=thisColumn[1];
1081   int iColumn2=thisColumn[2];
1082   CoinFactorizationDouble value0=region[iColumn0];
1083   CoinFactorizationDouble value1=region[iColumn1];
1084   CoinFactorizationDouble value2=region[iColumn2];
1085   value0 += multiplier*element[0];
1086   value1 += multiplier*element[1];
1087   value2 += multiplier*element[2];
1088   region[iColumn0]=value0;
1089   region[iColumn1]=value1;
1090   region[iColumn2]=value2;
1091 #endif
1092 }
1093 void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier,
1094 			  const CoinFactorizationDouble *  COIN_RESTRICT element,
1095 			  CoinFactorizationDouble * COIN_RESTRICT region)
1096 {
1097 #ifndef NDEBUG
1098   assert (numberIn==4);
1099 #endif
1100   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1101   int nFull=4&(~(NEW_CHUNK_SIZE-1));
1102   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1103     //coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1104 #if NEW_CHUNK_SIZE == 2
1105     int iColumn0=thisColumn[0];
1106     int iColumn1=thisColumn[1];
1107     CoinFactorizationDouble value0=region[iColumn0];
1108     CoinFactorizationDouble value1=region[iColumn1];
1109     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1110     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1111     region[iColumn0]=value0;
1112     region[iColumn1]=value1;
1113 #elif NEW_CHUNK_SIZE == 4
1114     int iColumn0=thisColumn[0];
1115     int iColumn1=thisColumn[1];
1116     int iColumn2=thisColumn[2];
1117     int iColumn3=thisColumn[3];
1118     CoinFactorizationDouble value0=region[iColumn0];
1119     CoinFactorizationDouble value1=region[iColumn1];
1120     CoinFactorizationDouble value2=region[iColumn2];
1121     CoinFactorizationDouble value3=region[iColumn3];
1122     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1123     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1124     value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1125     value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1126     region[iColumn0]=value0;
1127     region[iColumn1]=value1;
1128     region[iColumn2]=value2;
1129     region[iColumn3]=value3;
1130 #else
1131     abort();
1132 #endif
1133     element+=NEW_CHUNK_SIZE_INCREMENT;
1134     thisColumn = reinterpret_cast<const int *>(element);
1135   }
1136 }
1137 void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier,
1138 			  const CoinFactorizationDouble *  COIN_RESTRICT element,
1139 			  CoinFactorizationDouble * COIN_RESTRICT region)
1140 {
1141 #ifndef NDEBUG
1142   assert (numberIn==5);
1143 #endif
1144   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1145   int nFull=5&(~(NEW_CHUNK_SIZE-1));
1146   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1147     coin_prefetch_const(element+6);
1148 #if NEW_CHUNK_SIZE == 2
1149     int iColumn0=thisColumn[0];
1150     int iColumn1=thisColumn[1];
1151     CoinFactorizationDouble value0=region[iColumn0];
1152     CoinFactorizationDouble value1=region[iColumn1];
1153     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1154     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1155     region[iColumn0]=value0;
1156     region[iColumn1]=value1;
1157 #elif NEW_CHUNK_SIZE == 4
1158     int iColumn0=thisColumn[0];
1159     int iColumn1=thisColumn[1];
1160     int iColumn2=thisColumn[2];
1161     int iColumn3=thisColumn[3];
1162     CoinFactorizationDouble value0=region[iColumn0];
1163     CoinFactorizationDouble value1=region[iColumn1];
1164     CoinFactorizationDouble value2=region[iColumn2];
1165     CoinFactorizationDouble value3=region[iColumn3];
1166     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1167     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1168     value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1169     value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1170     region[iColumn0]=value0;
1171     region[iColumn1]=value1;
1172     region[iColumn2]=value2;
1173     region[iColumn3]=value3;
1174 #else
1175     abort();
1176 #endif
1177     element+=NEW_CHUNK_SIZE_INCREMENT;
1178     thisColumn = reinterpret_cast<const int *>(element);
1179   }
1180   element++;
1181   int iColumn0=thisColumn[0];
1182   CoinFactorizationDouble value0=region[iColumn0];
1183   value0 += multiplier*element[0];
1184   region[iColumn0]=value0;
1185 }
1186 void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier,
1187 			  const CoinFactorizationDouble *  COIN_RESTRICT element,
1188 			  CoinFactorizationDouble * COIN_RESTRICT region)
1189 {
1190 #ifndef NDEBUG
1191   assert (numberIn==6);
1192 #endif
1193   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1194   int nFull=6&(~(NEW_CHUNK_SIZE-1));
1195   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1196     coin_prefetch_const(element+6);
1197 #if NEW_CHUNK_SIZE == 2
1198     int iColumn0=thisColumn[0];
1199     int iColumn1=thisColumn[1];
1200     CoinFactorizationDouble value0=region[iColumn0];
1201     CoinFactorizationDouble value1=region[iColumn1];
1202     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1203     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1204     region[iColumn0]=value0;
1205     region[iColumn1]=value1;
1206 #elif NEW_CHUNK_SIZE == 4
1207     int iColumn0=thisColumn[0];
1208     int iColumn1=thisColumn[1];
1209     int iColumn2=thisColumn[2];
1210     int iColumn3=thisColumn[3];
1211     CoinFactorizationDouble value0=region[iColumn0];
1212     CoinFactorizationDouble value1=region[iColumn1];
1213     CoinFactorizationDouble value2=region[iColumn2];
1214     CoinFactorizationDouble value3=region[iColumn3];
1215     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1216     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1217     value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1218     value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1219     region[iColumn0]=value0;
1220     region[iColumn1]=value1;
1221     region[iColumn2]=value2;
1222     region[iColumn3]=value3;
1223 #else
1224     abort();
1225 #endif
1226     element+=NEW_CHUNK_SIZE_INCREMENT;
1227     thisColumn = reinterpret_cast<const int *>(element);
1228   }
1229 #if NEW_CHUNK_SIZE == 4
1230   element++;
1231   int iColumn0=thisColumn[0];
1232   int iColumn1=thisColumn[1];
1233   CoinFactorizationDouble value0=region[iColumn0];
1234   CoinFactorizationDouble value1=region[iColumn1];
1235   value0 += multiplier*element[0];
1236   value1 += multiplier*element[1];
1237   region[iColumn0]=value0;
1238   region[iColumn1]=value1;
1239 #endif
1240 }
1241 void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier,
1242 			  const CoinFactorizationDouble *  COIN_RESTRICT element,
1243 			  CoinFactorizationDouble * COIN_RESTRICT region)
1244 {
1245 #ifndef NDEBUG
1246   assert (numberIn==7);
1247 #endif
1248   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1249   int nFull=7&(~(NEW_CHUNK_SIZE-1));
1250   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1251     coin_prefetch_const(element+6);
1252 #if NEW_CHUNK_SIZE == 2
1253     int iColumn0=thisColumn[0];
1254     int iColumn1=thisColumn[1];
1255     CoinFactorizationDouble value0=region[iColumn0];
1256     CoinFactorizationDouble value1=region[iColumn1];
1257     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1258     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1259     region[iColumn0]=value0;
1260     region[iColumn1]=value1;
1261 #elif NEW_CHUNK_SIZE == 4
1262     int iColumn0=thisColumn[0];
1263     int iColumn1=thisColumn[1];
1264     int iColumn2=thisColumn[2];
1265     int iColumn3=thisColumn[3];
1266     CoinFactorizationDouble value0=region[iColumn0];
1267     CoinFactorizationDouble value1=region[iColumn1];
1268     CoinFactorizationDouble value2=region[iColumn2];
1269     CoinFactorizationDouble value3=region[iColumn3];
1270     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1271     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1272     value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1273     value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1274     region[iColumn0]=value0;
1275     region[iColumn1]=value1;
1276     region[iColumn2]=value2;
1277     region[iColumn3]=value3;
1278 #else
1279     abort();
1280 #endif
1281     element+=NEW_CHUNK_SIZE_INCREMENT;
1282     thisColumn = reinterpret_cast<const int *>(element);
1283   }
1284 #if NEW_CHUNK_SIZE == 2
1285   element++;
1286   int iColumn0=thisColumn[0];
1287   CoinFactorizationDouble value0=region[iColumn0];
1288   value0 += multiplier*element[0];
1289   region[iColumn0]=value0;
1290 #else
1291   element+=2;
1292   int iColumn0=thisColumn[0];
1293   int iColumn1=thisColumn[1];
1294   int iColumn2=thisColumn[2];
1295   CoinFactorizationDouble value0=region[iColumn0];
1296   CoinFactorizationDouble value1=region[iColumn1];
1297   CoinFactorizationDouble value2=region[iColumn2];
1298   value0 += multiplier*element[0];
1299   value1 += multiplier*element[1];
1300   value2 += multiplier*element[2];
1301   region[iColumn0]=value0;
1302   region[iColumn1]=value1;
1303   region[iColumn2]=value2;
1304 #endif
1305 }
1306 void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier,
1307 			  const CoinFactorizationDouble *  COIN_RESTRICT element,
1308 			  CoinFactorizationDouble * COIN_RESTRICT region)
1309 {
1310 #ifndef NDEBUG
1311   assert (numberIn==8);
1312 #endif
1313   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1314   int nFull=8&(~(NEW_CHUNK_SIZE-1));
1315   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1316     coin_prefetch_const(element+6);
1317 #if NEW_CHUNK_SIZE == 2
1318     int iColumn0=thisColumn[0];
1319     int iColumn1=thisColumn[1];
1320     CoinFactorizationDouble value0=region[iColumn0];
1321     CoinFactorizationDouble value1=region[iColumn1];
1322     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1323     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1324     region[iColumn0]=value0;
1325     region[iColumn1]=value1;
1326 #elif NEW_CHUNK_SIZE == 4
1327     int iColumn0=thisColumn[0];
1328     int iColumn1=thisColumn[1];
1329     int iColumn2=thisColumn[2];
1330     int iColumn3=thisColumn[3];
1331     CoinFactorizationDouble value0=region[iColumn0];
1332     CoinFactorizationDouble value1=region[iColumn1];
1333     CoinFactorizationDouble value2=region[iColumn2];
1334     CoinFactorizationDouble value3=region[iColumn3];
1335     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1336     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1337     value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1338     value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1339     region[iColumn0]=value0;
1340     region[iColumn1]=value1;
1341     region[iColumn2]=value2;
1342     region[iColumn3]=value3;
1343 #else
1344     abort();
1345 #endif
1346     element+=NEW_CHUNK_SIZE_INCREMENT;
1347     thisColumn = reinterpret_cast<const int *>(element);
1348   }
1349 }
1350 void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier,
1351 				 const CoinFactorizationDouble *  COIN_RESTRICT element,
1352 				 CoinFactorizationDouble * COIN_RESTRICT region)
1353 {
1354   assert ((numberIn&3)==0);
1355   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1356   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1357   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1358     coin_prefetch_const(element+6);
1359 #if NEW_CHUNK_SIZE == 2
1360     int iColumn0=thisColumn[0];
1361     int iColumn1=thisColumn[1];
1362     CoinFactorizationDouble value0=region[iColumn0];
1363     CoinFactorizationDouble value1=region[iColumn1];
1364     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1365     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1366     region[iColumn0]=value0;
1367     region[iColumn1]=value1;
1368 #elif NEW_CHUNK_SIZE == 4
1369     int iColumn0=thisColumn[0];
1370     int iColumn1=thisColumn[1];
1371     int iColumn2=thisColumn[2];
1372     int iColumn3=thisColumn[3];
1373     CoinFactorizationDouble value0=region[iColumn0];
1374     CoinFactorizationDouble value1=region[iColumn1];
1375     CoinFactorizationDouble value2=region[iColumn2];
1376     CoinFactorizationDouble value3=region[iColumn3];
1377     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1378     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1379     value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1380     value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1381     region[iColumn0]=value0;
1382     region[iColumn1]=value1;
1383     region[iColumn2]=value2;
1384     region[iColumn3]=value3;
1385 #else
1386     abort();
1387 #endif
1388     element+=NEW_CHUNK_SIZE_INCREMENT;
1389     thisColumn = reinterpret_cast<const int *>(element);
1390   }
1391 }
1392 void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier,
1393 				 const CoinFactorizationDouble *  COIN_RESTRICT element,
1394 				 CoinFactorizationDouble * COIN_RESTRICT region)
1395 {
1396   assert ((numberIn&3)==1);
1397   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1398   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1399   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1400     coin_prefetch_const(element+6);
1401 #if NEW_CHUNK_SIZE == 2
1402     int iColumn0=thisColumn[0];
1403     int iColumn1=thisColumn[1];
1404     CoinFactorizationDouble value0=region[iColumn0];
1405     CoinFactorizationDouble value1=region[iColumn1];
1406     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1407     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1408     region[iColumn0]=value0;
1409     region[iColumn1]=value1;
1410 #elif NEW_CHUNK_SIZE == 4
1411     int iColumn0=thisColumn[0];
1412     int iColumn1=thisColumn[1];
1413     int iColumn2=thisColumn[2];
1414     int iColumn3=thisColumn[3];
1415     CoinFactorizationDouble value0=region[iColumn0];
1416     CoinFactorizationDouble value1=region[iColumn1];
1417     CoinFactorizationDouble value2=region[iColumn2];
1418     CoinFactorizationDouble value3=region[iColumn3];
1419     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1420     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1421     value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1422     value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1423     region[iColumn0]=value0;
1424     region[iColumn1]=value1;
1425     region[iColumn2]=value2;
1426     region[iColumn3]=value3;
1427 #else
1428     abort();
1429 #endif
1430     element+=NEW_CHUNK_SIZE_INCREMENT;
1431     thisColumn = reinterpret_cast<const int *>(element);
1432   }
1433   element++;
1434   int iColumn0=thisColumn[0];
1435   CoinFactorizationDouble value0=region[iColumn0];
1436   value0 += multiplier*element[0];
1437   region[iColumn0]=value0;
1438 }
1439 void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier,
1440 				 const CoinFactorizationDouble *  COIN_RESTRICT element,
1441 				 CoinFactorizationDouble * COIN_RESTRICT region)
1442 {
1443   assert ((numberIn&3)==2);
1444   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1445   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1446   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1447     coin_prefetch_const(element+6);
1448 #if NEW_CHUNK_SIZE == 2
1449     int iColumn0=thisColumn[0];
1450     int iColumn1=thisColumn[1];
1451     CoinFactorizationDouble value0=region[iColumn0];
1452     CoinFactorizationDouble value1=region[iColumn1];
1453     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1454     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1455     region[iColumn0]=value0;
1456     region[iColumn1]=value1;
1457 #elif NEW_CHUNK_SIZE == 4
1458     int iColumn0=thisColumn[0];
1459     int iColumn1=thisColumn[1];
1460     int iColumn2=thisColumn[2];
1461     int iColumn3=thisColumn[3];
1462     CoinFactorizationDouble value0=region[iColumn0];
1463     CoinFactorizationDouble value1=region[iColumn1];
1464     CoinFactorizationDouble value2=region[iColumn2];
1465     CoinFactorizationDouble value3=region[iColumn3];
1466     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1467     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1468     value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1469     value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1470     region[iColumn0]=value0;
1471     region[iColumn1]=value1;
1472     region[iColumn2]=value2;
1473     region[iColumn3]=value3;
1474 #else
1475     abort();
1476 #endif
1477     element+=NEW_CHUNK_SIZE_INCREMENT;
1478     thisColumn = reinterpret_cast<const int *>(element);
1479   }
1480 #if NEW_CHUNK_SIZE == 4
1481   element++;
1482   int iColumn0=thisColumn[0];
1483   int iColumn1=thisColumn[1];
1484   CoinFactorizationDouble value0=region[iColumn0];
1485   CoinFactorizationDouble value1=region[iColumn1];
1486   value0 += multiplier*element[0];
1487   value1 += multiplier*element[1];
1488   region[iColumn0]=value0;
1489   region[iColumn1]=value1;
1490 #endif
1491 }
1492 void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier,
1493 				 const CoinFactorizationDouble *  COIN_RESTRICT element,
1494 				 CoinFactorizationDouble * COIN_RESTRICT region)
1495 {
1496   assert ((numberIn&3)==3);
1497   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1498   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1499   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1500     coin_prefetch_const(element+6);
1501 #if NEW_CHUNK_SIZE == 2
1502     int iColumn0=thisColumn[0];
1503     int iColumn1=thisColumn[1];
1504     CoinFactorizationDouble value0=region[iColumn0];
1505     CoinFactorizationDouble value1=region[iColumn1];
1506     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1507     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1508     region[iColumn0]=value0;
1509     region[iColumn1]=value1;
1510 #elif NEW_CHUNK_SIZE == 4
1511     int iColumn0=thisColumn[0];
1512     int iColumn1=thisColumn[1];
1513     int iColumn2=thisColumn[2];
1514     int iColumn3=thisColumn[3];
1515     CoinFactorizationDouble value0=region[iColumn0];
1516     CoinFactorizationDouble value1=region[iColumn1];
1517     CoinFactorizationDouble value2=region[iColumn2];
1518     CoinFactorizationDouble value3=region[iColumn3];
1519     value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1520     value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1521     value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1522     value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1523     region[iColumn0]=value0;
1524     region[iColumn1]=value1;
1525     region[iColumn2]=value2;
1526     region[iColumn3]=value3;
1527 #else
1528     abort();
1529 #endif
1530     element+=NEW_CHUNK_SIZE_INCREMENT;
1531     thisColumn = reinterpret_cast<const int *>(element);
1532   }
1533 #if NEW_CHUNK_SIZE == 2
1534   element++;
1535   int iColumn0=thisColumn[0];
1536   CoinFactorizationDouble value0=region[iColumn0];
1537   value0 += multiplier*element[0];
1538   region[iColumn0]=value0;
1539 #else
1540   element+=2;
1541   int iColumn0=thisColumn[0];
1542   int iColumn1=thisColumn[1];
1543   int iColumn2=thisColumn[2];
1544   CoinFactorizationDouble value0=region[iColumn0];
1545   CoinFactorizationDouble value1=region[iColumn1];
1546   CoinFactorizationDouble value2=region[iColumn2];
1547   value0 += multiplier*element[0];
1548   value1 += multiplier*element[1];
1549   value2 += multiplier*element[2];
1550   region[iColumn0]=value0;
1551   region[iColumn1]=value1;
1552   region[iColumn2]=value2;
1553 #endif
1554 }
1555 #else
1556 // alternative
1557 #define CACHE_LINE_SIZE 16
1558 #define OPERATION +=
1559 #define functionName(zname) CoinAbc##zname
1560 #define ABC_CREATE_SCATTER_FUNCTION 1
1561 #include "CoinAbcHelperFunctions.hpp"
1562 #undef OPERATION
1563 #undef functionName
1564 #define OPERATION -=
1565 #define functionName(zname) CoinAbc##zname##Subtract
1566 #include "CoinAbcHelperFunctions.hpp"
1567 #undef OPERATION
1568 #undef functionName
1569 #define OPERATION +=
1570 #define functionName(zname) CoinAbc##zname##Add
1571 #include "CoinAbcHelperFunctions.hpp"
1572 scatterUpdate AbcScatterLow[] = {
1573   &CoinAbcScatterUpdate0,
1574   &CoinAbcScatterUpdate1,
1575   &CoinAbcScatterUpdate2,
1576   &CoinAbcScatterUpdate3,
1577   &CoinAbcScatterUpdate4,
1578   &CoinAbcScatterUpdate5,
1579   &CoinAbcScatterUpdate6,
1580   &CoinAbcScatterUpdate7,
1581   &CoinAbcScatterUpdate8
1582 };
1583 scatterUpdate AbcScatterHigh[] = {
1584   &CoinAbcScatterUpdate4N,
1585   &CoinAbcScatterUpdate4NPlus1,
1586   &CoinAbcScatterUpdate4NPlus2,
1587   &CoinAbcScatterUpdate4NPlus3
1588 };
1589 scatterUpdate AbcScatterLowSubtract[] = {
1590   &CoinAbcScatterUpdate0,
1591   &CoinAbcScatterUpdate1Subtract,
1592   &CoinAbcScatterUpdate2Subtract,
1593   &CoinAbcScatterUpdate3Subtract,
1594   &CoinAbcScatterUpdate4Subtract,
1595   &CoinAbcScatterUpdate5Subtract,
1596   &CoinAbcScatterUpdate6Subtract,
1597   &CoinAbcScatterUpdate7Subtract,
1598   &CoinAbcScatterUpdate8Subtract
1599 };
1600 scatterUpdate AbcScatterHighSubtract[] = {
1601   &CoinAbcScatterUpdate4NSubtract,
1602   &CoinAbcScatterUpdate4NPlus1Subtract,
1603   &CoinAbcScatterUpdate4NPlus2Subtract,
1604   &CoinAbcScatterUpdate4NPlus3Subtract
1605 };
1606 scatterUpdate AbcScatterLowAdd[] = {
1607   &CoinAbcScatterUpdate0,
1608   &CoinAbcScatterUpdate1Add,
1609   &CoinAbcScatterUpdate2Add,
1610   &CoinAbcScatterUpdate3Add,
1611   &CoinAbcScatterUpdate4Add,
1612   &CoinAbcScatterUpdate5Add,
1613   &CoinAbcScatterUpdate6Add,
1614   &CoinAbcScatterUpdate7Add,
1615   &CoinAbcScatterUpdate8Add
1616 };
1617 scatterUpdate AbcScatterHighAdd[] = {
1618   &CoinAbcScatterUpdate4NAdd,
1619   &CoinAbcScatterUpdate4NPlus1Add,
1620   &CoinAbcScatterUpdate4NPlus2Add,
1621   &CoinAbcScatterUpdate4NPlus3Add
1622 };
1623 #endif
1624 #include "CoinPragma.hpp"
1625 
1626 #include <math.h>
1627 
1628 #include <cfloat>
1629 #include <cassert>
1630 #include <string>
1631 #include <stdio.h>
1632 #include <iostream>
1633 #include "CoinAbcCommonFactorization.hpp"
1634 #if 1
1635 #if AVX2 == 1
1636 #include "emmintrin.h"
1637 #elif AVX2 == 2
1638 #include <immintrin.h>
1639 #elif AVX2 == 3
1640 #include "avx2intrin.h"
1641 #endif
1642 #endif
1643 // cilk seems a bit fragile
1644 #define CILK_FRAGILE 0
1645 #if CILK_FRAGILE > 1
1646 #undef cilk_spawn
1647 #undef cilk_sync
1648 #define cilk_spawn
1649 #define cilk_sync
1650 #define ONWARD 0
1651 #elif CILK_FRAGILE == 1
1652 #define ONWARD 0
1653 #else
1654 #define ONWARD 1
1655 #endif
1656 #define BLOCKING1 8 // factorization strip
1657 #define BLOCKING2 8 // dgemm recursive
1658 #define BLOCKING3 32 // dgemm parallel
CoinAbcDgemm(int m,int n,int k,double * COIN_RESTRICT a,int lda,double * COIN_RESTRICT b,double * COIN_RESTRICT c,int parallelMode)1659 void CoinAbcDgemm(int m, int n, int k, double *COIN_RESTRICT a, int lda,
1660   double *COIN_RESTRICT b, double *COIN_RESTRICT c
1661 #if ABC_PARALLEL == 2
1662   ,
1663   int parallelMode
1664 #endif
1665 )
1666 {
1667   assert((m & (BLOCKING8 - 1)) == 0 && (n & (BLOCKING8 - 1)) == 0 && (k & (BLOCKING8 - 1)) == 0);
1668   /* entry for column j and row i (when multiple of BLOCKING8)
1669      is at aBlocked+j*m+i*BLOCKING8
1670   */
1671   // k is always smallish
1672   if (m <= BLOCKING8 && n <= BLOCKING8) {
1673     assert(m == BLOCKING8 && n == BLOCKING8 && k == BLOCKING8);
1674     double *COIN_RESTRICT aBase2 = a;
1675     double *COIN_RESTRICT bBase2 = b;
1676     double *COIN_RESTRICT cBase2 = c;
1677     for (int j = 0; j < BLOCKING8; j++) {
1678       double *COIN_RESTRICT aBase = aBase2;
1679 #if AVX2 != 2
1680 #if 1
1681       double c0 = cBase2[0];
1682       double c1 = cBase2[1];
1683       double c2 = cBase2[2];
1684       double c3 = cBase2[3];
1685       double c4 = cBase2[4];
1686       double c5 = cBase2[5];
1687       double c6 = cBase2[6];
1688       double c7 = cBase2[7];
1689       for (int l = 0; l < BLOCKING8; l++) {
1690         double bValue = bBase2[l];
1691         if (bValue) {
1692           c0 -= bValue * aBase[0];
1693           c1 -= bValue * aBase[1];
1694           c2 -= bValue * aBase[2];
1695           c3 -= bValue * aBase[3];
1696           c4 -= bValue * aBase[4];
1697           c5 -= bValue * aBase[5];
1698           c6 -= bValue * aBase[6];
1699           c7 -= bValue * aBase[7];
1700         }
1701         aBase += BLOCKING8;
1702       }
1703       cBase2[0] = c0;
1704       cBase2[1] = c1;
1705       cBase2[2] = c2;
1706       cBase2[3] = c3;
1707       cBase2[4] = c4;
1708       cBase2[5] = c5;
1709       cBase2[6] = c6;
1710       cBase2[7] = c7;
1711 #else
1712       for (int l = 0; l < BLOCKING8; l++) {
1713         double bValue = bBase2[l];
1714         if (bValue) {
1715           for (int i = 0; i < BLOCKING8; i++) {
1716             cBase2[i] -= bValue * aBase[i];
1717           }
1718         }
1719         aBase += BLOCKING8;
1720       }
1721 #endif
1722 #else
1723       //__m256d c0=_mm256_load_pd(cBase2);
1724       __m256d c0 = *reinterpret_cast< __m256d * >(cBase2);
1725       //__m256d c1=_mm256_load_pd(cBase2+4);
1726       __m256d c1 = *reinterpret_cast< __m256d * >(cBase2 + 4);
1727       for (int l = 0; l < BLOCKING8; l++) {
1728         //__m256d bb = _mm256_broadcast_sd(bBase2+l);
1729         __m256d bb = static_cast< __m256d >(__builtin_ia32_vbroadcastsd256(bBase2 + l));
1730         //__m256d a0 = _mm256_load_pd(aBase);
1731         __m256d a0 = *reinterpret_cast< __m256d * >(aBase);
1732         //__m256d a1 = _mm256_load_pd(aBase+4);
1733         __m256d a1 = *reinterpret_cast< __m256d * >(aBase + 4);
1734         c0 -= bb * a0;
1735         c1 -= bb * a1;
1736         aBase += BLOCKING8;
1737       }
1738       //_mm256_store_pd (cBase2, c0);
1739       *reinterpret_cast< __m256d * >(cBase2) = c0;
1740       //_mm256_store_pd (cBase2+4, c1);
1741       *reinterpret_cast< __m256d * >(cBase2 + 4) = c1;
1742 #endif
1743       bBase2 += BLOCKING8;
1744       cBase2 += BLOCKING8;
1745     }
1746   } else if (m > n) {
1747     // make sure mNew1 multiple of BLOCKING8
1748 #if BLOCKING8 == 8
1749     int mNew1 = ((m + 15) >> 4) << 3;
1750 #elif BLOCKING8 == 4
1751     int mNew1 = ((m + 7) >> 3) << 2;
1752 #elif BLOCKING8 == 2
1753     int mNew1 = ((m + 3) >> 2) << 1;
1754 #else
1755     abort();
1756 #endif
1757     assert(mNew1 > 0 && m - mNew1 > 0);
1758 #if ABC_PARALLEL == 2
1759     if (mNew1 <= BLOCKING3 || !parallelMode) {
1760 #endif
1761       //printf("splitMa mNew1 %d\n",mNew1);
1762       CoinAbcDgemm(mNew1, n, k, a, lda, b, c
1763 #if ABC_PARALLEL == 2
1764         ,
1765         0
1766 #endif
1767       );
1768       //printf("splitMb mNew1 %d\n",mNew1);
1769       CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8
1770 #if ABC_PARALLEL == 2
1771         ,
1772         0
1773 #endif
1774       );
1775 #if ABC_PARALLEL == 2
1776     } else {
1777       //printf("splitMa mNew1 %d\n",mNew1);
1778       cilk_spawn CoinAbcDgemm(mNew1, n, k, a, lda, b, c, ONWARD);
1779       //printf("splitMb mNew1 %d\n",mNew1);
1780       CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8, ONWARD);
1781       cilk_sync;
1782     }
1783 #endif
1784   } else {
1785     // make sure nNew1 multiple of BLOCKING8
1786 #if BLOCKING8 == 8
1787     int nNew1 = ((n + 15) >> 4) << 3;
1788 #elif BLOCKING8 == 4
1789     int nNew1 = ((n + 7) >> 3) << 2;
1790 #elif BLOCKING8 == 2
1791     int nNew1 = ((n + 3) >> 2) << 1;
1792 #else
1793     abort();
1794 #endif
1795     assert(nNew1 > 0 && n - nNew1 > 0);
1796 #if ABC_PARALLEL == 2
1797     if (nNew1 <= BLOCKING3 || !parallelMode) {
1798 #endif
1799       //printf("splitNa nNew1 %d\n",nNew1);
1800       CoinAbcDgemm(m, nNew1, k, a, lda, b, c
1801 #if ABC_PARALLEL == 2
1802         ,
1803         0
1804 #endif
1805       );
1806       //printf("splitNb nNew1 %d\n",nNew1);
1807       CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1
1808 #if ABC_PARALLEL == 2
1809         ,
1810         0
1811 #endif
1812       );
1813 #if ABC_PARALLEL == 2
1814     } else {
1815       //printf("splitNa nNew1 %d\n",nNew1);
1816       cilk_spawn CoinAbcDgemm(m, nNew1, k, a, lda, b, c, ONWARD);
1817       //printf("splitNb nNew1 %d\n",nNew1);
1818       CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1, ONWARD);
1819       cilk_sync;
1820     }
1821 #endif
1822   }
1823 }
1824 #ifdef ABC_LONG_FACTORIZATION
1825 // Start long double version
CoinAbcDgemm(int m,int n,int k,long double * COIN_RESTRICT a,int lda,long double * COIN_RESTRICT b,long double * COIN_RESTRICT c,int parallelMode)1826 void CoinAbcDgemm(int m, int n, int k, long double *COIN_RESTRICT a, int lda,
1827   long double *COIN_RESTRICT b, long double *COIN_RESTRICT c
1828 #if ABC_PARALLEL == 2
1829   ,
1830   int parallelMode
1831 #endif
1832 )
1833 {
1834   assert((m & (BLOCKING8 - 1)) == 0 && (n & (BLOCKING8 - 1)) == 0 && (k & (BLOCKING8 - 1)) == 0);
1835   /* entry for column j and row i (when multiple of BLOCKING8)
1836      is at aBlocked+j*m+i*BLOCKING8
1837   */
1838   // k is always smallish
1839   if (m <= BLOCKING8 && n <= BLOCKING8) {
1840     assert(m == BLOCKING8 && n == BLOCKING8 && k == BLOCKING8);
1841     long double *COIN_RESTRICT aBase2 = a;
1842     long double *COIN_RESTRICT bBase2 = b;
1843     long double *COIN_RESTRICT cBase2 = c;
1844     for (int j = 0; j < BLOCKING8; j++) {
1845       long double *COIN_RESTRICT aBase = aBase2;
1846 #if AVX2 != 2
1847 #if 1
1848       long double c0 = cBase2[0];
1849       long double c1 = cBase2[1];
1850       long double c2 = cBase2[2];
1851       long double c3 = cBase2[3];
1852       long double c4 = cBase2[4];
1853       long double c5 = cBase2[5];
1854       long double c6 = cBase2[6];
1855       long double c7 = cBase2[7];
1856       for (int l = 0; l < BLOCKING8; l++) {
1857         long double bValue = bBase2[l];
1858         if (bValue) {
1859           c0 -= bValue * aBase[0];
1860           c1 -= bValue * aBase[1];
1861           c2 -= bValue * aBase[2];
1862           c3 -= bValue * aBase[3];
1863           c4 -= bValue * aBase[4];
1864           c5 -= bValue * aBase[5];
1865           c6 -= bValue * aBase[6];
1866           c7 -= bValue * aBase[7];
1867         }
1868         aBase += BLOCKING8;
1869       }
1870       cBase2[0] = c0;
1871       cBase2[1] = c1;
1872       cBase2[2] = c2;
1873       cBase2[3] = c3;
1874       cBase2[4] = c4;
1875       cBase2[5] = c5;
1876       cBase2[6] = c6;
1877       cBase2[7] = c7;
1878 #else
1879       for (int l = 0; l < BLOCKING8; l++) {
1880         long double bValue = bBase2[l];
1881         if (bValue) {
1882           for (int i = 0; i < BLOCKING8; i++) {
1883             cBase2[i] -= bValue * aBase[i];
1884           }
1885         }
1886         aBase += BLOCKING8;
1887       }
1888 #endif
1889 #else
1890       //__m256d c0=_mm256_load_pd(cBase2);
1891       __m256d c0 = *reinterpret_cast< __m256d * >(cBase2);
1892       //__m256d c1=_mm256_load_pd(cBase2+4);
1893       __m256d c1 = *reinterpret_cast< __m256d * >(cBase2 + 4);
1894       for (int l = 0; l < BLOCKING8; l++) {
1895         //__m256d bb = _mm256_broadcast_sd(bBase2+l);
1896         __m256d bb = static_cast< __m256d >(__builtin_ia32_vbroadcastsd256(bBase2 + l));
1897         //__m256d a0 = _mm256_load_pd(aBase);
1898         __m256d a0 = *reinterpret_cast< __m256d * >(aBase);
1899         //__m256d a1 = _mm256_load_pd(aBase+4);
1900         __m256d a1 = *reinterpret_cast< __m256d * >(aBase + 4);
1901         c0 -= bb * a0;
1902         c1 -= bb * a1;
1903         aBase += BLOCKING8;
1904       }
1905       //_mm256_store_pd (cBase2, c0);
1906       *reinterpret_cast< __m256d * >(cBase2) = c0;
1907       //_mm256_store_pd (cBase2+4, c1);
1908       *reinterpret_cast< __m256d * >(cBase2 + 4) = c1;
1909 #endif
1910       bBase2 += BLOCKING8;
1911       cBase2 += BLOCKING8;
1912     }
1913   } else if (m > n) {
1914     // make sure mNew1 multiple of BLOCKING8
1915 #if BLOCKING8 == 8
1916     int mNew1 = ((m + 15) >> 4) << 3;
1917 #elif BLOCKING8 == 4
1918     int mNew1 = ((m + 7) >> 3) << 2;
1919 #elif BLOCKING8 == 2
1920     int mNew1 = ((m + 3) >> 2) << 1;
1921 #else
1922     abort();
1923 #endif
1924     assert(mNew1 > 0 && m - mNew1 > 0);
1925 #if ABC_PARALLEL == 2
1926     if (mNew1 <= BLOCKING3 || !parallelMode) {
1927 #endif
1928       //printf("splitMa mNew1 %d\n",mNew1);
1929       CoinAbcDgemm(mNew1, n, k, a, lda, b, c
1930 #if ABC_PARALLEL == 2
1931         ,
1932         0
1933 #endif
1934       );
1935       //printf("splitMb mNew1 %d\n",mNew1);
1936       CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8
1937 #if ABC_PARALLEL == 2
1938         ,
1939         0
1940 #endif
1941       );
1942 #if ABC_PARALLEL == 2
1943     } else {
1944       //printf("splitMa mNew1 %d\n",mNew1);
1945       cilk_spawn CoinAbcDgemm(mNew1, n, k, a, lda, b, c, ONWARD);
1946       //printf("splitMb mNew1 %d\n",mNew1);
1947       CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8, ONWARD);
1948       cilk_sync;
1949     }
1950 #endif
1951   } else {
1952     // make sure nNew1 multiple of BLOCKING8
1953 #if BLOCKING8 == 8
1954     int nNew1 = ((n + 15) >> 4) << 3;
1955 #elif BLOCKING8 == 4
1956     int nNew1 = ((n + 7) >> 3) << 2;
1957 #elif BLOCKING8 == 2
1958     int nNew1 = ((n + 3) >> 2) << 1;
1959 #else
1960     abort();
1961 #endif
1962     assert(nNew1 > 0 && n - nNew1 > 0);
1963 #if ABC_PARALLEL == 2
1964     if (nNew1 <= BLOCKING3 || !parallelMode) {
1965 #endif
1966       //printf("splitNa nNew1 %d\n",nNew1);
1967       CoinAbcDgemm(m, nNew1, k, a, lda, b, c
1968 #if ABC_PARALLEL == 2
1969         ,
1970         0
1971 #endif
1972       );
1973       //printf("splitNb nNew1 %d\n",nNew1);
1974       CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1
1975 #if ABC_PARALLEL == 2
1976         ,
1977         0
1978 #endif
1979       );
1980 #if ABC_PARALLEL == 2
1981     } else {
1982       //printf("splitNa nNew1 %d\n",nNew1);
1983       cilk_spawn CoinAbcDgemm(m, nNew1, k, a, lda, b, c, ONWARD);
1984       //printf("splitNb nNew1 %d\n",nNew1);
1985       CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1, ONWARD);
1986       cilk_sync;
1987     }
1988 #endif
1989   }
1990 }
1991 #endif
1992 #if 0
1993 // From Intel site
1994 // get AVX intrinsics
1995 #include <immintrin.h>
1996 // get CPUID capability
1997 //#include <intrin.h>
1998 #define cpuid(func, ax, bx, cx, dx)                             \
1999   __asm__ __volatile__("cpuid"                                  \
2000                        : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
2001                        : "a"(func));
2002 // written for clarity, not conciseness
2003 #define OSXSAVEFlag (1UL << 27)
2004 #define AVXFlag ((1UL << 28) | OSXSAVEFlag)
2005 #define VAESFlag ((1UL << 25) | AVXFlag | OSXSAVEFlag)
2006 #define FMAFlag ((1UL << 12) | AVXFlag | OSXSAVEFlag)
2007 #define CLMULFlag ((1UL << 1) | AVXFlag | OSXSAVEFlag)
2008 
2009 bool DetectFeature(unsigned int feature)
2010 {
2011   int CPUInfo[4]; //, InfoType=1, ECX = 1;
2012   //__cpuidex(CPUInfo, 1, 1);       // read the desired CPUID format
2013   cpuid(1,CPUInfo[0],CPUInfo[1],CPUInfo[2],CPUInfo[3]);
2014   unsigned int ECX = CPUInfo[2];  // the output of CPUID in the ECX register.
2015   if ((ECX & feature) != feature) // Missing feature
2016     return false;
2017 #if 0
2018   long int val = _xgetbv(0);       // read XFEATURE_ENABLED_MASK register
2019   if ((val&6) != 6)               // check OS has enabled both XMM and YMM support.
2020     return false;
2021 #endif
2022   return true;
2023 }
2024 #endif
2025 
2026 /* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
2027 */
2028