1 /* $Id: CoinAbcHelperFunctions.cpp 2385 2019-01-06 19:43:06Z unxusr $ */
2 // Copyright (C) 2003, International Business Machines
3 // Corporation and others, Copyright (C) 2012, FasterCoin. All Rights Reserved.
4 // This code is licensed under the terms of the Eclipse Public License (EPL).
5
6 #include <cfloat>
7 #include <cstdlib>
8 #include <cmath>
9 #include "CoinHelperFunctions.hpp"
10 #include "CoinAbcHelperFunctions.hpp"
11 #include "CoinTypes.hpp"
12 #include "CoinFinite.hpp"
13 #include "CoinAbcCommon.hpp"
14 //#define AVX2 1
15 #if AVX2 == 1
16 #define set_const_v2df(bb, b) \
17 { \
18 double *temp = reinterpret_cast< double * >(&bb); \
19 temp[0] = b; \
20 temp[1] = b; \
21 }
22 #endif
23 #if INLINE_SCATTER == 0
CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)24 void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
25 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
26 const int *COIN_RESTRICT thisIndex,
27 CoinFactorizationDouble *COIN_RESTRICT region)
28 {
29 #if UNROLL_SCATTER == 0
30 for (CoinBigIndex j = number - 1; j >= 0; j--) {
31 CoinSimplexInt iRow = thisIndex[j];
32 CoinFactorizationDouble regionValue = region[iRow];
33 CoinFactorizationDouble value = thisElement[j];
34 assert(value);
35 region[iRow] = regionValue - value * pivotValue;
36 }
37 #elif UNROLL_SCATTER == 1
38 if ((number & 1) != 0) {
39 number--;
40 CoinSimplexInt iRow = thisIndex[number];
41 CoinFactorizationDouble regionValue = region[iRow];
42 CoinFactorizationDouble value = thisElement[number];
43 region[iRow] = regionValue - value * pivotValue;
44 }
45 for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
46 CoinSimplexInt iRow0 = thisIndex[j];
47 CoinSimplexInt iRow1 = thisIndex[j - 1];
48 CoinFactorizationDouble regionValue0 = region[iRow0];
49 CoinFactorizationDouble regionValue1 = region[iRow1];
50 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
51 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
52 }
53 #elif UNROLL_SCATTER == 2
54 if ((number & 1) != 0) {
55 number--;
56 CoinSimplexInt iRow = thisIndex[number];
57 CoinFactorizationDouble regionValue = region[iRow];
58 CoinFactorizationDouble value = thisElement[number];
59 region[iRow] = regionValue - value * pivotValue;
60 }
61 if ((number & 2) != 0) {
62 CoinSimplexInt iRow0 = thisIndex[number - 1];
63 CoinFactorizationDouble regionValue0 = region[iRow0];
64 CoinFactorizationDouble value0 = thisElement[number - 1];
65 CoinSimplexInt iRow1 = thisIndex[number - 2];
66 CoinFactorizationDouble regionValue1 = region[iRow1];
67 CoinFactorizationDouble value1 = thisElement[number - 2];
68 region[iRow0] = regionValue0 - value0 * pivotValue;
69 region[iRow1] = regionValue1 - value1 * pivotValue;
70 number -= 2;
71 }
72 for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
73 CoinSimplexInt iRow0 = thisIndex[j];
74 CoinSimplexInt iRow1 = thisIndex[j - 1];
75 CoinFactorizationDouble regionValue0 = region[iRow0];
76 CoinFactorizationDouble regionValue1 = region[iRow1];
77 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
78 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
79 CoinSimplexInt iRow2 = thisIndex[j - 2];
80 CoinSimplexInt iRow3 = thisIndex[j - 3];
81 CoinFactorizationDouble regionValue2 = region[iRow2];
82 CoinFactorizationDouble regionValue3 = region[iRow3];
83 region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
84 region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
85 }
86 #elif UNROLL_SCATTER == 3
87 CoinSimplexInt iRow0;
88 CoinSimplexInt iRow1;
89 CoinFactorizationDouble regionValue0;
90 CoinFactorizationDouble regionValue1;
91 switch (static_cast< unsigned int >(number)) {
92 case 0:
93 break;
94 case 1:
95 iRow0 = thisIndex[0];
96 regionValue0 = region[iRow0];
97 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
98 break;
99 case 2:
100 iRow0 = thisIndex[0];
101 iRow1 = thisIndex[1];
102 regionValue0 = region[iRow0];
103 regionValue1 = region[iRow1];
104 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
105 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
106 break;
107 case 3:
108 iRow0 = thisIndex[0];
109 iRow1 = thisIndex[1];
110 regionValue0 = region[iRow0];
111 regionValue1 = region[iRow1];
112 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
113 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
114 iRow0 = thisIndex[2];
115 regionValue0 = region[iRow0];
116 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
117 break;
118 case 4:
119 iRow0 = thisIndex[0];
120 iRow1 = thisIndex[1];
121 regionValue0 = region[iRow0];
122 regionValue1 = region[iRow1];
123 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
124 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
125 iRow0 = thisIndex[2];
126 iRow1 = thisIndex[3];
127 regionValue0 = region[iRow0];
128 regionValue1 = region[iRow1];
129 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
130 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
131 break;
132 case 5:
133 iRow0 = thisIndex[0];
134 iRow1 = thisIndex[1];
135 regionValue0 = region[iRow0];
136 regionValue1 = region[iRow1];
137 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
138 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
139 iRow0 = thisIndex[2];
140 iRow1 = thisIndex[3];
141 regionValue0 = region[iRow0];
142 regionValue1 = region[iRow1];
143 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
144 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
145 iRow0 = thisIndex[4];
146 regionValue0 = region[iRow0];
147 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
148 break;
149 case 6:
150 iRow0 = thisIndex[0];
151 iRow1 = thisIndex[1];
152 regionValue0 = region[iRow0];
153 regionValue1 = region[iRow1];
154 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
155 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
156 iRow0 = thisIndex[2];
157 iRow1 = thisIndex[3];
158 regionValue0 = region[iRow0];
159 regionValue1 = region[iRow1];
160 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
161 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
162 iRow0 = thisIndex[4];
163 iRow1 = thisIndex[5];
164 regionValue0 = region[iRow0];
165 regionValue1 = region[iRow1];
166 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
167 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
168 break;
169 case 7:
170 iRow0 = thisIndex[0];
171 iRow1 = thisIndex[1];
172 regionValue0 = region[iRow0];
173 regionValue1 = region[iRow1];
174 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
175 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
176 iRow0 = thisIndex[2];
177 iRow1 = thisIndex[3];
178 regionValue0 = region[iRow0];
179 regionValue1 = region[iRow1];
180 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
181 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
182 iRow0 = thisIndex[4];
183 iRow1 = thisIndex[5];
184 regionValue0 = region[iRow0];
185 regionValue1 = region[iRow1];
186 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
187 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
188 iRow0 = thisIndex[6];
189 regionValue0 = region[iRow0];
190 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
191 break;
192 case 8:
193 iRow0 = thisIndex[0];
194 iRow1 = thisIndex[1];
195 regionValue0 = region[iRow0];
196 regionValue1 = region[iRow1];
197 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
198 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
199 iRow0 = thisIndex[2];
200 iRow1 = thisIndex[3];
201 regionValue0 = region[iRow0];
202 regionValue1 = region[iRow1];
203 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
204 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
205 iRow0 = thisIndex[4];
206 iRow1 = thisIndex[5];
207 regionValue0 = region[iRow0];
208 regionValue1 = region[iRow1];
209 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
210 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
211 iRow0 = thisIndex[6];
212 iRow1 = thisIndex[7];
213 regionValue0 = region[iRow0];
214 regionValue1 = region[iRow1];
215 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
216 region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
217 break;
218 default:
219 if ((number & 1) != 0) {
220 number--;
221 CoinSimplexInt iRow = thisIndex[number];
222 CoinFactorizationDouble regionValue = region[iRow];
223 CoinFactorizationDouble value = thisElement[number];
224 region[iRow] = regionValue - value * pivotValue;
225 }
226 for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
227 CoinSimplexInt iRow0 = thisIndex[j];
228 CoinSimplexInt iRow1 = thisIndex[j - 1];
229 CoinFactorizationDouble regionValue0 = region[iRow0];
230 CoinFactorizationDouble regionValue1 = region[iRow1];
231 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
232 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
233 }
234 break;
235 }
236 #endif
237 }
238 #endif
239 #if INLINE_GATHER == 0
240 CoinFactorizationDouble
CoinAbcGatherUpdate(CoinSimplexInt number,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)241 CoinAbcGatherUpdate(CoinSimplexInt number,
242 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
243 const int *COIN_RESTRICT thisIndex,
244 CoinFactorizationDouble *COIN_RESTRICT region)
245 {
246 #if UNROLL_GATHER == 0
247 CoinFactorizationDouble pivotValue = 0.0;
248 for (CoinBigIndex j = 0; j < number; j++) {
249 CoinFactorizationDouble value = thisElement[j];
250 CoinSimplexInt jRow = thisIndex[j];
251 value *= region[jRow];
252 pivotValue -= value;
253 }
254 return pivotValue;
255 #else
256 #error code
257 #endif
258 }
259 #endif
260 #if INLINE_MULTIPLY_ADD == 0
CoinAbcMultiplyIndexed(int number,const CoinFactorizationDouble * COIN_RESTRICT multiplier,const int * COIN_RESTRICT thisIndex,CoinSimplexDouble * COIN_RESTRICT region)261 void CoinAbcMultiplyIndexed(int number,
262 const CoinFactorizationDouble *COIN_RESTRICT multiplier,
263 const int *COIN_RESTRICT thisIndex,
264 CoinSimplexDouble *COIN_RESTRICT region)
265 {
266 #ifndef INTEL_COMPILER
267 // was #pragma simd
268 #endif
269 #if UNROLL_MULTIPLY_ADD == 0
270 for (CoinSimplexInt i = 0; i < number; i++) {
271 CoinSimplexInt iRow = thisIndex[i];
272 CoinSimplexDouble value = region[iRow];
273 value *= multiplier[iRow];
274 region[iRow] = value;
275 }
276 #else
277 #error code
278 #endif
279 }
CoinAbcMultiplyIndexed(int number,const long double * COIN_RESTRICT multiplier,const int * COIN_RESTRICT thisIndex,long double * COIN_RESTRICT region)280 void CoinAbcMultiplyIndexed(int number,
281 const long double *COIN_RESTRICT multiplier,
282 const int *COIN_RESTRICT thisIndex,
283 long double *COIN_RESTRICT region)
284 {
285 #ifndef INTEL_COMPILER
286 // was #pragma simd
287 #endif
288 #if UNROLL_MULTIPLY_ADD == 0
289 for (CoinSimplexInt i = 0; i < number; i++) {
290 CoinSimplexInt iRow = thisIndex[i];
291 long double value = region[iRow];
292 value *= multiplier[iRow];
293 region[iRow] = value;
294 }
295 #else
296 #error code
297 #endif
298 }
299 #endif
300 double
CoinAbcMaximumAbsElement(const double * region,int sizeIn)301 CoinAbcMaximumAbsElement(const double *region, int sizeIn)
302 {
303 int size = sizeIn;
304 double maxValue = 0.0;
305 //printf("a\n");
306 #ifndef INTEL_COMPILER
307 // was #pragma simd
308 #endif
309 /*cilk_*/ for (int i = 0; i < size; i++)
310 maxValue = CoinMax(maxValue, fabs(region[i]));
311 return maxValue;
312 }
CoinAbcMinMaxAbsElement(const double * region,int sizeIn,double & minimum,double & maximum)313 void CoinAbcMinMaxAbsElement(const double *region, int sizeIn, double &minimum, double &maximum)
314 {
315 double minValue = minimum;
316 double maxValue = maximum;
317 int size = sizeIn;
318 //printf("b\n");
319 #ifndef INTEL_COMPILER
320 // was #pragma simd
321 #endif
322 /*cilk_*/ for (int i = 0; i < size; i++) {
323 double value = fabs(region[i]);
324 minValue = CoinMin(value, minValue);
325 maxValue = CoinMax(value, maxValue);
326 }
327 minimum = minValue;
328 maximum = maxValue;
329 }
CoinAbcMinMaxAbsNormalValues(const double * region,int sizeIn,double & minimum,double & maximum)330 void CoinAbcMinMaxAbsNormalValues(const double *region, int sizeIn, double &minimum, double &maximum)
331 {
332 int size = sizeIn;
333 double minValue = minimum;
334 double maxValue = maximum;
335 #define NORMAL_LOW_VALUE 1.0e-8
336 #define NORMAL_HIGH_VALUE 1.0e8
337 //printf("c\n");
338 #ifndef INTEL_COMPILER
339 // was #pragma simd
340 #endif
341 /*cilk_*/ for (int i = 0; i < size; i++) {
342 double value = fabs(region[i]);
343 if (value > NORMAL_LOW_VALUE && value < NORMAL_HIGH_VALUE) {
344 minValue = CoinMin(value, minValue);
345 maxValue = CoinMax(value, maxValue);
346 }
347 }
348 minimum = minValue;
349 maximum = maxValue;
350 }
CoinAbcScale(double * region,double multiplier,int sizeIn)351 void CoinAbcScale(double *region, double multiplier, int sizeIn)
352 {
353 int size = sizeIn;
354 // used printf("d\n");
355 #ifndef INTEL_COMPILER
356 // was #pragma simd
357 #endif
358 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
359 cilk_for(int i = 0; i < size; i++)
360 {
361 region[i] *= multiplier;
362 }
363 }
CoinAbcScaleNormalValues(double * region,double multiplier,double killIfLessThanThis,int sizeIn)364 void CoinAbcScaleNormalValues(double *region, double multiplier, double killIfLessThanThis, int sizeIn)
365 {
366 int size = sizeIn;
367 // used printf("e\n");
368 #ifndef INTEL_COMPILER
369 // was #pragma simd
370 #endif
371 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
372 cilk_for(int i = 0; i < size; i++)
373 {
374 double value = fabs(region[i]);
375 if (value > killIfLessThanThis) {
376 if (value != COIN_DBL_MAX) {
377 value *= multiplier;
378 if (value > killIfLessThanThis)
379 region[i] *= multiplier;
380 else
381 region[i] = 0.0;
382 }
383 } else {
384 region[i] = 0.0;
385 }
386 }
387 }
388 // maximum fabs(region[i]) and then region[i]*=multiplier
389 double
CoinAbcMaximumAbsElementAndScale(double * region,double multiplier,int sizeIn)390 CoinAbcMaximumAbsElementAndScale(double *region, double multiplier, int sizeIn)
391 {
392 double maxValue = 0.0;
393 int size = sizeIn;
394 //printf("f\n");
395 #ifndef INTEL_COMPILER
396 // was #pragma simd
397 #endif
398 /*cilk_*/ for (int i = 0; i < size; i++) {
399 double value = fabs(region[i]);
400 region[i] *= multiplier;
401 maxValue = CoinMax(value, maxValue);
402 }
403 return maxValue;
404 }
CoinAbcSetElements(double * region,int sizeIn,double value)405 void CoinAbcSetElements(double *region, int sizeIn, double value)
406 {
407 int size = sizeIn;
408 printf("g\n");
409 #ifndef INTEL_COMPILER
410 // was #pragma simd
411 #endif
412 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
413 cilk_for(int i = 0; i < size; i++)
414 region[i]
415 = value;
416 }
CoinAbcMultiplyAdd(const double * region1,int sizeIn,double multiplier1,double * regionChanged,double multiplier2)417 void CoinAbcMultiplyAdd(const double *region1, int sizeIn, double multiplier1,
418 double *regionChanged, double multiplier2)
419 {
420 //printf("h\n");
421 int size = sizeIn;
422 if (multiplier1 == 1.0) {
423 if (multiplier2 == 1.0) {
424 #ifndef INTEL_COMPILER
425 // was #pragma simd
426 #endif
427 /*cilk_*/ for (int i = 0; i < size; i++)
428 regionChanged[i] = region1[i] + regionChanged[i];
429 } else if (multiplier2 == -1.0) {
430 #ifndef INTEL_COMPILER
431 // was #pragma simd
432 #endif
433 /*cilk_*/ for (int i = 0; i < size; i++)
434 regionChanged[i] = region1[i] - regionChanged[i];
435 } else if (multiplier2 == 0.0) {
436 #ifndef INTEL_COMPILER
437 // was #pragma simd
438 #endif
439 /*cilk_*/ for (int i = 0; i < size; i++)
440 regionChanged[i] = region1[i];
441 } else {
442 #ifndef INTEL_COMPILER
443 // was #pragma simd
444 #endif
445 /*cilk_*/ for (int i = 0; i < size; i++)
446 regionChanged[i] = region1[i] + multiplier2 * regionChanged[i];
447 }
448 } else if (multiplier1 == -1.0) {
449 if (multiplier2 == 1.0) {
450 #ifndef INTEL_COMPILER
451 // was #pragma simd
452 #endif
453 /*cilk_*/ for (int i = 0; i < size; i++)
454 regionChanged[i] = -region1[i] + regionChanged[i];
455 } else if (multiplier2 == -1.0) {
456 #ifndef INTEL_COMPILER
457 // was #pragma simd
458 #endif
459 /*cilk_*/ for (int i = 0; i < size; i++)
460 regionChanged[i] = -region1[i] - regionChanged[i];
461 } else if (multiplier2 == 0.0) {
462 #ifndef INTEL_COMPILER
463 // was #pragma simd
464 #endif
465 /*cilk_*/ for (int i = 0; i < size; i++)
466 regionChanged[i] = -region1[i];
467 } else {
468 #ifndef INTEL_COMPILER
469 // was #pragma simd
470 #endif
471 /*cilk_*/ for (int i = 0; i < size; i++)
472 regionChanged[i] = -region1[i] + multiplier2 * regionChanged[i];
473 }
474 } else if (multiplier1 == 0.0) {
475 if (multiplier2 == 1.0) {
476 // nothing to do
477 } else if (multiplier2 == -1.0) {
478 #ifndef INTEL_COMPILER
479 // was #pragma simd
480 #endif
481 /*cilk_*/ for (int i = 0; i < size; i++)
482 regionChanged[i] = -regionChanged[i];
483 } else if (multiplier2 == 0.0) {
484 #ifndef INTEL_COMPILER
485 // was #pragma simd
486 #endif
487 /*cilk_*/ for (int i = 0; i < size; i++)
488 regionChanged[i] = 0.0;
489 } else {
490 #ifndef INTEL_COMPILER
491 // was #pragma simd
492 #endif
493 /*cilk_*/ for (int i = 0; i < size; i++)
494 regionChanged[i] = multiplier2 * regionChanged[i];
495 }
496 } else {
497 if (multiplier2 == 1.0) {
498 #ifndef INTEL_COMPILER
499 // was #pragma simd
500 #endif
501 /*cilk_*/ for (int i = 0; i < size; i++)
502 regionChanged[i] = multiplier1 * region1[i] + regionChanged[i];
503 } else if (multiplier2 == -1.0) {
504 #ifndef INTEL_COMPILER
505 // was #pragma simd
506 #endif
507 /*cilk_*/ for (int i = 0; i < size; i++)
508 regionChanged[i] = multiplier1 * region1[i] - regionChanged[i];
509 } else if (multiplier2 == 0.0) {
510 #ifndef INTEL_COMPILER
511 // was #pragma simd
512 #endif
513 /*cilk_*/ for (int i = 0; i < size; i++)
514 regionChanged[i] = multiplier1 * region1[i];
515 } else {
516 #ifndef INTEL_COMPILER
517 // was #pragma simd
518 #endif
519 /*cilk_*/ for (int i = 0; i < size; i++)
520 regionChanged[i] = multiplier1 * region1[i] + multiplier2 * regionChanged[i];
521 }
522 }
523 }
524 double
CoinAbcInnerProduct(const double * region1,int sizeIn,const double * region2)525 CoinAbcInnerProduct(const double *region1, int sizeIn, const double *region2)
526 {
527 int size = sizeIn;
528 //printf("i\n");
529 double value = 0.0;
530 #ifndef INTEL_COMPILER
531 // was #pragma simd
532 #endif
533 /*cilk_*/ for (int i = 0; i < size; i++)
534 value += region1[i] * region2[i];
535 return value;
536 }
CoinAbcGetNorms(const double * region,int sizeIn,double & norm1,double & norm2)537 void CoinAbcGetNorms(const double *region, int sizeIn, double &norm1, double &norm2)
538 {
539 int size = sizeIn;
540 //printf("j\n");
541 norm1 = 0.0;
542 norm2 = 0.0;
543 #ifndef INTEL_COMPILER
544 // was #pragma simd
545 #endif
546 /*cilk_*/ for (int i = 0; i < size; i++) {
547 norm2 += region[i] * region[i];
548 norm1 = CoinMax(norm1, fabs(region[i]));
549 }
550 }
551 // regionTo[index[i]]=regionFrom[i]
CoinAbcScatterTo(const double * regionFrom,double * regionTo,const int * index,int numberIn)552 void CoinAbcScatterTo(const double *regionFrom, double *regionTo, const int *index, int numberIn)
553 {
554 int number = numberIn;
555 // used printf("k\n");
556 #ifndef INTEL_COMPILER
557 // was #pragma simd
558 #endif
559 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
560 cilk_for(int i = 0; i < number; i++)
561 {
562 int k = index[i];
563 regionTo[k] = regionFrom[i];
564 }
565 }
566 // regionTo[i]=regionFrom[index[i]]
CoinAbcGatherFrom(const double * regionFrom,double * regionTo,const int * index,int numberIn)567 void CoinAbcGatherFrom(const double *regionFrom, double *regionTo, const int *index, int numberIn)
568 {
569 int number = numberIn;
570 // used printf("l\n");
571 #ifndef INTEL_COMPILER
572 // was #pragma simd
573 #endif
574 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
575 cilk_for(int i = 0; i < number; i++)
576 {
577 int k = index[i];
578 regionTo[i] = regionFrom[k];
579 }
580 }
581 // regionTo[index[i]]=0.0
CoinAbcScatterZeroTo(double * regionTo,const int * index,int numberIn)582 void CoinAbcScatterZeroTo(double *regionTo, const int *index, int numberIn)
583 {
584 int number = numberIn;
585 // used printf("m\n");
586 #ifndef INTEL_COMPILER
587 // was #pragma simd
588 #endif
589 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
590 cilk_for(int i = 0; i < number; i++)
591 {
592 int k = index[i];
593 regionTo[k] = 0.0;
594 }
595 }
596 // regionTo[indexScatter[indexList[i]]]=regionFrom[indexList[i]]
CoinAbcScatterToList(const double * regionFrom,double * regionTo,const int * indexList,const int * indexScatter,int numberIn)597 void CoinAbcScatterToList(const double *regionFrom, double *regionTo,
598 const int *indexList, const int *indexScatter, int numberIn)
599 {
600 int number = numberIn;
601 //printf("n\n");
602 #ifndef INTEL_COMPILER
603 // was #pragma simd
604 #endif
605 /*cilk_*/ for (int i = 0; i < number; i++) {
606 int j = indexList[i];
607 double value = regionFrom[j];
608 int k = indexScatter[j];
609 regionTo[k] = value;
610 }
611 }
CoinAbcInverseSqrts(double * array,int nIn)612 void CoinAbcInverseSqrts(double *array, int nIn)
613 {
614 int n = nIn;
615 // used printf("o\n");
616 #ifndef INTEL_COMPILER
617 // was #pragma simd
618 #endif
619 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
620 cilk_for(int i = 0; i < n; i++)
621 array[i]
622 = 1.0 / sqrt(array[i]);
623 }
CoinAbcReciprocal(double * array,int nIn,const double * input)624 void CoinAbcReciprocal(double *array, int nIn, const double *input)
625 {
626 int n = nIn;
627 // used printf("p\n");
628 #ifndef INTEL_COMPILER
629 // was #pragma simd
630 #endif
631 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
632 cilk_for(int i = 0; i < n; i++)
633 array[i]
634 = 1.0 / input[i];
635 }
CoinAbcMemcpyLong(double * array,const double * arrayFrom,int size)636 void CoinAbcMemcpyLong(double *array, const double *arrayFrom, int size)
637 {
638 memcpy(array, arrayFrom, size * sizeof(double));
639 }
CoinAbcMemcpyLong(int * array,const int * arrayFrom,int size)640 void CoinAbcMemcpyLong(int *array, const int *arrayFrom, int size)
641 {
642 memcpy(array, arrayFrom, size * sizeof(int));
643 }
CoinAbcMemcpyLong(unsigned char * array,const unsigned char * arrayFrom,int size)644 void CoinAbcMemcpyLong(unsigned char *array, const unsigned char *arrayFrom, int size)
645 {
646 memcpy(array, arrayFrom, size * sizeof(unsigned char));
647 }
CoinAbcMemset0Long(double * array,int size)648 void CoinAbcMemset0Long(double *array, int size)
649 {
650 memset(array, 0, size * sizeof(double));
651 }
CoinAbcMemset0Long(int * array,int size)652 void CoinAbcMemset0Long(int *array, int size)
653 {
654 memset(array, 0, size * sizeof(int));
655 }
CoinAbcMemset0Long(unsigned char * array,int size)656 void CoinAbcMemset0Long(unsigned char *array, int size)
657 {
658 memset(array, 0, size * sizeof(unsigned char));
659 }
CoinAbcMemmove(double * array,const double * arrayFrom,int size)660 void CoinAbcMemmove(double *array, const double *arrayFrom, int size)
661 {
662 memmove(array, arrayFrom, size * sizeof(double));
663 }
CoinAbcMemmove(int * array,const int * arrayFrom,int size)664 void CoinAbcMemmove(int *array, const int *arrayFrom, int size)
665 {
666 memmove(array, arrayFrom, size * sizeof(int));
667 }
CoinAbcMemmove(unsigned char * array,const unsigned char * arrayFrom,int size)668 void CoinAbcMemmove(unsigned char *array, const unsigned char *arrayFrom, int size)
669 {
670 memmove(array, arrayFrom, size * sizeof(unsigned char));
671 }
672 // This moves down and zeroes out end
CoinAbcMemmoveAndZero(double * array,double * arrayFrom,int size)673 void CoinAbcMemmoveAndZero(double *array, double *arrayFrom, int size)
674 {
675 assert(arrayFrom - array >= 0);
676 memmove(array, arrayFrom, size * sizeof(double));
677 double *endArray = array + size;
678 double *endArrayFrom = arrayFrom + size;
679 double *startZero = CoinMax(arrayFrom, endArray);
680 memset(startZero, 0, (endArrayFrom - startZero) * sizeof(double));
681 }
682 // This compacts several sections and zeroes out end
CoinAbcCompact(int numberSections,int alreadyDone,double * array,const int * starts,const int * lengths)683 int CoinAbcCompact(int numberSections, int alreadyDone, double *array, const int *starts, const int *lengths)
684 {
685 int totalLength = alreadyDone;
686 for (int i = 0; i < numberSections; i++) {
687 totalLength += lengths[i];
688 }
689 for (int i = 0; i < numberSections; i++) {
690 int start = starts[i];
691 int length = lengths[i];
692 int end = start + length;
693 memmove(array + alreadyDone, array + start, length * sizeof(double));
694 if (end > totalLength) {
695 start = CoinMax(start, totalLength);
696 memset(array + start, 0, (end - start) * sizeof(double));
697 }
698 alreadyDone += length;
699 }
700 return totalLength;
701 }
702 // This compacts several sections
CoinAbcCompact(int numberSections,int alreadyDone,int * array,const int * starts,const int * lengths)703 int CoinAbcCompact(int numberSections, int alreadyDone, int *array, const int *starts, const int *lengths)
704 {
705 for (int i = 0; i < numberSections; i++) {
706 memmove(array + alreadyDone, array + starts[i], lengths[i] * sizeof(int));
707 alreadyDone += lengths[i];
708 }
709 return alreadyDone;
710 }
CoinAbcScatterUpdate0(int number,CoinFactorizationDouble,const CoinFactorizationDouble * COIN_RESTRICT,const int * COIN_RESTRICT,CoinFactorizationDouble * COIN_RESTRICT)711 void CoinAbcScatterUpdate0(int number, CoinFactorizationDouble /*pivotValue*/,
712 const CoinFactorizationDouble *COIN_RESTRICT /*thisElement*/,
713 const int *COIN_RESTRICT /*thisIndex*/,
714 CoinFactorizationDouble *COIN_RESTRICT /*region*/)
715 {
716 assert(number == 0);
717 }
CoinAbcScatterUpdate1(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)718 void CoinAbcScatterUpdate1(int number, CoinFactorizationDouble pivotValue,
719 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
720 const int *COIN_RESTRICT thisIndex,
721 CoinFactorizationDouble *COIN_RESTRICT region)
722 {
723 assert(number == 1);
724 int iRow0 = thisIndex[0];
725 CoinFactorizationDouble regionValue0 = region[iRow0];
726 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
727 }
CoinAbcScatterUpdate2(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)728 void CoinAbcScatterUpdate2(int number, CoinFactorizationDouble pivotValue,
729 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
730 const int *COIN_RESTRICT thisIndex,
731 CoinFactorizationDouble *COIN_RESTRICT region)
732 {
733 assert(number == 2);
734 int iRow0 = thisIndex[0];
735 int iRow1 = thisIndex[1];
736 CoinFactorizationDouble regionValue0 = region[iRow0];
737 CoinFactorizationDouble regionValue1 = region[iRow1];
738 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
739 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
740 }
CoinAbcScatterUpdate3(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)741 void CoinAbcScatterUpdate3(int number, CoinFactorizationDouble pivotValue,
742 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
743 const int *COIN_RESTRICT thisIndex,
744 CoinFactorizationDouble *COIN_RESTRICT region)
745 {
746 assert(number == 3);
747 int iRow0 = thisIndex[0];
748 int iRow1 = thisIndex[1];
749 CoinFactorizationDouble regionValue0 = region[iRow0];
750 CoinFactorizationDouble regionValue1 = region[iRow1];
751 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
752 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
753 iRow0 = thisIndex[2];
754 regionValue0 = region[iRow0];
755 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
756 }
CoinAbcScatterUpdate4(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)757 void CoinAbcScatterUpdate4(int number, CoinFactorizationDouble pivotValue,
758 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
759 const int *COIN_RESTRICT thisIndex,
760 CoinFactorizationDouble *COIN_RESTRICT region)
761 {
762 assert(number == 4);
763 int iRow0 = thisIndex[0];
764 int iRow1 = thisIndex[1];
765 CoinFactorizationDouble regionValue0 = region[iRow0];
766 CoinFactorizationDouble regionValue1 = region[iRow1];
767 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
768 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
769 iRow0 = thisIndex[2];
770 iRow1 = thisIndex[3];
771 regionValue0 = region[iRow0];
772 regionValue1 = region[iRow1];
773 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
774 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
775 }
CoinAbcScatterUpdate5(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)776 void CoinAbcScatterUpdate5(int number, CoinFactorizationDouble pivotValue,
777 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
778 const int *COIN_RESTRICT thisIndex,
779 CoinFactorizationDouble *COIN_RESTRICT region)
780 {
781 assert(number == 5);
782 int iRow0 = thisIndex[0];
783 int iRow1 = thisIndex[1];
784 CoinFactorizationDouble regionValue0 = region[iRow0];
785 CoinFactorizationDouble regionValue1 = region[iRow1];
786 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
787 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
788 iRow0 = thisIndex[2];
789 iRow1 = thisIndex[3];
790 regionValue0 = region[iRow0];
791 regionValue1 = region[iRow1];
792 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
793 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
794 iRow0 = thisIndex[4];
795 regionValue0 = region[iRow0];
796 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
797 }
CoinAbcScatterUpdate6(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)798 void CoinAbcScatterUpdate6(int number, CoinFactorizationDouble pivotValue,
799 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
800 const int *COIN_RESTRICT thisIndex,
801 CoinFactorizationDouble *COIN_RESTRICT region)
802 {
803 assert(number == 6);
804 int iRow0 = thisIndex[0];
805 int iRow1 = thisIndex[1];
806 CoinFactorizationDouble regionValue0 = region[iRow0];
807 CoinFactorizationDouble regionValue1 = region[iRow1];
808 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
809 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
810 iRow0 = thisIndex[2];
811 iRow1 = thisIndex[3];
812 regionValue0 = region[iRow0];
813 regionValue1 = region[iRow1];
814 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
815 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
816 iRow0 = thisIndex[4];
817 iRow1 = thisIndex[5];
818 regionValue0 = region[iRow0];
819 regionValue1 = region[iRow1];
820 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
821 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
822 }
CoinAbcScatterUpdate7(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)823 void CoinAbcScatterUpdate7(int number, CoinFactorizationDouble pivotValue,
824 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
825 const int *COIN_RESTRICT thisIndex,
826 CoinFactorizationDouble *COIN_RESTRICT region)
827 {
828 assert(number == 7);
829 int iRow0 = thisIndex[0];
830 int iRow1 = thisIndex[1];
831 CoinFactorizationDouble regionValue0 = region[iRow0];
832 CoinFactorizationDouble regionValue1 = region[iRow1];
833 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
834 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
835 iRow0 = thisIndex[2];
836 iRow1 = thisIndex[3];
837 regionValue0 = region[iRow0];
838 regionValue1 = region[iRow1];
839 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
840 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
841 iRow0 = thisIndex[4];
842 iRow1 = thisIndex[5];
843 regionValue0 = region[iRow0];
844 regionValue1 = region[iRow1];
845 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
846 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
847 iRow0 = thisIndex[6];
848 regionValue0 = region[iRow0];
849 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
850 }
CoinAbcScatterUpdate8(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)851 void CoinAbcScatterUpdate8(int number, CoinFactorizationDouble pivotValue,
852 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
853 const int *COIN_RESTRICT thisIndex,
854 CoinFactorizationDouble *COIN_RESTRICT region)
855 {
856 assert(number == 8);
857 int iRow0 = thisIndex[0];
858 int iRow1 = thisIndex[1];
859 CoinFactorizationDouble regionValue0 = region[iRow0];
860 CoinFactorizationDouble regionValue1 = region[iRow1];
861 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
862 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
863 iRow0 = thisIndex[2];
864 iRow1 = thisIndex[3];
865 regionValue0 = region[iRow0];
866 regionValue1 = region[iRow1];
867 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
868 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
869 iRow0 = thisIndex[4];
870 iRow1 = thisIndex[5];
871 regionValue0 = region[iRow0];
872 regionValue1 = region[iRow1];
873 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
874 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
875 iRow0 = thisIndex[6];
876 iRow1 = thisIndex[7];
877 regionValue0 = region[iRow0];
878 regionValue1 = region[iRow1];
879 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
880 region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
881 }
CoinAbcScatterUpdate4N(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)882 void CoinAbcScatterUpdate4N(int number, CoinFactorizationDouble pivotValue,
883 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
884 const int *COIN_RESTRICT thisIndex,
885 CoinFactorizationDouble *COIN_RESTRICT region)
886 {
887 assert((number & 3) == 0);
888 for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
889 CoinSimplexInt iRow0 = thisIndex[j];
890 CoinSimplexInt iRow1 = thisIndex[j - 1];
891 CoinFactorizationDouble regionValue0 = region[iRow0];
892 CoinFactorizationDouble regionValue1 = region[iRow1];
893 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
894 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
895 iRow0 = thisIndex[j - 2];
896 iRow1 = thisIndex[j - 3];
897 regionValue0 = region[iRow0];
898 regionValue1 = region[iRow1];
899 region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
900 region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
901 }
902 }
CoinAbcScatterUpdate4NPlus1(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)903 void CoinAbcScatterUpdate4NPlus1(int number, CoinFactorizationDouble pivotValue,
904 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
905 const int *COIN_RESTRICT thisIndex,
906 CoinFactorizationDouble *COIN_RESTRICT region)
907 {
908 assert((number & 3) == 1);
909 int iRow0 = thisIndex[number - 1];
910 CoinFactorizationDouble regionValue0 = region[iRow0];
911 region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
912 number -= 1;
913 for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
914 CoinSimplexInt iRow0 = thisIndex[j];
915 CoinSimplexInt iRow1 = thisIndex[j - 1];
916 CoinFactorizationDouble regionValue0 = region[iRow0];
917 CoinFactorizationDouble regionValue1 = region[iRow1];
918 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
919 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
920 iRow0 = thisIndex[j - 2];
921 iRow1 = thisIndex[j - 3];
922 regionValue0 = region[iRow0];
923 regionValue1 = region[iRow1];
924 region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
925 region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
926 }
927 }
CoinAbcScatterUpdate4NPlus2(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)928 void CoinAbcScatterUpdate4NPlus2(int number, CoinFactorizationDouble pivotValue,
929 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
930 const int *COIN_RESTRICT thisIndex,
931 CoinFactorizationDouble *COIN_RESTRICT region)
932 {
933 assert((number & 3) == 2);
934 int iRow0 = thisIndex[number - 1];
935 int iRow1 = thisIndex[number - 2];
936 CoinFactorizationDouble regionValue0 = region[iRow0];
937 CoinFactorizationDouble regionValue1 = region[iRow1];
938 region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
939 region[iRow1] = regionValue1 - thisElement[number - 2] * pivotValue;
940 number -= 2;
941 for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
942 CoinSimplexInt iRow0 = thisIndex[j];
943 CoinSimplexInt iRow1 = thisIndex[j - 1];
944 CoinFactorizationDouble regionValue0 = region[iRow0];
945 CoinFactorizationDouble regionValue1 = region[iRow1];
946 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
947 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
948 iRow0 = thisIndex[j - 2];
949 iRow1 = thisIndex[j - 3];
950 regionValue0 = region[iRow0];
951 regionValue1 = region[iRow1];
952 region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
953 region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
954 }
955 }
CoinAbcScatterUpdate4NPlus3(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)956 void CoinAbcScatterUpdate4NPlus3(int number, CoinFactorizationDouble pivotValue,
957 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
958 const int *COIN_RESTRICT thisIndex,
959 CoinFactorizationDouble *COIN_RESTRICT region)
960 {
961 assert((number & 3) == 3);
962 int iRow0 = thisIndex[number - 1];
963 int iRow1 = thisIndex[number - 2];
964 CoinFactorizationDouble regionValue0 = region[iRow0];
965 CoinFactorizationDouble regionValue1 = region[iRow1];
966 region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
967 region[iRow1] = regionValue1 - thisElement[number - 2] * pivotValue;
968 iRow0 = thisIndex[number - 3];
969 regionValue0 = region[iRow0];
970 region[iRow0] = regionValue0 - thisElement[number - 3] * pivotValue;
971 number -= 3;
972 for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
973 CoinSimplexInt iRow0 = thisIndex[j];
974 CoinSimplexInt iRow1 = thisIndex[j - 1];
975 CoinFactorizationDouble regionValue0 = region[iRow0];
976 CoinFactorizationDouble regionValue1 = region[iRow1];
977 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
978 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
979 iRow0 = thisIndex[j - 2];
980 iRow1 = thisIndex[j - 3];
981 regionValue0 = region[iRow0];
982 regionValue1 = region[iRow1];
983 region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
984 region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
985 }
986 }
CoinAbcScatterUpdate0(int numberIn,CoinFactorizationDouble,const CoinFactorizationDouble * COIN_RESTRICT element,CoinFactorizationDouble * COIN_RESTRICT)987 void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble /*multiplier*/,
988 const CoinFactorizationDouble *COIN_RESTRICT element,
989 CoinFactorizationDouble *COIN_RESTRICT /*region*/)
990 {
991 #ifndef NDEBUG
992 assert(numberIn == 0);
993 #endif
994 }
995 // start alternatives
996 #if 0
997 void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier,
998 const CoinFactorizationDouble * COIN_RESTRICT element,
999 CoinFactorizationDouble * COIN_RESTRICT region)
1000 {
1001 #ifndef NDEBUG
1002 assert (numberIn==1);
1003 #endif
1004 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1005 element++;
1006 int iColumn0=thisColumn[0];
1007 CoinFactorizationDouble value0=region[iColumn0];
1008 value0 += multiplier*element[0];
1009 region[iColumn0]=value0;
1010 }
1011 void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier,
1012 const CoinFactorizationDouble * COIN_RESTRICT element,
1013 CoinFactorizationDouble * COIN_RESTRICT region)
1014 {
1015 #ifndef NDEBUG
1016 assert (numberIn==2);
1017 #endif
1018 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1019 #if NEW_CHUNK_SIZE == 2
1020 int nFull=2&(~(NEW_CHUNK_SIZE-1));
1021 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1022 coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1023 int iColumn0=thisColumn[0];
1024 int iColumn1=thisColumn[1];
1025 CoinFactorizationDouble value0=region[iColumn0];
1026 CoinFactorizationDouble value1=region[iColumn1];
1027 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1028 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1029 region[iColumn0]=value0;
1030 region[iColumn1]=value1;
1031 element+=NEW_CHUNK_SIZE_INCREMENT;
1032 thisColumn = reinterpret_cast<const int *>(element);
1033 }
1034 #endif
1035 #if NEW_CHUNK_SIZE == 4
1036 element++;
1037 int iColumn0=thisColumn[0];
1038 int iColumn1=thisColumn[1];
1039 CoinFactorizationDouble value0=region[iColumn0];
1040 CoinFactorizationDouble value1=region[iColumn1];
1041 value0 += multiplier*element[0];
1042 value1 += multiplier*element[1];
1043 region[iColumn0]=value0;
1044 region[iColumn1]=value1;
1045 #endif
1046 }
1047 void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier,
1048 const CoinFactorizationDouble * COIN_RESTRICT element,
1049 CoinFactorizationDouble * COIN_RESTRICT region)
1050 {
1051 #ifndef NDEBUG
1052 assert (numberIn==3);
1053 #endif
1054 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1055 #if NEW_CHUNK_SIZE == 2
1056 int nFull=3&(~(NEW_CHUNK_SIZE-1));
1057 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1058 coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1059 int iColumn0=thisColumn[0];
1060 int iColumn1=thisColumn[1];
1061 CoinFactorizationDouble value0=region[iColumn0];
1062 CoinFactorizationDouble value1=region[iColumn1];
1063 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1064 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1065 region[iColumn0]=value0;
1066 region[iColumn1]=value1;
1067 element+=NEW_CHUNK_SIZE_INCREMENT;
1068 thisColumn = reinterpret_cast<const int *>(element);
1069 }
1070 #endif
1071 #if NEW_CHUNK_SIZE == 2
1072 element++;
1073 int iColumn0=thisColumn[0];
1074 CoinFactorizationDouble value0=region[iColumn0];
1075 value0 += multiplier*element[0];
1076 region[iColumn0]=value0;
1077 #else
1078 element+=2;
1079 int iColumn0=thisColumn[0];
1080 int iColumn1=thisColumn[1];
1081 int iColumn2=thisColumn[2];
1082 CoinFactorizationDouble value0=region[iColumn0];
1083 CoinFactorizationDouble value1=region[iColumn1];
1084 CoinFactorizationDouble value2=region[iColumn2];
1085 value0 += multiplier*element[0];
1086 value1 += multiplier*element[1];
1087 value2 += multiplier*element[2];
1088 region[iColumn0]=value0;
1089 region[iColumn1]=value1;
1090 region[iColumn2]=value2;
1091 #endif
1092 }
1093 void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier,
1094 const CoinFactorizationDouble * COIN_RESTRICT element,
1095 CoinFactorizationDouble * COIN_RESTRICT region)
1096 {
1097 #ifndef NDEBUG
1098 assert (numberIn==4);
1099 #endif
1100 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1101 int nFull=4&(~(NEW_CHUNK_SIZE-1));
1102 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1103 //coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1104 #if NEW_CHUNK_SIZE == 2
1105 int iColumn0=thisColumn[0];
1106 int iColumn1=thisColumn[1];
1107 CoinFactorizationDouble value0=region[iColumn0];
1108 CoinFactorizationDouble value1=region[iColumn1];
1109 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1110 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1111 region[iColumn0]=value0;
1112 region[iColumn1]=value1;
1113 #elif NEW_CHUNK_SIZE == 4
1114 int iColumn0=thisColumn[0];
1115 int iColumn1=thisColumn[1];
1116 int iColumn2=thisColumn[2];
1117 int iColumn3=thisColumn[3];
1118 CoinFactorizationDouble value0=region[iColumn0];
1119 CoinFactorizationDouble value1=region[iColumn1];
1120 CoinFactorizationDouble value2=region[iColumn2];
1121 CoinFactorizationDouble value3=region[iColumn3];
1122 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1123 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1124 value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1125 value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1126 region[iColumn0]=value0;
1127 region[iColumn1]=value1;
1128 region[iColumn2]=value2;
1129 region[iColumn3]=value3;
1130 #else
1131 abort();
1132 #endif
1133 element+=NEW_CHUNK_SIZE_INCREMENT;
1134 thisColumn = reinterpret_cast<const int *>(element);
1135 }
1136 }
1137 void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier,
1138 const CoinFactorizationDouble * COIN_RESTRICT element,
1139 CoinFactorizationDouble * COIN_RESTRICT region)
1140 {
1141 #ifndef NDEBUG
1142 assert (numberIn==5);
1143 #endif
1144 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1145 int nFull=5&(~(NEW_CHUNK_SIZE-1));
1146 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1147 coin_prefetch_const(element+6);
1148 #if NEW_CHUNK_SIZE == 2
1149 int iColumn0=thisColumn[0];
1150 int iColumn1=thisColumn[1];
1151 CoinFactorizationDouble value0=region[iColumn0];
1152 CoinFactorizationDouble value1=region[iColumn1];
1153 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1154 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1155 region[iColumn0]=value0;
1156 region[iColumn1]=value1;
1157 #elif NEW_CHUNK_SIZE == 4
1158 int iColumn0=thisColumn[0];
1159 int iColumn1=thisColumn[1];
1160 int iColumn2=thisColumn[2];
1161 int iColumn3=thisColumn[3];
1162 CoinFactorizationDouble value0=region[iColumn0];
1163 CoinFactorizationDouble value1=region[iColumn1];
1164 CoinFactorizationDouble value2=region[iColumn2];
1165 CoinFactorizationDouble value3=region[iColumn3];
1166 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1167 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1168 value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1169 value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1170 region[iColumn0]=value0;
1171 region[iColumn1]=value1;
1172 region[iColumn2]=value2;
1173 region[iColumn3]=value3;
1174 #else
1175 abort();
1176 #endif
1177 element+=NEW_CHUNK_SIZE_INCREMENT;
1178 thisColumn = reinterpret_cast<const int *>(element);
1179 }
1180 element++;
1181 int iColumn0=thisColumn[0];
1182 CoinFactorizationDouble value0=region[iColumn0];
1183 value0 += multiplier*element[0];
1184 region[iColumn0]=value0;
1185 }
1186 void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier,
1187 const CoinFactorizationDouble * COIN_RESTRICT element,
1188 CoinFactorizationDouble * COIN_RESTRICT region)
1189 {
1190 #ifndef NDEBUG
1191 assert (numberIn==6);
1192 #endif
1193 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1194 int nFull=6&(~(NEW_CHUNK_SIZE-1));
1195 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1196 coin_prefetch_const(element+6);
1197 #if NEW_CHUNK_SIZE == 2
1198 int iColumn0=thisColumn[0];
1199 int iColumn1=thisColumn[1];
1200 CoinFactorizationDouble value0=region[iColumn0];
1201 CoinFactorizationDouble value1=region[iColumn1];
1202 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1203 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1204 region[iColumn0]=value0;
1205 region[iColumn1]=value1;
1206 #elif NEW_CHUNK_SIZE == 4
1207 int iColumn0=thisColumn[0];
1208 int iColumn1=thisColumn[1];
1209 int iColumn2=thisColumn[2];
1210 int iColumn3=thisColumn[3];
1211 CoinFactorizationDouble value0=region[iColumn0];
1212 CoinFactorizationDouble value1=region[iColumn1];
1213 CoinFactorizationDouble value2=region[iColumn2];
1214 CoinFactorizationDouble value3=region[iColumn3];
1215 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1216 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1217 value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1218 value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1219 region[iColumn0]=value0;
1220 region[iColumn1]=value1;
1221 region[iColumn2]=value2;
1222 region[iColumn3]=value3;
1223 #else
1224 abort();
1225 #endif
1226 element+=NEW_CHUNK_SIZE_INCREMENT;
1227 thisColumn = reinterpret_cast<const int *>(element);
1228 }
1229 #if NEW_CHUNK_SIZE == 4
1230 element++;
1231 int iColumn0=thisColumn[0];
1232 int iColumn1=thisColumn[1];
1233 CoinFactorizationDouble value0=region[iColumn0];
1234 CoinFactorizationDouble value1=region[iColumn1];
1235 value0 += multiplier*element[0];
1236 value1 += multiplier*element[1];
1237 region[iColumn0]=value0;
1238 region[iColumn1]=value1;
1239 #endif
1240 }
1241 void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier,
1242 const CoinFactorizationDouble * COIN_RESTRICT element,
1243 CoinFactorizationDouble * COIN_RESTRICT region)
1244 {
1245 #ifndef NDEBUG
1246 assert (numberIn==7);
1247 #endif
1248 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1249 int nFull=7&(~(NEW_CHUNK_SIZE-1));
1250 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1251 coin_prefetch_const(element+6);
1252 #if NEW_CHUNK_SIZE == 2
1253 int iColumn0=thisColumn[0];
1254 int iColumn1=thisColumn[1];
1255 CoinFactorizationDouble value0=region[iColumn0];
1256 CoinFactorizationDouble value1=region[iColumn1];
1257 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1258 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1259 region[iColumn0]=value0;
1260 region[iColumn1]=value1;
1261 #elif NEW_CHUNK_SIZE == 4
1262 int iColumn0=thisColumn[0];
1263 int iColumn1=thisColumn[1];
1264 int iColumn2=thisColumn[2];
1265 int iColumn3=thisColumn[3];
1266 CoinFactorizationDouble value0=region[iColumn0];
1267 CoinFactorizationDouble value1=region[iColumn1];
1268 CoinFactorizationDouble value2=region[iColumn2];
1269 CoinFactorizationDouble value3=region[iColumn3];
1270 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1271 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1272 value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1273 value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1274 region[iColumn0]=value0;
1275 region[iColumn1]=value1;
1276 region[iColumn2]=value2;
1277 region[iColumn3]=value3;
1278 #else
1279 abort();
1280 #endif
1281 element+=NEW_CHUNK_SIZE_INCREMENT;
1282 thisColumn = reinterpret_cast<const int *>(element);
1283 }
1284 #if NEW_CHUNK_SIZE == 2
1285 element++;
1286 int iColumn0=thisColumn[0];
1287 CoinFactorizationDouble value0=region[iColumn0];
1288 value0 += multiplier*element[0];
1289 region[iColumn0]=value0;
1290 #else
1291 element+=2;
1292 int iColumn0=thisColumn[0];
1293 int iColumn1=thisColumn[1];
1294 int iColumn2=thisColumn[2];
1295 CoinFactorizationDouble value0=region[iColumn0];
1296 CoinFactorizationDouble value1=region[iColumn1];
1297 CoinFactorizationDouble value2=region[iColumn2];
1298 value0 += multiplier*element[0];
1299 value1 += multiplier*element[1];
1300 value2 += multiplier*element[2];
1301 region[iColumn0]=value0;
1302 region[iColumn1]=value1;
1303 region[iColumn2]=value2;
1304 #endif
1305 }
1306 void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier,
1307 const CoinFactorizationDouble * COIN_RESTRICT element,
1308 CoinFactorizationDouble * COIN_RESTRICT region)
1309 {
1310 #ifndef NDEBUG
1311 assert (numberIn==8);
1312 #endif
1313 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1314 int nFull=8&(~(NEW_CHUNK_SIZE-1));
1315 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1316 coin_prefetch_const(element+6);
1317 #if NEW_CHUNK_SIZE == 2
1318 int iColumn0=thisColumn[0];
1319 int iColumn1=thisColumn[1];
1320 CoinFactorizationDouble value0=region[iColumn0];
1321 CoinFactorizationDouble value1=region[iColumn1];
1322 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1323 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1324 region[iColumn0]=value0;
1325 region[iColumn1]=value1;
1326 #elif NEW_CHUNK_SIZE == 4
1327 int iColumn0=thisColumn[0];
1328 int iColumn1=thisColumn[1];
1329 int iColumn2=thisColumn[2];
1330 int iColumn3=thisColumn[3];
1331 CoinFactorizationDouble value0=region[iColumn0];
1332 CoinFactorizationDouble value1=region[iColumn1];
1333 CoinFactorizationDouble value2=region[iColumn2];
1334 CoinFactorizationDouble value3=region[iColumn3];
1335 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1336 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1337 value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1338 value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1339 region[iColumn0]=value0;
1340 region[iColumn1]=value1;
1341 region[iColumn2]=value2;
1342 region[iColumn3]=value3;
1343 #else
1344 abort();
1345 #endif
1346 element+=NEW_CHUNK_SIZE_INCREMENT;
1347 thisColumn = reinterpret_cast<const int *>(element);
1348 }
1349 }
1350 void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier,
1351 const CoinFactorizationDouble * COIN_RESTRICT element,
1352 CoinFactorizationDouble * COIN_RESTRICT region)
1353 {
1354 assert ((numberIn&3)==0);
1355 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1356 int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1357 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1358 coin_prefetch_const(element+6);
1359 #if NEW_CHUNK_SIZE == 2
1360 int iColumn0=thisColumn[0];
1361 int iColumn1=thisColumn[1];
1362 CoinFactorizationDouble value0=region[iColumn0];
1363 CoinFactorizationDouble value1=region[iColumn1];
1364 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1365 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1366 region[iColumn0]=value0;
1367 region[iColumn1]=value1;
1368 #elif NEW_CHUNK_SIZE == 4
1369 int iColumn0=thisColumn[0];
1370 int iColumn1=thisColumn[1];
1371 int iColumn2=thisColumn[2];
1372 int iColumn3=thisColumn[3];
1373 CoinFactorizationDouble value0=region[iColumn0];
1374 CoinFactorizationDouble value1=region[iColumn1];
1375 CoinFactorizationDouble value2=region[iColumn2];
1376 CoinFactorizationDouble value3=region[iColumn3];
1377 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1378 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1379 value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1380 value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1381 region[iColumn0]=value0;
1382 region[iColumn1]=value1;
1383 region[iColumn2]=value2;
1384 region[iColumn3]=value3;
1385 #else
1386 abort();
1387 #endif
1388 element+=NEW_CHUNK_SIZE_INCREMENT;
1389 thisColumn = reinterpret_cast<const int *>(element);
1390 }
1391 }
1392 void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier,
1393 const CoinFactorizationDouble * COIN_RESTRICT element,
1394 CoinFactorizationDouble * COIN_RESTRICT region)
1395 {
1396 assert ((numberIn&3)==1);
1397 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1398 int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1399 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1400 coin_prefetch_const(element+6);
1401 #if NEW_CHUNK_SIZE == 2
1402 int iColumn0=thisColumn[0];
1403 int iColumn1=thisColumn[1];
1404 CoinFactorizationDouble value0=region[iColumn0];
1405 CoinFactorizationDouble value1=region[iColumn1];
1406 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1407 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1408 region[iColumn0]=value0;
1409 region[iColumn1]=value1;
1410 #elif NEW_CHUNK_SIZE == 4
1411 int iColumn0=thisColumn[0];
1412 int iColumn1=thisColumn[1];
1413 int iColumn2=thisColumn[2];
1414 int iColumn3=thisColumn[3];
1415 CoinFactorizationDouble value0=region[iColumn0];
1416 CoinFactorizationDouble value1=region[iColumn1];
1417 CoinFactorizationDouble value2=region[iColumn2];
1418 CoinFactorizationDouble value3=region[iColumn3];
1419 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1420 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1421 value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1422 value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1423 region[iColumn0]=value0;
1424 region[iColumn1]=value1;
1425 region[iColumn2]=value2;
1426 region[iColumn3]=value3;
1427 #else
1428 abort();
1429 #endif
1430 element+=NEW_CHUNK_SIZE_INCREMENT;
1431 thisColumn = reinterpret_cast<const int *>(element);
1432 }
1433 element++;
1434 int iColumn0=thisColumn[0];
1435 CoinFactorizationDouble value0=region[iColumn0];
1436 value0 += multiplier*element[0];
1437 region[iColumn0]=value0;
1438 }
1439 void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier,
1440 const CoinFactorizationDouble * COIN_RESTRICT element,
1441 CoinFactorizationDouble * COIN_RESTRICT region)
1442 {
1443 assert ((numberIn&3)==2);
1444 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1445 int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1446 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1447 coin_prefetch_const(element+6);
1448 #if NEW_CHUNK_SIZE == 2
1449 int iColumn0=thisColumn[0];
1450 int iColumn1=thisColumn[1];
1451 CoinFactorizationDouble value0=region[iColumn0];
1452 CoinFactorizationDouble value1=region[iColumn1];
1453 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1454 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1455 region[iColumn0]=value0;
1456 region[iColumn1]=value1;
1457 #elif NEW_CHUNK_SIZE == 4
1458 int iColumn0=thisColumn[0];
1459 int iColumn1=thisColumn[1];
1460 int iColumn2=thisColumn[2];
1461 int iColumn3=thisColumn[3];
1462 CoinFactorizationDouble value0=region[iColumn0];
1463 CoinFactorizationDouble value1=region[iColumn1];
1464 CoinFactorizationDouble value2=region[iColumn2];
1465 CoinFactorizationDouble value3=region[iColumn3];
1466 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1467 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1468 value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1469 value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1470 region[iColumn0]=value0;
1471 region[iColumn1]=value1;
1472 region[iColumn2]=value2;
1473 region[iColumn3]=value3;
1474 #else
1475 abort();
1476 #endif
1477 element+=NEW_CHUNK_SIZE_INCREMENT;
1478 thisColumn = reinterpret_cast<const int *>(element);
1479 }
1480 #if NEW_CHUNK_SIZE == 4
1481 element++;
1482 int iColumn0=thisColumn[0];
1483 int iColumn1=thisColumn[1];
1484 CoinFactorizationDouble value0=region[iColumn0];
1485 CoinFactorizationDouble value1=region[iColumn1];
1486 value0 += multiplier*element[0];
1487 value1 += multiplier*element[1];
1488 region[iColumn0]=value0;
1489 region[iColumn1]=value1;
1490 #endif
1491 }
1492 void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier,
1493 const CoinFactorizationDouble * COIN_RESTRICT element,
1494 CoinFactorizationDouble * COIN_RESTRICT region)
1495 {
1496 assert ((numberIn&3)==3);
1497 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1498 int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1499 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1500 coin_prefetch_const(element+6);
1501 #if NEW_CHUNK_SIZE == 2
1502 int iColumn0=thisColumn[0];
1503 int iColumn1=thisColumn[1];
1504 CoinFactorizationDouble value0=region[iColumn0];
1505 CoinFactorizationDouble value1=region[iColumn1];
1506 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1507 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1508 region[iColumn0]=value0;
1509 region[iColumn1]=value1;
1510 #elif NEW_CHUNK_SIZE == 4
1511 int iColumn0=thisColumn[0];
1512 int iColumn1=thisColumn[1];
1513 int iColumn2=thisColumn[2];
1514 int iColumn3=thisColumn[3];
1515 CoinFactorizationDouble value0=region[iColumn0];
1516 CoinFactorizationDouble value1=region[iColumn1];
1517 CoinFactorizationDouble value2=region[iColumn2];
1518 CoinFactorizationDouble value3=region[iColumn3];
1519 value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1520 value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1521 value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1522 value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1523 region[iColumn0]=value0;
1524 region[iColumn1]=value1;
1525 region[iColumn2]=value2;
1526 region[iColumn3]=value3;
1527 #else
1528 abort();
1529 #endif
1530 element+=NEW_CHUNK_SIZE_INCREMENT;
1531 thisColumn = reinterpret_cast<const int *>(element);
1532 }
1533 #if NEW_CHUNK_SIZE == 2
1534 element++;
1535 int iColumn0=thisColumn[0];
1536 CoinFactorizationDouble value0=region[iColumn0];
1537 value0 += multiplier*element[0];
1538 region[iColumn0]=value0;
1539 #else
1540 element+=2;
1541 int iColumn0=thisColumn[0];
1542 int iColumn1=thisColumn[1];
1543 int iColumn2=thisColumn[2];
1544 CoinFactorizationDouble value0=region[iColumn0];
1545 CoinFactorizationDouble value1=region[iColumn1];
1546 CoinFactorizationDouble value2=region[iColumn2];
1547 value0 += multiplier*element[0];
1548 value1 += multiplier*element[1];
1549 value2 += multiplier*element[2];
1550 region[iColumn0]=value0;
1551 region[iColumn1]=value1;
1552 region[iColumn2]=value2;
1553 #endif
1554 }
1555 #else
1556 // alternative
1557 #define CACHE_LINE_SIZE 16
1558 #define OPERATION +=
1559 #define functionName(zname) CoinAbc##zname
1560 #define ABC_CREATE_SCATTER_FUNCTION 1
1561 #include "CoinAbcHelperFunctions.hpp"
1562 #undef OPERATION
1563 #undef functionName
1564 #define OPERATION -=
1565 #define functionName(zname) CoinAbc##zname##Subtract
1566 #include "CoinAbcHelperFunctions.hpp"
1567 #undef OPERATION
1568 #undef functionName
1569 #define OPERATION +=
1570 #define functionName(zname) CoinAbc##zname##Add
1571 #include "CoinAbcHelperFunctions.hpp"
1572 scatterUpdate AbcScatterLow[] = {
1573 &CoinAbcScatterUpdate0,
1574 &CoinAbcScatterUpdate1,
1575 &CoinAbcScatterUpdate2,
1576 &CoinAbcScatterUpdate3,
1577 &CoinAbcScatterUpdate4,
1578 &CoinAbcScatterUpdate5,
1579 &CoinAbcScatterUpdate6,
1580 &CoinAbcScatterUpdate7,
1581 &CoinAbcScatterUpdate8
1582 };
1583 scatterUpdate AbcScatterHigh[] = {
1584 &CoinAbcScatterUpdate4N,
1585 &CoinAbcScatterUpdate4NPlus1,
1586 &CoinAbcScatterUpdate4NPlus2,
1587 &CoinAbcScatterUpdate4NPlus3
1588 };
1589 scatterUpdate AbcScatterLowSubtract[] = {
1590 &CoinAbcScatterUpdate0,
1591 &CoinAbcScatterUpdate1Subtract,
1592 &CoinAbcScatterUpdate2Subtract,
1593 &CoinAbcScatterUpdate3Subtract,
1594 &CoinAbcScatterUpdate4Subtract,
1595 &CoinAbcScatterUpdate5Subtract,
1596 &CoinAbcScatterUpdate6Subtract,
1597 &CoinAbcScatterUpdate7Subtract,
1598 &CoinAbcScatterUpdate8Subtract
1599 };
1600 scatterUpdate AbcScatterHighSubtract[] = {
1601 &CoinAbcScatterUpdate4NSubtract,
1602 &CoinAbcScatterUpdate4NPlus1Subtract,
1603 &CoinAbcScatterUpdate4NPlus2Subtract,
1604 &CoinAbcScatterUpdate4NPlus3Subtract
1605 };
1606 scatterUpdate AbcScatterLowAdd[] = {
1607 &CoinAbcScatterUpdate0,
1608 &CoinAbcScatterUpdate1Add,
1609 &CoinAbcScatterUpdate2Add,
1610 &CoinAbcScatterUpdate3Add,
1611 &CoinAbcScatterUpdate4Add,
1612 &CoinAbcScatterUpdate5Add,
1613 &CoinAbcScatterUpdate6Add,
1614 &CoinAbcScatterUpdate7Add,
1615 &CoinAbcScatterUpdate8Add
1616 };
1617 scatterUpdate AbcScatterHighAdd[] = {
1618 &CoinAbcScatterUpdate4NAdd,
1619 &CoinAbcScatterUpdate4NPlus1Add,
1620 &CoinAbcScatterUpdate4NPlus2Add,
1621 &CoinAbcScatterUpdate4NPlus3Add
1622 };
1623 #endif
1624 #include "CoinPragma.hpp"
1625
1626 #include <math.h>
1627
1628 #include <cfloat>
1629 #include <cassert>
1630 #include <string>
1631 #include <stdio.h>
1632 #include <iostream>
1633 #include "CoinAbcCommonFactorization.hpp"
1634 #if 1
1635 #if AVX2 == 1
1636 #include "emmintrin.h"
1637 #elif AVX2 == 2
1638 #include <immintrin.h>
1639 #elif AVX2 == 3
1640 #include "avx2intrin.h"
1641 #endif
1642 #endif
1643 // cilk seems a bit fragile
1644 #define CILK_FRAGILE 0
1645 #if CILK_FRAGILE > 1
1646 #undef cilk_spawn
1647 #undef cilk_sync
1648 #define cilk_spawn
1649 #define cilk_sync
1650 #define ONWARD 0
1651 #elif CILK_FRAGILE == 1
1652 #define ONWARD 0
1653 #else
1654 #define ONWARD 1
1655 #endif
1656 #define BLOCKING1 8 // factorization strip
1657 #define BLOCKING2 8 // dgemm recursive
1658 #define BLOCKING3 32 // dgemm parallel
CoinAbcDgemm(int m,int n,int k,double * COIN_RESTRICT a,int lda,double * COIN_RESTRICT b,double * COIN_RESTRICT c,int parallelMode)1659 void CoinAbcDgemm(int m, int n, int k, double *COIN_RESTRICT a, int lda,
1660 double *COIN_RESTRICT b, double *COIN_RESTRICT c
1661 #if ABC_PARALLEL == 2
1662 ,
1663 int parallelMode
1664 #endif
1665 )
1666 {
1667 assert((m & (BLOCKING8 - 1)) == 0 && (n & (BLOCKING8 - 1)) == 0 && (k & (BLOCKING8 - 1)) == 0);
1668 /* entry for column j and row i (when multiple of BLOCKING8)
1669 is at aBlocked+j*m+i*BLOCKING8
1670 */
1671 // k is always smallish
1672 if (m <= BLOCKING8 && n <= BLOCKING8) {
1673 assert(m == BLOCKING8 && n == BLOCKING8 && k == BLOCKING8);
1674 double *COIN_RESTRICT aBase2 = a;
1675 double *COIN_RESTRICT bBase2 = b;
1676 double *COIN_RESTRICT cBase2 = c;
1677 for (int j = 0; j < BLOCKING8; j++) {
1678 double *COIN_RESTRICT aBase = aBase2;
1679 #if AVX2 != 2
1680 #if 1
1681 double c0 = cBase2[0];
1682 double c1 = cBase2[1];
1683 double c2 = cBase2[2];
1684 double c3 = cBase2[3];
1685 double c4 = cBase2[4];
1686 double c5 = cBase2[5];
1687 double c6 = cBase2[6];
1688 double c7 = cBase2[7];
1689 for (int l = 0; l < BLOCKING8; l++) {
1690 double bValue = bBase2[l];
1691 if (bValue) {
1692 c0 -= bValue * aBase[0];
1693 c1 -= bValue * aBase[1];
1694 c2 -= bValue * aBase[2];
1695 c3 -= bValue * aBase[3];
1696 c4 -= bValue * aBase[4];
1697 c5 -= bValue * aBase[5];
1698 c6 -= bValue * aBase[6];
1699 c7 -= bValue * aBase[7];
1700 }
1701 aBase += BLOCKING8;
1702 }
1703 cBase2[0] = c0;
1704 cBase2[1] = c1;
1705 cBase2[2] = c2;
1706 cBase2[3] = c3;
1707 cBase2[4] = c4;
1708 cBase2[5] = c5;
1709 cBase2[6] = c6;
1710 cBase2[7] = c7;
1711 #else
1712 for (int l = 0; l < BLOCKING8; l++) {
1713 double bValue = bBase2[l];
1714 if (bValue) {
1715 for (int i = 0; i < BLOCKING8; i++) {
1716 cBase2[i] -= bValue * aBase[i];
1717 }
1718 }
1719 aBase += BLOCKING8;
1720 }
1721 #endif
1722 #else
1723 //__m256d c0=_mm256_load_pd(cBase2);
1724 __m256d c0 = *reinterpret_cast< __m256d * >(cBase2);
1725 //__m256d c1=_mm256_load_pd(cBase2+4);
1726 __m256d c1 = *reinterpret_cast< __m256d * >(cBase2 + 4);
1727 for (int l = 0; l < BLOCKING8; l++) {
1728 //__m256d bb = _mm256_broadcast_sd(bBase2+l);
1729 __m256d bb = static_cast< __m256d >(__builtin_ia32_vbroadcastsd256(bBase2 + l));
1730 //__m256d a0 = _mm256_load_pd(aBase);
1731 __m256d a0 = *reinterpret_cast< __m256d * >(aBase);
1732 //__m256d a1 = _mm256_load_pd(aBase+4);
1733 __m256d a1 = *reinterpret_cast< __m256d * >(aBase + 4);
1734 c0 -= bb * a0;
1735 c1 -= bb * a1;
1736 aBase += BLOCKING8;
1737 }
1738 //_mm256_store_pd (cBase2, c0);
1739 *reinterpret_cast< __m256d * >(cBase2) = c0;
1740 //_mm256_store_pd (cBase2+4, c1);
1741 *reinterpret_cast< __m256d * >(cBase2 + 4) = c1;
1742 #endif
1743 bBase2 += BLOCKING8;
1744 cBase2 += BLOCKING8;
1745 }
1746 } else if (m > n) {
1747 // make sure mNew1 multiple of BLOCKING8
1748 #if BLOCKING8 == 8
1749 int mNew1 = ((m + 15) >> 4) << 3;
1750 #elif BLOCKING8 == 4
1751 int mNew1 = ((m + 7) >> 3) << 2;
1752 #elif BLOCKING8 == 2
1753 int mNew1 = ((m + 3) >> 2) << 1;
1754 #else
1755 abort();
1756 #endif
1757 assert(mNew1 > 0 && m - mNew1 > 0);
1758 #if ABC_PARALLEL == 2
1759 if (mNew1 <= BLOCKING3 || !parallelMode) {
1760 #endif
1761 //printf("splitMa mNew1 %d\n",mNew1);
1762 CoinAbcDgemm(mNew1, n, k, a, lda, b, c
1763 #if ABC_PARALLEL == 2
1764 ,
1765 0
1766 #endif
1767 );
1768 //printf("splitMb mNew1 %d\n",mNew1);
1769 CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8
1770 #if ABC_PARALLEL == 2
1771 ,
1772 0
1773 #endif
1774 );
1775 #if ABC_PARALLEL == 2
1776 } else {
1777 //printf("splitMa mNew1 %d\n",mNew1);
1778 cilk_spawn CoinAbcDgemm(mNew1, n, k, a, lda, b, c, ONWARD);
1779 //printf("splitMb mNew1 %d\n",mNew1);
1780 CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8, ONWARD);
1781 cilk_sync;
1782 }
1783 #endif
1784 } else {
1785 // make sure nNew1 multiple of BLOCKING8
1786 #if BLOCKING8 == 8
1787 int nNew1 = ((n + 15) >> 4) << 3;
1788 #elif BLOCKING8 == 4
1789 int nNew1 = ((n + 7) >> 3) << 2;
1790 #elif BLOCKING8 == 2
1791 int nNew1 = ((n + 3) >> 2) << 1;
1792 #else
1793 abort();
1794 #endif
1795 assert(nNew1 > 0 && n - nNew1 > 0);
1796 #if ABC_PARALLEL == 2
1797 if (nNew1 <= BLOCKING3 || !parallelMode) {
1798 #endif
1799 //printf("splitNa nNew1 %d\n",nNew1);
1800 CoinAbcDgemm(m, nNew1, k, a, lda, b, c
1801 #if ABC_PARALLEL == 2
1802 ,
1803 0
1804 #endif
1805 );
1806 //printf("splitNb nNew1 %d\n",nNew1);
1807 CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1
1808 #if ABC_PARALLEL == 2
1809 ,
1810 0
1811 #endif
1812 );
1813 #if ABC_PARALLEL == 2
1814 } else {
1815 //printf("splitNa nNew1 %d\n",nNew1);
1816 cilk_spawn CoinAbcDgemm(m, nNew1, k, a, lda, b, c, ONWARD);
1817 //printf("splitNb nNew1 %d\n",nNew1);
1818 CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1, ONWARD);
1819 cilk_sync;
1820 }
1821 #endif
1822 }
1823 }
1824 #ifdef ABC_LONG_FACTORIZATION
1825 // Start long double version
CoinAbcDgemm(int m,int n,int k,long double * COIN_RESTRICT a,int lda,long double * COIN_RESTRICT b,long double * COIN_RESTRICT c,int parallelMode)1826 void CoinAbcDgemm(int m, int n, int k, long double *COIN_RESTRICT a, int lda,
1827 long double *COIN_RESTRICT b, long double *COIN_RESTRICT c
1828 #if ABC_PARALLEL == 2
1829 ,
1830 int parallelMode
1831 #endif
1832 )
1833 {
1834 assert((m & (BLOCKING8 - 1)) == 0 && (n & (BLOCKING8 - 1)) == 0 && (k & (BLOCKING8 - 1)) == 0);
1835 /* entry for column j and row i (when multiple of BLOCKING8)
1836 is at aBlocked+j*m+i*BLOCKING8
1837 */
1838 // k is always smallish
1839 if (m <= BLOCKING8 && n <= BLOCKING8) {
1840 assert(m == BLOCKING8 && n == BLOCKING8 && k == BLOCKING8);
1841 long double *COIN_RESTRICT aBase2 = a;
1842 long double *COIN_RESTRICT bBase2 = b;
1843 long double *COIN_RESTRICT cBase2 = c;
1844 for (int j = 0; j < BLOCKING8; j++) {
1845 long double *COIN_RESTRICT aBase = aBase2;
1846 #if AVX2 != 2
1847 #if 1
1848 long double c0 = cBase2[0];
1849 long double c1 = cBase2[1];
1850 long double c2 = cBase2[2];
1851 long double c3 = cBase2[3];
1852 long double c4 = cBase2[4];
1853 long double c5 = cBase2[5];
1854 long double c6 = cBase2[6];
1855 long double c7 = cBase2[7];
1856 for (int l = 0; l < BLOCKING8; l++) {
1857 long double bValue = bBase2[l];
1858 if (bValue) {
1859 c0 -= bValue * aBase[0];
1860 c1 -= bValue * aBase[1];
1861 c2 -= bValue * aBase[2];
1862 c3 -= bValue * aBase[3];
1863 c4 -= bValue * aBase[4];
1864 c5 -= bValue * aBase[5];
1865 c6 -= bValue * aBase[6];
1866 c7 -= bValue * aBase[7];
1867 }
1868 aBase += BLOCKING8;
1869 }
1870 cBase2[0] = c0;
1871 cBase2[1] = c1;
1872 cBase2[2] = c2;
1873 cBase2[3] = c3;
1874 cBase2[4] = c4;
1875 cBase2[5] = c5;
1876 cBase2[6] = c6;
1877 cBase2[7] = c7;
1878 #else
1879 for (int l = 0; l < BLOCKING8; l++) {
1880 long double bValue = bBase2[l];
1881 if (bValue) {
1882 for (int i = 0; i < BLOCKING8; i++) {
1883 cBase2[i] -= bValue * aBase[i];
1884 }
1885 }
1886 aBase += BLOCKING8;
1887 }
1888 #endif
1889 #else
1890 //__m256d c0=_mm256_load_pd(cBase2);
1891 __m256d c0 = *reinterpret_cast< __m256d * >(cBase2);
1892 //__m256d c1=_mm256_load_pd(cBase2+4);
1893 __m256d c1 = *reinterpret_cast< __m256d * >(cBase2 + 4);
1894 for (int l = 0; l < BLOCKING8; l++) {
1895 //__m256d bb = _mm256_broadcast_sd(bBase2+l);
1896 __m256d bb = static_cast< __m256d >(__builtin_ia32_vbroadcastsd256(bBase2 + l));
1897 //__m256d a0 = _mm256_load_pd(aBase);
1898 __m256d a0 = *reinterpret_cast< __m256d * >(aBase);
1899 //__m256d a1 = _mm256_load_pd(aBase+4);
1900 __m256d a1 = *reinterpret_cast< __m256d * >(aBase + 4);
1901 c0 -= bb * a0;
1902 c1 -= bb * a1;
1903 aBase += BLOCKING8;
1904 }
1905 //_mm256_store_pd (cBase2, c0);
1906 *reinterpret_cast< __m256d * >(cBase2) = c0;
1907 //_mm256_store_pd (cBase2+4, c1);
1908 *reinterpret_cast< __m256d * >(cBase2 + 4) = c1;
1909 #endif
1910 bBase2 += BLOCKING8;
1911 cBase2 += BLOCKING8;
1912 }
1913 } else if (m > n) {
1914 // make sure mNew1 multiple of BLOCKING8
1915 #if BLOCKING8 == 8
1916 int mNew1 = ((m + 15) >> 4) << 3;
1917 #elif BLOCKING8 == 4
1918 int mNew1 = ((m + 7) >> 3) << 2;
1919 #elif BLOCKING8 == 2
1920 int mNew1 = ((m + 3) >> 2) << 1;
1921 #else
1922 abort();
1923 #endif
1924 assert(mNew1 > 0 && m - mNew1 > 0);
1925 #if ABC_PARALLEL == 2
1926 if (mNew1 <= BLOCKING3 || !parallelMode) {
1927 #endif
1928 //printf("splitMa mNew1 %d\n",mNew1);
1929 CoinAbcDgemm(mNew1, n, k, a, lda, b, c
1930 #if ABC_PARALLEL == 2
1931 ,
1932 0
1933 #endif
1934 );
1935 //printf("splitMb mNew1 %d\n",mNew1);
1936 CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8
1937 #if ABC_PARALLEL == 2
1938 ,
1939 0
1940 #endif
1941 );
1942 #if ABC_PARALLEL == 2
1943 } else {
1944 //printf("splitMa mNew1 %d\n",mNew1);
1945 cilk_spawn CoinAbcDgemm(mNew1, n, k, a, lda, b, c, ONWARD);
1946 //printf("splitMb mNew1 %d\n",mNew1);
1947 CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8, ONWARD);
1948 cilk_sync;
1949 }
1950 #endif
1951 } else {
1952 // make sure nNew1 multiple of BLOCKING8
1953 #if BLOCKING8 == 8
1954 int nNew1 = ((n + 15) >> 4) << 3;
1955 #elif BLOCKING8 == 4
1956 int nNew1 = ((n + 7) >> 3) << 2;
1957 #elif BLOCKING8 == 2
1958 int nNew1 = ((n + 3) >> 2) << 1;
1959 #else
1960 abort();
1961 #endif
1962 assert(nNew1 > 0 && n - nNew1 > 0);
1963 #if ABC_PARALLEL == 2
1964 if (nNew1 <= BLOCKING3 || !parallelMode) {
1965 #endif
1966 //printf("splitNa nNew1 %d\n",nNew1);
1967 CoinAbcDgemm(m, nNew1, k, a, lda, b, c
1968 #if ABC_PARALLEL == 2
1969 ,
1970 0
1971 #endif
1972 );
1973 //printf("splitNb nNew1 %d\n",nNew1);
1974 CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1
1975 #if ABC_PARALLEL == 2
1976 ,
1977 0
1978 #endif
1979 );
1980 #if ABC_PARALLEL == 2
1981 } else {
1982 //printf("splitNa nNew1 %d\n",nNew1);
1983 cilk_spawn CoinAbcDgemm(m, nNew1, k, a, lda, b, c, ONWARD);
1984 //printf("splitNb nNew1 %d\n",nNew1);
1985 CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1, ONWARD);
1986 cilk_sync;
1987 }
1988 #endif
1989 }
1990 }
1991 #endif
1992 #if 0
1993 // From Intel site
1994 // get AVX intrinsics
1995 #include <immintrin.h>
1996 // get CPUID capability
1997 //#include <intrin.h>
1998 #define cpuid(func, ax, bx, cx, dx) \
1999 __asm__ __volatile__("cpuid" \
2000 : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
2001 : "a"(func));
2002 // written for clarity, not conciseness
2003 #define OSXSAVEFlag (1UL << 27)
2004 #define AVXFlag ((1UL << 28) | OSXSAVEFlag)
2005 #define VAESFlag ((1UL << 25) | AVXFlag | OSXSAVEFlag)
2006 #define FMAFlag ((1UL << 12) | AVXFlag | OSXSAVEFlag)
2007 #define CLMULFlag ((1UL << 1) | AVXFlag | OSXSAVEFlag)
2008
2009 bool DetectFeature(unsigned int feature)
2010 {
2011 int CPUInfo[4]; //, InfoType=1, ECX = 1;
2012 //__cpuidex(CPUInfo, 1, 1); // read the desired CPUID format
2013 cpuid(1,CPUInfo[0],CPUInfo[1],CPUInfo[2],CPUInfo[3]);
2014 unsigned int ECX = CPUInfo[2]; // the output of CPUID in the ECX register.
2015 if ((ECX & feature) != feature) // Missing feature
2016 return false;
2017 #if 0
2018 long int val = _xgetbv(0); // read XFEATURE_ENABLED_MASK register
2019 if ((val&6) != 6) // check OS has enabled both XMM and YMM support.
2020 return false;
2021 #endif
2022 return true;
2023 }
2024 #endif
2025
2026 /* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
2027 */
2028