1 /* $Id: CoinAbcHelperFunctions.hpp 2385 2019-01-06 19:43:06Z unxusr $ */
2 // Copyright (C) 2003, International Business Machines
3 // Corporation and others, Copyright (C) 2012, FasterCoin. All Rights Reserved.
4 // This code is licensed under the terms of the Eclipse Public License (EPL).
5
6 #ifndef CoinAbcHelperFunctions_H
7 #define CoinAbcHelperFunctions_H
8
9 #include "ClpConfig.h"
10 #ifdef HAVE_CMATH
11 #include <cmath>
12 #else
13 #ifdef HAVE_MATH_H
14 #include <math.h>
15 #else
16 #include <cmath>
17 #endif
18 #endif
19 #include "CoinAbcCommon.hpp"
20 #ifndef abc_assert
21 #define abc_assert(condition) \
22 { \
23 if (!condition) { \
24 printf("abc_assert in %s at line %d - %s is false\n", \
25 __FILE__, __LINE__, __STRING(condition)); \
26 abort(); \
27 } \
28 }
29 #endif
30 // cilk_for granularity.
31 #define CILK_FOR_GRAINSIZE 128
32 //#define AVX2 2
33 #if AVX2 == 1
34 #include "emmintrin.h"
35 #elif AVX2 == 2
36 #include <immintrin.h>
37 #elif AVX2 == 3
38 #include "avx2intrin.h"
39 #endif
40 //#define __AVX__ 1
41 //#define __AVX2__ 1
42 /**
43 Note (JJF) I have added some operations on arrays even though they may
44 duplicate CoinDenseVector.
45
46 */
47
48 #define UNROLL_SCATTER 2
49 #define INLINE_SCATTER 1
50 #if INLINE_SCATTER == 0
51 void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
52 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
53 const int *COIN_RESTRICT thisIndex,
54 CoinFactorizationDouble *COIN_RESTRICT region);
55 #else
CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)56 void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
57 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
58 const int *COIN_RESTRICT thisIndex,
59 CoinFactorizationDouble *COIN_RESTRICT region)
60 {
61 #if UNROLL_SCATTER == 0
62 for (CoinBigIndex j = number - 1; j >= 0; j--) {
63 CoinSimplexInt iRow = thisIndex[j];
64 CoinFactorizationDouble regionValue = region[iRow];
65 CoinFactorizationDouble value = thisElement[j];
66 assert(value);
67 region[iRow] = regionValue - value * pivotValue;
68 }
69 #elif UNROLL_SCATTER == 1
70 if ((number & 1) != 0) {
71 number--;
72 CoinSimplexInt iRow = thisIndex[number];
73 CoinFactorizationDouble regionValue = region[iRow];
74 CoinFactorizationDouble value = thisElement[number];
75 region[iRow] = regionValue - value * pivotValue;
76 }
77 for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
78 CoinSimplexInt iRow0 = thisIndex[j];
79 CoinSimplexInt iRow1 = thisIndex[j - 1];
80 CoinFactorizationDouble regionValue0 = region[iRow0];
81 CoinFactorizationDouble regionValue1 = region[iRow1];
82 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
83 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
84 }
85 #elif UNROLL_SCATTER == 2
86 if ((number & 1) != 0) {
87 number--;
88 CoinSimplexInt iRow = thisIndex[number];
89 CoinFactorizationDouble regionValue = region[iRow];
90 CoinFactorizationDouble value = thisElement[number];
91 region[iRow] = regionValue - value * pivotValue;
92 }
93 if ((number & 2) != 0) {
94 CoinSimplexInt iRow0 = thisIndex[number - 1];
95 CoinFactorizationDouble regionValue0 = region[iRow0];
96 CoinFactorizationDouble value0 = thisElement[number - 1];
97 CoinSimplexInt iRow1 = thisIndex[number - 2];
98 CoinFactorizationDouble regionValue1 = region[iRow1];
99 CoinFactorizationDouble value1 = thisElement[number - 2];
100 region[iRow0] = regionValue0 - value0 * pivotValue;
101 region[iRow1] = regionValue1 - value1 * pivotValue;
102 number -= 2;
103 }
104 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
105 cilk_for(CoinBigIndex j = number - 1; j >= 0; j -= 4)
106 {
107 CoinSimplexInt iRow0 = thisIndex[j];
108 CoinSimplexInt iRow1 = thisIndex[j - 1];
109 CoinFactorizationDouble regionValue0 = region[iRow0];
110 CoinFactorizationDouble regionValue1 = region[iRow1];
111 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
112 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
113 CoinSimplexInt iRow2 = thisIndex[j - 2];
114 CoinSimplexInt iRow3 = thisIndex[j - 3];
115 CoinFactorizationDouble regionValue2 = region[iRow2];
116 CoinFactorizationDouble regionValue3 = region[iRow3];
117 region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
118 region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
119 }
120 #elif UNROLL_SCATTER == 3
121 CoinSimplexInt iRow0;
122 CoinSimplexInt iRow1;
123 CoinFactorizationDouble regionValue0;
124 CoinFactorizationDouble regionValue1;
125 switch (static_cast< unsigned int >(number)) {
126 case 0:
127 break;
128 case 1:
129 iRow0 = thisIndex[0];
130 regionValue0 = region[iRow0];
131 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
132 break;
133 case 2:
134 iRow0 = thisIndex[0];
135 iRow1 = thisIndex[1];
136 regionValue0 = region[iRow0];
137 regionValue1 = region[iRow1];
138 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
139 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
140 break;
141 case 3:
142 iRow0 = thisIndex[0];
143 iRow1 = thisIndex[1];
144 regionValue0 = region[iRow0];
145 regionValue1 = region[iRow1];
146 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
147 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
148 iRow0 = thisIndex[2];
149 regionValue0 = region[iRow0];
150 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
151 break;
152 case 4:
153 iRow0 = thisIndex[0];
154 iRow1 = thisIndex[1];
155 regionValue0 = region[iRow0];
156 regionValue1 = region[iRow1];
157 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
158 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
159 iRow0 = thisIndex[2];
160 iRow1 = thisIndex[3];
161 regionValue0 = region[iRow0];
162 regionValue1 = region[iRow1];
163 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
164 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
165 break;
166 case 5:
167 iRow0 = thisIndex[0];
168 iRow1 = thisIndex[1];
169 regionValue0 = region[iRow0];
170 regionValue1 = region[iRow1];
171 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
172 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
173 iRow0 = thisIndex[2];
174 iRow1 = thisIndex[3];
175 regionValue0 = region[iRow0];
176 regionValue1 = region[iRow1];
177 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
178 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
179 iRow0 = thisIndex[4];
180 regionValue0 = region[iRow0];
181 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
182 break;
183 case 6:
184 iRow0 = thisIndex[0];
185 iRow1 = thisIndex[1];
186 regionValue0 = region[iRow0];
187 regionValue1 = region[iRow1];
188 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
189 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
190 iRow0 = thisIndex[2];
191 iRow1 = thisIndex[3];
192 regionValue0 = region[iRow0];
193 regionValue1 = region[iRow1];
194 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
195 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
196 iRow0 = thisIndex[4];
197 iRow1 = thisIndex[5];
198 regionValue0 = region[iRow0];
199 regionValue1 = region[iRow1];
200 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
201 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
202 break;
203 case 7:
204 iRow0 = thisIndex[0];
205 iRow1 = thisIndex[1];
206 regionValue0 = region[iRow0];
207 regionValue1 = region[iRow1];
208 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
209 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
210 iRow0 = thisIndex[2];
211 iRow1 = thisIndex[3];
212 regionValue0 = region[iRow0];
213 regionValue1 = region[iRow1];
214 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
215 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
216 iRow0 = thisIndex[4];
217 iRow1 = thisIndex[5];
218 regionValue0 = region[iRow0];
219 regionValue1 = region[iRow1];
220 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
221 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
222 iRow0 = thisIndex[6];
223 regionValue0 = region[iRow0];
224 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
225 break;
226 case 8:
227 iRow0 = thisIndex[0];
228 iRow1 = thisIndex[1];
229 regionValue0 = region[iRow0];
230 regionValue1 = region[iRow1];
231 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
232 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
233 iRow0 = thisIndex[2];
234 iRow1 = thisIndex[3];
235 regionValue0 = region[iRow0];
236 regionValue1 = region[iRow1];
237 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
238 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
239 iRow0 = thisIndex[4];
240 iRow1 = thisIndex[5];
241 regionValue0 = region[iRow0];
242 regionValue1 = region[iRow1];
243 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
244 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
245 iRow0 = thisIndex[6];
246 iRow1 = thisIndex[7];
247 regionValue0 = region[iRow0];
248 regionValue1 = region[iRow1];
249 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
250 region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
251 break;
252 default:
253 if ((number & 1) != 0) {
254 number--;
255 CoinSimplexInt iRow = thisIndex[number];
256 CoinFactorizationDouble regionValue = region[iRow];
257 CoinFactorizationDouble value = thisElement[number];
258 region[iRow] = regionValue - value * pivotValue;
259 }
260 for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
261 CoinSimplexInt iRow0 = thisIndex[j];
262 CoinSimplexInt iRow1 = thisIndex[j - 1];
263 CoinFactorizationDouble regionValue0 = region[iRow0];
264 CoinFactorizationDouble regionValue1 = region[iRow1];
265 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
266 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
267 }
268 break;
269 }
270 #endif
271 }
CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,const CoinFactorizationDouble * COIN_RESTRICT thisElement,CoinFactorizationDouble * COIN_RESTRICT region)272 void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
273 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
274 CoinFactorizationDouble *COIN_RESTRICT region)
275 {
276 #if UNROLL_SCATTER == 0
277 const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
278 for (CoinBigIndex j = number - 1; j >= 0; j--) {
279 CoinSimplexInt iRow = thisIndex[j];
280 CoinFactorizationDouble regionValue = region[iRow];
281 CoinFactorizationDouble value = thisElement[j];
282 assert(value);
283 region[iRow] = regionValue - value * pivotValue;
284 }
285 #elif UNROLL_SCATTER == 1
286 const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
287 if ((number & 1) != 0) {
288 number--;
289 CoinSimplexInt iRow = thisIndex[number];
290 CoinFactorizationDouble regionValue = region[iRow];
291 CoinFactorizationDouble value = thisElement[number];
292 region[iRow] = regionValue - value * pivotValue;
293 }
294 for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
295 CoinSimplexInt iRow0 = thisIndex[j];
296 CoinSimplexInt iRow1 = thisIndex[j - 1];
297 CoinFactorizationDouble regionValue0 = region[iRow0];
298 CoinFactorizationDouble regionValue1 = region[iRow1];
299 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
300 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
301 }
302 #elif UNROLL_SCATTER == 2
303 const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
304 if ((number & 1) != 0) {
305 number--;
306 CoinSimplexInt iRow = thisIndex[number];
307 CoinFactorizationDouble regionValue = region[iRow];
308 CoinFactorizationDouble value = thisElement[number];
309 region[iRow] = regionValue - value * pivotValue;
310 }
311 if ((number & 2) != 0) {
312 CoinSimplexInt iRow0 = thisIndex[number - 1];
313 CoinFactorizationDouble regionValue0 = region[iRow0];
314 CoinFactorizationDouble value0 = thisElement[number - 1];
315 CoinSimplexInt iRow1 = thisIndex[number - 2];
316 CoinFactorizationDouble regionValue1 = region[iRow1];
317 CoinFactorizationDouble value1 = thisElement[number - 2];
318 region[iRow0] = regionValue0 - value0 * pivotValue;
319 region[iRow1] = regionValue1 - value1 * pivotValue;
320 number -= 2;
321 }
322 #if AVX2 == 22
323 CoinFactorizationDouble temp[4] __attribute__((aligned(32)));
324 __m256d pv = _mm256_broadcast_sd(&pivotValue);
325 for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
326 __m256d elements = _mm256_loadu_pd(thisElement + j - 3);
327 CoinSimplexInt iRow0 = thisIndex[j - 3];
328 CoinSimplexInt iRow1 = thisIndex[j - 2];
329 CoinSimplexInt iRow2 = thisIndex[j - 1];
330 CoinSimplexInt iRow3 = thisIndex[j - 0];
331 temp[0] = region[iRow0];
332 temp[1] = region[iRow1];
333 temp[2] = region[iRow2];
334 temp[3] = region[iRow3];
335 __m256d t0 = _mm256_load_pd(temp);
336 t0 -= pv * elements;
337 _mm256_store_pd(temp, t0);
338 region[iRow0] = temp[0];
339 region[iRow1] = temp[1];
340 region[iRow2] = temp[2];
341 region[iRow3] = temp[3];
342 }
343 #else
344 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
345 cilk_for(CoinBigIndex j = number - 1; j >= 0; j -= 4)
346 {
347 CoinSimplexInt iRow0 = thisIndex[j];
348 CoinSimplexInt iRow1 = thisIndex[j - 1];
349 CoinFactorizationDouble regionValue0 = region[iRow0];
350 CoinFactorizationDouble regionValue1 = region[iRow1];
351 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
352 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
353 CoinSimplexInt iRow2 = thisIndex[j - 2];
354 CoinSimplexInt iRow3 = thisIndex[j - 3];
355 CoinFactorizationDouble regionValue2 = region[iRow2];
356 CoinFactorizationDouble regionValue3 = region[iRow3];
357 region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
358 region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
359 }
360 #endif
361 #elif UNROLL_SCATTER == 3
362 const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
363 CoinSimplexInt iRow0;
364 CoinSimplexInt iRow1;
365 CoinFactorizationDouble regionValue0;
366 CoinFactorizationDouble regionValue1;
367 switch (static_cast< unsigned int >(number)) {
368 case 0:
369 break;
370 case 1:
371 iRow0 = thisIndex[0];
372 regionValue0 = region[iRow0];
373 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
374 break;
375 case 2:
376 iRow0 = thisIndex[0];
377 iRow1 = thisIndex[1];
378 regionValue0 = region[iRow0];
379 regionValue1 = region[iRow1];
380 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
381 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
382 break;
383 case 3:
384 iRow0 = thisIndex[0];
385 iRow1 = thisIndex[1];
386 regionValue0 = region[iRow0];
387 regionValue1 = region[iRow1];
388 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
389 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
390 iRow0 = thisIndex[2];
391 regionValue0 = region[iRow0];
392 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
393 break;
394 case 4:
395 iRow0 = thisIndex[0];
396 iRow1 = thisIndex[1];
397 regionValue0 = region[iRow0];
398 regionValue1 = region[iRow1];
399 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
400 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
401 iRow0 = thisIndex[2];
402 iRow1 = thisIndex[3];
403 regionValue0 = region[iRow0];
404 regionValue1 = region[iRow1];
405 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
406 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
407 break;
408 case 5:
409 iRow0 = thisIndex[0];
410 iRow1 = thisIndex[1];
411 regionValue0 = region[iRow0];
412 regionValue1 = region[iRow1];
413 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
414 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
415 iRow0 = thisIndex[2];
416 iRow1 = thisIndex[3];
417 regionValue0 = region[iRow0];
418 regionValue1 = region[iRow1];
419 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
420 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
421 iRow0 = thisIndex[4];
422 regionValue0 = region[iRow0];
423 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
424 break;
425 case 6:
426 iRow0 = thisIndex[0];
427 iRow1 = thisIndex[1];
428 regionValue0 = region[iRow0];
429 regionValue1 = region[iRow1];
430 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
431 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
432 iRow0 = thisIndex[2];
433 iRow1 = thisIndex[3];
434 regionValue0 = region[iRow0];
435 regionValue1 = region[iRow1];
436 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
437 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
438 iRow0 = thisIndex[4];
439 iRow1 = thisIndex[5];
440 regionValue0 = region[iRow0];
441 regionValue1 = region[iRow1];
442 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
443 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
444 break;
445 case 7:
446 iRow0 = thisIndex[0];
447 iRow1 = thisIndex[1];
448 regionValue0 = region[iRow0];
449 regionValue1 = region[iRow1];
450 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
451 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
452 iRow0 = thisIndex[2];
453 iRow1 = thisIndex[3];
454 regionValue0 = region[iRow0];
455 regionValue1 = region[iRow1];
456 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
457 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
458 iRow0 = thisIndex[4];
459 iRow1 = thisIndex[5];
460 regionValue0 = region[iRow0];
461 regionValue1 = region[iRow1];
462 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
463 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
464 iRow0 = thisIndex[6];
465 regionValue0 = region[iRow0];
466 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
467 break;
468 case 8:
469 iRow0 = thisIndex[0];
470 iRow1 = thisIndex[1];
471 regionValue0 = region[iRow0];
472 regionValue1 = region[iRow1];
473 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
474 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
475 iRow0 = thisIndex[2];
476 iRow1 = thisIndex[3];
477 regionValue0 = region[iRow0];
478 regionValue1 = region[iRow1];
479 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
480 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
481 iRow0 = thisIndex[4];
482 iRow1 = thisIndex[5];
483 regionValue0 = region[iRow0];
484 regionValue1 = region[iRow1];
485 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
486 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
487 iRow0 = thisIndex[6];
488 iRow1 = thisIndex[7];
489 regionValue0 = region[iRow0];
490 regionValue1 = region[iRow1];
491 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
492 region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
493 break;
494 default:
495 if ((number & 1) != 0) {
496 number--;
497 CoinSimplexInt iRow = thisIndex[number];
498 CoinFactorizationDouble regionValue = region[iRow];
499 CoinFactorizationDouble value = thisElement[number];
500 region[iRow] = regionValue - value * pivotValue;
501 }
502 for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
503 CoinSimplexInt iRow0 = thisIndex[j];
504 CoinSimplexInt iRow1 = thisIndex[j - 1];
505 CoinFactorizationDouble regionValue0 = region[iRow0];
506 CoinFactorizationDouble regionValue1 = region[iRow1];
507 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
508 region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
509 }
510 break;
511 }
512 #endif
513 }
514 #endif
515 //#define COIN_PREFETCH
516 #ifdef COIN_PREFETCH
517 #if 1
518 #define coin_prefetch(mem) \
519 __asm__ __volatile__("prefetchnta %0" \
520 : \
521 : "m"(*(reinterpret_cast< char * >(mem))))
522 #define coin_prefetch_const(mem) \
523 __asm__ __volatile__("prefetchnta %0" \
524 : \
525 : "m"(*(reinterpret_cast< const char * >(mem))))
526 #else
527 #define coin_prefetch(mem) \
528 __asm__ __volatile__("prefetch %0" \
529 : \
530 : "m"(*(reinterpret_cast< char * >(mem))))
531 #define coin_prefetch_const(mem) \
532 __asm__ __volatile__("prefetch %0" \
533 : \
534 : "m"(*(reinterpret_cast< const char * >(mem))))
535 #endif
536 #else
537 // dummy
538 #define coin_prefetch(mem)
539 #define coin_prefetch_const(mem)
540 #endif
541 #define NEW_CHUNK_SIZE 4
542 #define NEW_CHUNK_SIZE_INCREMENT (NEW_CHUNK_SIZE + NEW_CHUNK_SIZE / 2);
543 #define NEW_CHUNK_SIZE_OFFSET (NEW_CHUNK_SIZE / 2)
544 // leaf, pure, nothrow and hot give warnings
545 // fastcall and sseregparm give wrong results
546 //#define SCATTER_ATTRIBUTE __attribute__ ((leaf,fastcall,pure,sseregparm,nothrow,hot))
547 #define SCATTER_ATTRIBUTE
548 typedef void (*scatterUpdate)(int, CoinFactorizationDouble, const CoinFactorizationDouble *, double *) SCATTER_ATTRIBUTE;
549 typedef struct {
550 scatterUpdate functionPointer;
551 CoinBigIndex offset;
552 int number;
553 } scatterStruct;
554 void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble multiplier,
555 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
556 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
557 void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier,
558 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
559 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
560 void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier,
561 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
562 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
563 void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier,
564 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
565 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
566 void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier,
567 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
568 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
569 void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier,
570 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
571 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
572 void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier,
573 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
574 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
575 void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier,
576 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
577 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
578 void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier,
579 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
580 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
581 void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier,
582 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
583 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
584 void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier,
585 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
586 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
587 void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier,
588 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
589 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
590 void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier,
591 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
592 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
593 void CoinAbcScatterUpdate1Subtract(int numberIn, CoinFactorizationDouble multiplier,
594 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
595 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
596 void CoinAbcScatterUpdate2Subtract(int numberIn, CoinFactorizationDouble multiplier,
597 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
598 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
599 void CoinAbcScatterUpdate3Subtract(int numberIn, CoinFactorizationDouble multiplier,
600 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
601 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
602 void CoinAbcScatterUpdate4Subtract(int numberIn, CoinFactorizationDouble multiplier,
603 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
604 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
605 void CoinAbcScatterUpdate5Subtract(int numberIn, CoinFactorizationDouble multiplier,
606 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
607 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
608 void CoinAbcScatterUpdate6Subtract(int numberIn, CoinFactorizationDouble multiplier,
609 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
610 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
611 void CoinAbcScatterUpdate7Subtract(int numberIn, CoinFactorizationDouble multiplier,
612 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
613 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
614 void CoinAbcScatterUpdate8Subtract(int numberIn, CoinFactorizationDouble multiplier,
615 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
616 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
617 void CoinAbcScatterUpdate4NSubtract(int numberIn, CoinFactorizationDouble multiplier,
618 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
619 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
620 void CoinAbcScatterUpdate4NPlus1Subtract(int numberIn, CoinFactorizationDouble multiplier,
621 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
622 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
623 void CoinAbcScatterUpdate4NPlus2Subtract(int numberIn, CoinFactorizationDouble multiplier,
624 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
625 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
626 void CoinAbcScatterUpdate4NPlus3Subtract(int numberIn, CoinFactorizationDouble multiplier,
627 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
628 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
629 void CoinAbcScatterUpdate1Add(int numberIn, CoinFactorizationDouble multiplier,
630 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
631 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
632 void CoinAbcScatterUpdate2Add(int numberIn, CoinFactorizationDouble multiplier,
633 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
634 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
635 void CoinAbcScatterUpdate3Add(int numberIn, CoinFactorizationDouble multiplier,
636 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
637 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
638 void CoinAbcScatterUpdate4Add(int numberIn, CoinFactorizationDouble multiplier,
639 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
640 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
641 void CoinAbcScatterUpdate5Add(int numberIn, CoinFactorizationDouble multiplier,
642 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
643 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
644 void CoinAbcScatterUpdate6Add(int numberIn, CoinFactorizationDouble multiplier,
645 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
646 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
647 void CoinAbcScatterUpdate7Add(int numberIn, CoinFactorizationDouble multiplier,
648 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
649 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
650 void CoinAbcScatterUpdate8Add(int numberIn, CoinFactorizationDouble multiplier,
651 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
652 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
653 void CoinAbcScatterUpdate4NAdd(int numberIn, CoinFactorizationDouble multiplier,
654 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
655 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
656 void CoinAbcScatterUpdate4NPlus1Add(int numberIn, CoinFactorizationDouble multiplier,
657 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
658 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
659 void CoinAbcScatterUpdate4NPlus2Add(int numberIn, CoinFactorizationDouble multiplier,
660 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
661 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
662 void CoinAbcScatterUpdate4NPlus3Add(int numberIn, CoinFactorizationDouble multiplier,
663 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
664 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
665 #if INLINE_SCATTER == 0
666 void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
667 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
668 const int *COIN_RESTRICT thisIndex,
669 CoinFactorizationDouble *COIN_RESTRICT region,
670 double *COIN_RESTRICT work);
671 #else
672 #if 0
673 void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
674 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
675 const int * COIN_RESTRICT thisIndex,
676 CoinFactorizationDouble * COIN_RESTRICT region,
677 double * COIN_RESTRICT /*work*/)
678 {
679 #if UNROLL_SCATTER == 0
680 for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
681 CoinSimplexInt iRow = thisIndex[j];
682 CoinFactorizationDouble regionValue = region[iRow];
683 CoinFactorizationDouble value = thisElement[j];
684 assert (value);
685 region[iRow] = regionValue - value * pivotValue;
686 }
687 #elif UNROLL_SCATTER == 1
688 if ((number&1)!=0) {
689 CoinSimplexInt iRow = thisIndex[0];
690 thisIndex++;
691 CoinFactorizationDouble regionValue = region[iRow];
692 CoinFactorizationDouble value = thisElement[0];
693 thisElement++;
694 region[iRow] = regionValue - value * pivotValue;
695 }
696 number = number>>1;
697 CoinFactorizationDouble work2[4];
698 for ( ; number !=0; number-- ) {
699 CoinSimplexInt iRow0 = thisIndex[0];
700 CoinSimplexInt iRow1 = thisIndex[1];
701 work2[0] = region[iRow0];
702 work2[1] = region[iRow1];
703 #if 0
704 work2[2] = region[iRow0];
705 work2[3] = region[iRow1];
706 //__v4df b = __builtin_ia32_maskloadpd256(work2);
707 __v4df b = __builtin_ia32_loadupd256(work2);
708 //__v4df b = _mm256_load_pd(work2);
709 #endif
710 work2[0] -= thisElement[0] * pivotValue;
711 work2[1] -= thisElement[1] * pivotValue;
712 region[iRow0] = work2[0];
713 region[iRow1] = work2[1];
714 thisIndex+=2;
715 thisElement+=2;
716 }
717 #endif
718 }
719 #endif
720 #endif
721 #define UNROLL_GATHER 0
722 #define INLINE_GATHER 1
723 #if INLINE_GATHER == 0
724 CoinFactorizationDouble CoinAbcGatherUpdate(CoinSimplexInt number,
725 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
726 const int *COIN_RESTRICT thisIndex,
727 CoinFactorizationDouble *COIN_RESTRICT region);
728 #else
CoinAbcGatherUpdate(CoinSimplexInt number,const CoinFactorizationDouble * COIN_RESTRICT thisElement,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)729 CoinFactorizationDouble ABC_INLINE inline CoinAbcGatherUpdate(CoinSimplexInt number,
730 const CoinFactorizationDouble *COIN_RESTRICT thisElement,
731 const int *COIN_RESTRICT thisIndex,
732 CoinFactorizationDouble *COIN_RESTRICT region)
733 {
734 #if UNROLL_GATHER == 0
735 CoinFactorizationDouble pivotValue = 0.0;
736 for (CoinBigIndex j = 0; j < number; j++) {
737 CoinFactorizationDouble value = thisElement[j];
738 CoinSimplexInt jRow = thisIndex[j];
739 value *= region[jRow];
740 pivotValue -= value;
741 }
742 return pivotValue;
743 #else
744 #error code
745 #endif
746 }
747 #endif
748 #define UNROLL_MULTIPLY_INDEXED 0
749 #define INLINE_MULTIPLY_INDEXED 0
750 #if INLINE_MULTIPLY_INDEXED == 0
751 void CoinAbcMultiplyIndexed(int number,
752 const double *COIN_RESTRICT multiplier,
753 const int *COIN_RESTRICT thisIndex,
754 CoinFactorizationDouble *COIN_RESTRICT region);
755 void CoinAbcMultiplyIndexed(int number,
756 const long double *COIN_RESTRICT multiplier,
757 const int *COIN_RESTRICT thisIndex,
758 long double *COIN_RESTRICT region);
759 #else
CoinAbcMultiplyIndexed(int number,const double * COIN_RESTRICT multiplier,const int * COIN_RESTRICT thisIndex,CoinFactorizationDouble * COIN_RESTRICT region)760 void ABC_INLINE inline CoinAbcMultiplyIndexed(int number,
761 const double *COIN_RESTRICT multiplier,
762 const int *COIN_RESTRICT thisIndex,
763 CoinFactorizationDouble *COIN_RESTRICT region)
764 {
765 }
766 #endif
767 double CoinAbcMaximumAbsElement(const double *region, int size);
768 void CoinAbcMinMaxAbsElement(const double *region, int size, double &minimum, double &maximum);
769 void CoinAbcMinMaxAbsNormalValues(const double *region, int size, double &minimum, double &maximum);
770 void CoinAbcScale(double *region, double multiplier, int size);
771 void CoinAbcScaleNormalValues(double *region, double multiplier, double killIfLessThanThis, int size);
772 /// maximum fabs(region[i]) and then region[i]*=multiplier
773 double CoinAbcMaximumAbsElementAndScale(double *region, double multiplier, int size);
774 void CoinAbcSetElements(double *region, int size, double value);
775 void CoinAbcMultiplyAdd(const double *region1, int size, double multiplier1,
776 double *regionChanged, double multiplier2);
777 double CoinAbcInnerProduct(const double *region1, int size, const double *region2);
778 void CoinAbcGetNorms(const double *region, int size, double &norm1, double &norm2);
779 /// regionTo[index[i]]=regionFrom[i]
780 void CoinAbcScatterTo(const double *regionFrom, double *regionTo, const int *index, int number);
781 /// regionTo[i]=regionFrom[index[i]]
782 void CoinAbcGatherFrom(const double *regionFrom, double *regionTo, const int *index, int number);
783 /// regionTo[index[i]]=0.0
784 void CoinAbcScatterZeroTo(double *regionTo, const int *index, int number);
785 /// regionTo[indexScatter[indexList[i]]]=regionFrom[indexList[i]]
786 void CoinAbcScatterToList(const double *regionFrom, double *regionTo,
787 const int *indexList, const int *indexScatter, int number);
788 /// array[i]=1.0/sqrt(array[i])
789 void CoinAbcInverseSqrts(double *array, int n);
790 void CoinAbcReciprocal(double *array, int n, const double *input);
791 void CoinAbcMemcpyLong(double *array, const double *arrayFrom, int size);
792 void CoinAbcMemcpyLong(int *array, const int *arrayFrom, int size);
793 void CoinAbcMemcpyLong(unsigned char *array, const unsigned char *arrayFrom, int size);
794 void CoinAbcMemset0Long(double *array, int size);
795 void CoinAbcMemset0Long(int *array, int size);
796 void CoinAbcMemset0Long(unsigned char *array, int size);
797 void CoinAbcMemmove(double *array, const double *arrayFrom, int size);
798 void CoinAbcMemmove(int *array, const int *arrayFrom, int size);
799 void CoinAbcMemmove(unsigned char *array, const unsigned char *arrayFrom, int size);
800 /// This moves down and zeroes out end
801 void CoinAbcMemmoveAndZero(double *array, double *arrayFrom, int size);
802 /// This compacts several sections and zeroes out end (returns number)
803 int CoinAbcCompact(int numberSections, int alreadyDone, double *array, const int *starts, const int *lengths);
804 /// This compacts several sections (returns number)
805 int CoinAbcCompact(int numberSections, int alreadyDone, int *array, const int *starts, const int *lengths);
806 #endif
807 #if ABC_CREATE_SCATTER_FUNCTION
functionName(ScatterUpdate1)808 SCATTER_ATTRIBUTE void functionName(ScatterUpdate1)(int numberIn, CoinFactorizationDouble multiplier,
809 const CoinFactorizationDouble *COIN_RESTRICT element,
810 CoinFactorizationDouble *COIN_RESTRICT region)
811 {
812 #ifndef NDEBUG
813 assert(numberIn == 1);
814 #endif
815 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 1);
816 int iColumn0 = thisColumn[0];
817 double value0 = region[iColumn0];
818 value0 OPERATION multiplier *element[0];
819 region[iColumn0] = value0;
820 }
functionName(ScatterUpdate2)821 SCATTER_ATTRIBUTE void functionName(ScatterUpdate2)(int numberIn, CoinFactorizationDouble multiplier,
822 const CoinFactorizationDouble *COIN_RESTRICT element,
823 CoinFactorizationDouble *COIN_RESTRICT region)
824 {
825 #ifndef NDEBUG
826 assert(numberIn == 2);
827 #endif
828 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 2);
829 #if NEW_CHUNK_SIZE == 2
830 int nFull = 2 & (~(NEW_CHUNK_SIZE - 1));
831 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
832 coin_prefetch(element + NEW_CHUNK_SIZE_INCREMENT);
833 int iColumn0 = thisColumn[0];
834 int iColumn1 = thisColumn[1];
835 CoinFactorizationDouble value0 = region[iColumn0];
836 CoinFactorizationDouble value1 = region[iColumn1];
837 value0 OPERATION multiplier *element[0 + NEW_CHUNK_SIZE_OFFSET];
838 value1 OPERATION multiplier *element[1 + NEW_CHUNK_SIZE_OFFSET];
839 region[iColumn0] = value0;
840 region[iColumn1] = value1;
841 element += NEW_CHUNK_SIZE_INCREMENT;
842 thisColumn = reinterpret_cast< const int * >(element);
843 }
844 #endif
845 #if NEW_CHUNK_SIZE == 4
846 int iColumn0 = thisColumn[0];
847 int iColumn1 = thisColumn[1];
848 CoinFactorizationDouble value0 = region[iColumn0];
849 CoinFactorizationDouble value1 = region[iColumn1];
850 value0 OPERATION multiplier *element[0];
851 value1 OPERATION multiplier *element[1];
852 region[iColumn0] = value0;
853 region[iColumn1] = value1;
854 #endif
855 }
functionName(ScatterUpdate3)856 SCATTER_ATTRIBUTE void functionName(ScatterUpdate3)(int numberIn, CoinFactorizationDouble multiplier,
857 const CoinFactorizationDouble *COIN_RESTRICT element,
858 CoinFactorizationDouble *COIN_RESTRICT region)
859 {
860 #ifndef NDEBUG
861 assert(numberIn == 3);
862 #endif
863 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 3);
864 #if AVX2 == 1
865 double temp[2];
866 #endif
867 #if NEW_CHUNK_SIZE == 2
868 int nFull = 3 & (~(NEW_CHUNK_SIZE - 1));
869 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
870 //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
871 int iColumn0 = thisColumn[0];
872 int iColumn1 = thisColumn[1];
873 CoinFactorizationDouble value0 = region[iColumn0];
874 CoinFactorizationDouble value1 = region[iColumn1];
875 value0 OPERATION multiplier *element[0];
876 value1 OPERATION multiplier *element[1];
877 region[iColumn0] = value0;
878 region[iColumn1] = value1;
879 element += NEW_CHUNK_SIZE;
880 thisColumn + = NEW_CHUNK_SIZE;
881 }
882 #endif
883 #if NEW_CHUNK_SIZE == 2
884 int iColumn0 = thisColumn[0];
885 double value0 = region[iColumn0];
886 value0 OPERATION multiplier *element[0];
887 region[iColumn0] = value0;
888 #else
889 int iColumn0 = thisColumn[0];
890 int iColumn1 = thisColumn[1];
891 int iColumn2 = thisColumn[2];
892 #if AVX2 == 1
893 __v2df bb;
894 double value2 = region[iColumn2];
895 value2 OPERATION multiplier *element[2];
896 set_const_v2df(bb, multiplier);
897 temp[0] = region[iColumn0];
898 temp[1] = region[iColumn1];
899 region[iColumn2] = value2;
900 __v2df v0 = __builtin_ia32_loadupd(temp);
901 __v2df a = __builtin_ia32_loadupd(element);
902 a *= bb;
903 v0 OPERATION a;
904 __builtin_ia32_storeupd(temp, v0);
905 region[iColumn0] = temp[0];
906 region[iColumn1] = temp[1];
907 #else
908 double value0 = region[iColumn0];
909 double value1 = region[iColumn1];
910 double value2 = region[iColumn2];
911 value0 OPERATION multiplier *element[0];
912 value1 OPERATION multiplier *element[1];
913 value2 OPERATION multiplier *element[2];
914 region[iColumn0] = value0;
915 region[iColumn1] = value1;
916 region[iColumn2] = value2;
917 #endif
918 #endif
919 }
functionName(ScatterUpdate4)920 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4)(int numberIn, CoinFactorizationDouble multiplier,
921 const CoinFactorizationDouble *COIN_RESTRICT element,
922 CoinFactorizationDouble *COIN_RESTRICT region)
923 {
924 #ifndef NDEBUG
925 assert(numberIn == 4);
926 #endif
927 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 4);
928 int nFull = 4 & (~(NEW_CHUNK_SIZE - 1));
929 #if AVX2 == 1
930 double temp[4];
931 #endif
932 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
933 //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
934 #if NEW_CHUNK_SIZE == 2
935 int iColumn0 = thisColumn[0];
936 int iColumn1 = thisColumn[1];
937 double value0 = region[iColumn0];
938 double value1 = region[iColumn1];
939 value0 OPERATION multiplier *element[0];
940 value1 OPERATION multiplier *element[1];
941 region[iColumn0] = value0;
942 region[iColumn1] = value1;
943 #elif NEW_CHUNK_SIZE == 4
944 int iColumn0 = thisColumn[0];
945 int iColumn1 = thisColumn[1];
946 int iColumn2 = thisColumn[2];
947 int iColumn3 = thisColumn[3];
948 #if AVX2 == 1
949 __v2df bb;
950 set_const_v2df(bb, multiplier);
951 temp[0] = region[iColumn0];
952 temp[1] = region[iColumn1];
953 temp[2] = region[iColumn2];
954 temp[3] = region[iColumn3];
955 __v2df v0 = __builtin_ia32_loadupd(temp);
956 __v2df v1 = __builtin_ia32_loadupd(temp + 2);
957 __v2df a = __builtin_ia32_loadupd(element);
958 a *= bb;
959 v0 OPERATION a;
960 a = __builtin_ia32_loadupd(element + 2);
961 a *= bb;
962 v1 OPERATION a;
963 __builtin_ia32_storeupd(temp, v0);
964 __builtin_ia32_storeupd(temp + 2, v1);
965 region[iColumn0] = temp[0];
966 region[iColumn1] = temp[1];
967 region[iColumn2] = temp[2];
968 region[iColumn3] = temp[3];
969 #else
970 double value0 = region[iColumn0];
971 double value1 = region[iColumn1];
972 double value2 = region[iColumn2];
973 double value3 = region[iColumn3];
974 value0 OPERATION multiplier *element[0];
975 value1 OPERATION multiplier *element[1];
976 value2 OPERATION multiplier *element[2];
977 value3 OPERATION multiplier *element[3];
978 region[iColumn0] = value0;
979 region[iColumn1] = value1;
980 region[iColumn2] = value2;
981 region[iColumn3] = value3;
982 #endif
983 #else
984 abort();
985 #endif
986 element += NEW_CHUNK_SIZE;
987 thisColumn += NEW_CHUNK_SIZE;
988 }
989 }
functionName(ScatterUpdate5)990 SCATTER_ATTRIBUTE void functionName(ScatterUpdate5)(int numberIn, CoinFactorizationDouble multiplier,
991 const CoinFactorizationDouble *COIN_RESTRICT element,
992 CoinFactorizationDouble *COIN_RESTRICT region)
993 {
994 #ifndef NDEBUG
995 assert(numberIn == 5);
996 #endif
997 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 5);
998 int nFull = 5 & (~(NEW_CHUNK_SIZE - 1));
999 #if AVX2 == 1
1000 double temp[4];
1001 #endif
1002 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1003 //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
1004 #if NEW_CHUNK_SIZE == 2
1005 int iColumn0 = thisColumn[0];
1006 int iColumn1 = thisColumn[1];
1007 double value0 = region[iColumn0];
1008 double value1 = region[iColumn1];
1009 value0 OPERATION multiplier *element[0];
1010 value1 OPERATION multiplier *element[1];
1011 region[iColumn0] = value0;
1012 region[iColumn1] = value1;
1013 #elif NEW_CHUNK_SIZE == 4
1014 int iColumn0 = thisColumn[0];
1015 int iColumn1 = thisColumn[1];
1016 int iColumn2 = thisColumn[2];
1017 int iColumn3 = thisColumn[3];
1018 #if AVX2 == 1
1019 __v2df bb;
1020 set_const_v2df(bb, multiplier);
1021 temp[0] = region[iColumn0];
1022 temp[1] = region[iColumn1];
1023 temp[2] = region[iColumn2];
1024 temp[3] = region[iColumn3];
1025 __v2df v0 = __builtin_ia32_loadupd(temp);
1026 __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1027 __v2df a = __builtin_ia32_loadupd(element);
1028 a *= bb;
1029 v0 OPERATION a;
1030 a = __builtin_ia32_loadupd(element + 2);
1031 a *= bb;
1032 v1 OPERATION a;
1033 __builtin_ia32_storeupd(temp, v0);
1034 __builtin_ia32_storeupd(temp + 2, v1);
1035 region[iColumn0] = temp[0];
1036 region[iColumn1] = temp[1];
1037 region[iColumn2] = temp[2];
1038 region[iColumn3] = temp[3];
1039 #else
1040 double value0 = region[iColumn0];
1041 double value1 = region[iColumn1];
1042 double value2 = region[iColumn2];
1043 double value3 = region[iColumn3];
1044 value0 OPERATION multiplier *element[0];
1045 value1 OPERATION multiplier *element[1];
1046 value2 OPERATION multiplier *element[2];
1047 value3 OPERATION multiplier *element[3];
1048 region[iColumn0] = value0;
1049 region[iColumn1] = value1;
1050 region[iColumn2] = value2;
1051 region[iColumn3] = value3;
1052 #endif
1053 #else
1054 abort();
1055 #endif
1056 element += NEW_CHUNK_SIZE;
1057 thisColumn += NEW_CHUNK_SIZE;
1058 }
1059 int iColumn0 = thisColumn[0];
1060 double value0 = region[iColumn0];
1061 value0 OPERATION multiplier *element[0];
1062 region[iColumn0] = value0;
1063 }
functionName(ScatterUpdate6)1064 SCATTER_ATTRIBUTE void functionName(ScatterUpdate6)(int numberIn, CoinFactorizationDouble multiplier,
1065 const CoinFactorizationDouble *COIN_RESTRICT element,
1066 CoinFactorizationDouble *COIN_RESTRICT region)
1067 {
1068 #ifndef NDEBUG
1069 assert(numberIn == 6);
1070 #endif
1071 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 6);
1072 int nFull = 6 & (~(NEW_CHUNK_SIZE - 1));
1073 #if AVX2 == 1
1074 double temp[4];
1075 #endif
1076 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1077 coin_prefetch_const(element + 6);
1078 #if NEW_CHUNK_SIZE == 2
1079 int iColumn0 = thisColumn[0];
1080 int iColumn1 = thisColumn[1];
1081 double value0 = region[iColumn0];
1082 double value1 = region[iColumn1];
1083 value0 OPERATION multiplier *element[0];
1084 value1 OPERATION multiplier *element[1];
1085 region[iColumn0] = value0;
1086 region[iColumn1] = value1;
1087 #elif NEW_CHUNK_SIZE == 4
1088 int iColumn0 = thisColumn[0];
1089 int iColumn1 = thisColumn[1];
1090 int iColumn2 = thisColumn[2];
1091 int iColumn3 = thisColumn[3];
1092 #if AVX2 == 1
1093 __v2df bb;
1094 set_const_v2df(bb, multiplier);
1095 temp[0] = region[iColumn0];
1096 temp[1] = region[iColumn1];
1097 temp[2] = region[iColumn2];
1098 temp[3] = region[iColumn3];
1099 __v2df v0 = __builtin_ia32_loadupd(temp);
1100 __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1101 __v2df a = __builtin_ia32_loadupd(element);
1102 a *= bb;
1103 v0 OPERATION a;
1104 a = __builtin_ia32_loadupd(element + 2);
1105 a *= bb;
1106 v1 OPERATION a;
1107 __builtin_ia32_storeupd(temp, v0);
1108 __builtin_ia32_storeupd(temp + 2, v1);
1109 region[iColumn0] = temp[0];
1110 region[iColumn1] = temp[1];
1111 region[iColumn2] = temp[2];
1112 region[iColumn3] = temp[3];
1113 #else
1114 double value0 = region[iColumn0];
1115 double value1 = region[iColumn1];
1116 double value2 = region[iColumn2];
1117 double value3 = region[iColumn3];
1118 value0 OPERATION multiplier *element[0];
1119 value1 OPERATION multiplier *element[1];
1120 value2 OPERATION multiplier *element[2];
1121 value3 OPERATION multiplier *element[3];
1122 region[iColumn0] = value0;
1123 region[iColumn1] = value1;
1124 region[iColumn2] = value2;
1125 region[iColumn3] = value3;
1126 #endif
1127 #else
1128 abort();
1129 #endif
1130 element += NEW_CHUNK_SIZE;
1131 thisColumn += NEW_CHUNK_SIZE;
1132 }
1133 #if NEW_CHUNK_SIZE == 4
1134 int iColumn0 = thisColumn[0];
1135 int iColumn1 = thisColumn[1];
1136 double value0 = region[iColumn0];
1137 double value1 = region[iColumn1];
1138 value0 OPERATION multiplier *element[0];
1139 value1 OPERATION multiplier *element[1];
1140 region[iColumn0] = value0;
1141 region[iColumn1] = value1;
1142 #endif
1143 }
functionName(ScatterUpdate7)1144 SCATTER_ATTRIBUTE void functionName(ScatterUpdate7)(int numberIn, CoinFactorizationDouble multiplier,
1145 const CoinFactorizationDouble *COIN_RESTRICT element,
1146 CoinFactorizationDouble *COIN_RESTRICT region)
1147 {
1148 #ifndef NDEBUG
1149 assert(numberIn == 7);
1150 #endif
1151 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 7);
1152 int nFull = 7 & (~(NEW_CHUNK_SIZE - 1));
1153 #if AVX2 == 1
1154 double temp[4];
1155 #endif
1156 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1157 coin_prefetch_const(element + 6);
1158 #if NEW_CHUNK_SIZE == 2
1159 int iColumn0 = thisColumn[0];
1160 int iColumn1 = thisColumn[1];
1161 double value0 = region[iColumn0];
1162 double value1 = region[iColumn1];
1163 value0 OPERATION multiplier *element[0];
1164 value1 OPERATION multiplier *element[1];
1165 region[iColumn0] = value0;
1166 region[iColumn1] = value1;
1167 #elif NEW_CHUNK_SIZE == 4
1168 int iColumn0 = thisColumn[0];
1169 int iColumn1 = thisColumn[1];
1170 int iColumn2 = thisColumn[2];
1171 int iColumn3 = thisColumn[3];
1172 #if AVX2 == 1
1173 __v2df bb;
1174 set_const_v2df(bb, multiplier);
1175 temp[0] = region[iColumn0];
1176 temp[1] = region[iColumn1];
1177 temp[2] = region[iColumn2];
1178 temp[3] = region[iColumn3];
1179 __v2df v0 = __builtin_ia32_loadupd(temp);
1180 __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1181 __v2df a = __builtin_ia32_loadupd(element);
1182 a *= bb;
1183 v0 OPERATION a;
1184 a = __builtin_ia32_loadupd(element + 2);
1185 a *= bb;
1186 v1 OPERATION a;
1187 __builtin_ia32_storeupd(temp, v0);
1188 __builtin_ia32_storeupd(temp + 2, v1);
1189 region[iColumn0] = temp[0];
1190 region[iColumn1] = temp[1];
1191 region[iColumn2] = temp[2];
1192 region[iColumn3] = temp[3];
1193 #else
1194 double value0 = region[iColumn0];
1195 double value1 = region[iColumn1];
1196 double value2 = region[iColumn2];
1197 double value3 = region[iColumn3];
1198 value0 OPERATION multiplier *element[0];
1199 value1 OPERATION multiplier *element[1];
1200 value2 OPERATION multiplier *element[2];
1201 value3 OPERATION multiplier *element[3];
1202 region[iColumn0] = value0;
1203 region[iColumn1] = value1;
1204 region[iColumn2] = value2;
1205 region[iColumn3] = value3;
1206 #endif
1207 #else
1208 abort();
1209 #endif
1210 element += NEW_CHUNK_SIZE;
1211 thisColumn += NEW_CHUNK_SIZE;
1212 }
1213 #if NEW_CHUNK_SIZE == 2
1214 int iColumn0 = thisColumn[0];
1215 double value0 = region[iColumn0];
1216 value0 OPERATION multiplier *element[0];
1217 region[iColumn0] = value0;
1218 #else
1219 int iColumn0 = thisColumn[0];
1220 int iColumn1 = thisColumn[1];
1221 int iColumn2 = thisColumn[2];
1222 double value0 = region[iColumn0];
1223 double value1 = region[iColumn1];
1224 double value2 = region[iColumn2];
1225 value0 OPERATION multiplier *element[0];
1226 value1 OPERATION multiplier *element[1];
1227 value2 OPERATION multiplier *element[2];
1228 region[iColumn0] = value0;
1229 region[iColumn1] = value1;
1230 region[iColumn2] = value2;
1231 #endif
1232 }
functionName(ScatterUpdate8)1233 SCATTER_ATTRIBUTE void functionName(ScatterUpdate8)(int numberIn, CoinFactorizationDouble multiplier,
1234 const CoinFactorizationDouble *COIN_RESTRICT element,
1235 CoinFactorizationDouble *COIN_RESTRICT region)
1236 {
1237 #ifndef NDEBUG
1238 assert(numberIn == 8);
1239 #endif
1240 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 8);
1241 int nFull = 8 & (~(NEW_CHUNK_SIZE - 1));
1242 #if AVX2 == 1
1243 double temp[4];
1244 #endif
1245 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1246 coin_prefetch_const(element + 6);
1247 #if NEW_CHUNK_SIZE == 2
1248 int iColumn0 = thisColumn[0];
1249 int iColumn1 = thisColumn[1];
1250 double value0 = region[iColumn0];
1251 double value1 = region[iColumn1];
1252 value0 OPERATION multiplier *element[0];
1253 value1 OPERATION multiplier *element[1];
1254 region[iColumn0] = value0;
1255 region[iColumn1] = value1;
1256 #elif NEW_CHUNK_SIZE == 4
1257 int iColumn0 = thisColumn[0];
1258 int iColumn1 = thisColumn[1];
1259 int iColumn2 = thisColumn[2];
1260 int iColumn3 = thisColumn[3];
1261 #if AVX2 == 1
1262 __v2df bb;
1263 set_const_v2df(bb, multiplier);
1264 temp[0] = region[iColumn0];
1265 temp[1] = region[iColumn1];
1266 temp[2] = region[iColumn2];
1267 temp[3] = region[iColumn3];
1268 __v2df v0 = __builtin_ia32_loadupd(temp);
1269 __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1270 __v2df a = __builtin_ia32_loadupd(element);
1271 a *= bb;
1272 v0 OPERATION a;
1273 a = __builtin_ia32_loadupd(element + 2);
1274 a *= bb;
1275 v1 OPERATION a;
1276 __builtin_ia32_storeupd(temp, v0);
1277 __builtin_ia32_storeupd(temp + 2, v1);
1278 region[iColumn0] = temp[0];
1279 region[iColumn1] = temp[1];
1280 region[iColumn2] = temp[2];
1281 region[iColumn3] = temp[3];
1282 #else
1283 double value0 = region[iColumn0];
1284 double value1 = region[iColumn1];
1285 double value2 = region[iColumn2];
1286 double value3 = region[iColumn3];
1287 value0 OPERATION multiplier *element[0];
1288 value1 OPERATION multiplier *element[1];
1289 value2 OPERATION multiplier *element[2];
1290 value3 OPERATION multiplier *element[3];
1291 region[iColumn0] = value0;
1292 region[iColumn1] = value1;
1293 region[iColumn2] = value2;
1294 region[iColumn3] = value3;
1295 #endif
1296 #else
1297 abort();
1298 #endif
1299 element += NEW_CHUNK_SIZE;
1300 thisColumn += NEW_CHUNK_SIZE;
1301 }
1302 }
functionName(ScatterUpdate4N)1303 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4N)(int numberIn, CoinFactorizationDouble multiplier,
1304 const CoinFactorizationDouble *COIN_RESTRICT element,
1305 CoinFactorizationDouble *COIN_RESTRICT region)
1306 {
1307 assert((numberIn & 3) == 0);
1308 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1309 int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1310 #if AVX2 == 1
1311 double temp[4];
1312 #endif
1313 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1314 coin_prefetch_const(element + 16);
1315 coin_prefetch_const(thisColumn + 32);
1316 #if NEW_CHUNK_SIZE == 2
1317 int iColumn0 = thisColumn[0];
1318 int iColumn1 = thisColumn[1];
1319 double value0 = region[iColumn0];
1320 double value1 = region[iColumn1];
1321 value0 OPERATION multiplier *element[0];
1322 value1 OPERATION multiplier *element[1];
1323 region[iColumn0] = value0;
1324 region[iColumn1] = value1;
1325 #elif NEW_CHUNK_SIZE == 4
1326 int iColumn0 = thisColumn[0];
1327 int iColumn1 = thisColumn[1];
1328 int iColumn2 = thisColumn[2];
1329 int iColumn3 = thisColumn[3];
1330 #if AVX2 == 1
1331 __v2df bb;
1332 set_const_v2df(bb, multiplier);
1333 temp[0] = region[iColumn0];
1334 temp[1] = region[iColumn1];
1335 temp[2] = region[iColumn2];
1336 temp[3] = region[iColumn3];
1337 __v2df v0 = __builtin_ia32_loadupd(temp);
1338 __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1339 __v2df a = __builtin_ia32_loadupd(element);
1340 a *= bb;
1341 v0 OPERATION a;
1342 a = __builtin_ia32_loadupd(element + 2);
1343 a *= bb;
1344 v1 OPERATION a;
1345 __builtin_ia32_storeupd(temp, v0);
1346 __builtin_ia32_storeupd(temp + 2, v1);
1347 region[iColumn0] = temp[0];
1348 region[iColumn1] = temp[1];
1349 region[iColumn2] = temp[2];
1350 region[iColumn3] = temp[3];
1351 #else
1352 double value0 = region[iColumn0];
1353 double value1 = region[iColumn1];
1354 double value2 = region[iColumn2];
1355 double value3 = region[iColumn3];
1356 value0 OPERATION multiplier *element[0];
1357 value1 OPERATION multiplier *element[1];
1358 value2 OPERATION multiplier *element[2];
1359 value3 OPERATION multiplier *element[3];
1360 region[iColumn0] = value0;
1361 region[iColumn1] = value1;
1362 region[iColumn2] = value2;
1363 region[iColumn3] = value3;
1364 #endif
1365 #else
1366 abort();
1367 #endif
1368 element += NEW_CHUNK_SIZE;
1369 thisColumn += NEW_CHUNK_SIZE;
1370 }
1371 }
functionName(ScatterUpdate4NPlus1)1372 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus1)(int numberIn, CoinFactorizationDouble multiplier,
1373 const CoinFactorizationDouble *COIN_RESTRICT element,
1374 CoinFactorizationDouble *COIN_RESTRICT region)
1375 {
1376 assert((numberIn & 3) == 1);
1377 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1378 int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1379 #if AVX2 == 1
1380 double temp[4];
1381 #endif
1382 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1383 coin_prefetch_const(element + 16);
1384 coin_prefetch_const(thisColumn + 32);
1385 #if NEW_CHUNK_SIZE == 2
1386 int iColumn0 = thisColumn[0];
1387 int iColumn1 = thisColumn[1];
1388 double value0 = region[iColumn0];
1389 double value1 = region[iColumn1];
1390 value0 OPERATION multiplier *element[0];
1391 value1 OPERATION multiplier *element[1];
1392 region[iColumn0] = value0;
1393 region[iColumn1] = value1;
1394 #elif NEW_CHUNK_SIZE == 4
1395 int iColumn0 = thisColumn[0];
1396 int iColumn1 = thisColumn[1];
1397 int iColumn2 = thisColumn[2];
1398 int iColumn3 = thisColumn[3];
1399 #if AVX2 == 1
1400 __v2df bb;
1401 set_const_v2df(bb, multiplier);
1402 temp[0] = region[iColumn0];
1403 temp[1] = region[iColumn1];
1404 temp[2] = region[iColumn2];
1405 temp[3] = region[iColumn3];
1406 __v2df v0 = __builtin_ia32_loadupd(temp);
1407 __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1408 __v2df a = __builtin_ia32_loadupd(element);
1409 a *= bb;
1410 v0 OPERATION a;
1411 a = __builtin_ia32_loadupd(element + 2);
1412 a *= bb;
1413 v1 OPERATION a;
1414 __builtin_ia32_storeupd(temp, v0);
1415 __builtin_ia32_storeupd(temp + 2, v1);
1416 region[iColumn0] = temp[0];
1417 region[iColumn1] = temp[1];
1418 region[iColumn2] = temp[2];
1419 region[iColumn3] = temp[3];
1420 #else
1421 double value0 = region[iColumn0];
1422 double value1 = region[iColumn1];
1423 double value2 = region[iColumn2];
1424 double value3 = region[iColumn3];
1425 value0 OPERATION multiplier *element[0];
1426 value1 OPERATION multiplier *element[1];
1427 value2 OPERATION multiplier *element[2];
1428 value3 OPERATION multiplier *element[3];
1429 region[iColumn0] = value0;
1430 region[iColumn1] = value1;
1431 region[iColumn2] = value2;
1432 region[iColumn3] = value3;
1433 #endif
1434 #else
1435 abort();
1436 #endif
1437 element += NEW_CHUNK_SIZE;
1438 thisColumn += NEW_CHUNK_SIZE;
1439 }
1440 int iColumn0 = thisColumn[0];
1441 double value0 = region[iColumn0];
1442 value0 OPERATION multiplier *element[0];
1443 region[iColumn0] = value0;
1444 }
functionName(ScatterUpdate4NPlus2)1445 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus2)(int numberIn, CoinFactorizationDouble multiplier,
1446 const CoinFactorizationDouble *COIN_RESTRICT element,
1447 CoinFactorizationDouble *COIN_RESTRICT region)
1448 {
1449 assert((numberIn & 3) == 2);
1450 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1451 int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1452 #if AVX2 == 1
1453 double temp[4];
1454 #endif
1455 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1456 coin_prefetch_const(element + 16);
1457 coin_prefetch_const(thisColumn + 32);
1458 #if NEW_CHUNK_SIZE == 2
1459 int iColumn0 = thisColumn[0];
1460 int iColumn1 = thisColumn[1];
1461 double value0 = region[iColumn0];
1462 double value1 = region[iColumn1];
1463 value0 OPERATION multiplier *element[0];
1464 value1 OPERATION multiplier *element[1];
1465 region[iColumn0] = value0;
1466 region[iColumn1] = value1;
1467 #elif NEW_CHUNK_SIZE == 4
1468 int iColumn0 = thisColumn[0];
1469 int iColumn1 = thisColumn[1];
1470 int iColumn2 = thisColumn[2];
1471 int iColumn3 = thisColumn[3];
1472 #if AVX2 == 1
1473 __v2df bb;
1474 set_const_v2df(bb, multiplier);
1475 temp[0] = region[iColumn0];
1476 temp[1] = region[iColumn1];
1477 temp[2] = region[iColumn2];
1478 temp[3] = region[iColumn3];
1479 __v2df v0 = __builtin_ia32_loadupd(temp);
1480 __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1481 __v2df a = __builtin_ia32_loadupd(element);
1482 a *= bb;
1483 v0 OPERATION a;
1484 a = __builtin_ia32_loadupd(element + 2);
1485 a *= bb;
1486 v1 OPERATION a;
1487 __builtin_ia32_storeupd(temp, v0);
1488 __builtin_ia32_storeupd(temp + 2, v1);
1489 region[iColumn0] = temp[0];
1490 region[iColumn1] = temp[1];
1491 region[iColumn2] = temp[2];
1492 region[iColumn3] = temp[3];
1493 #else
1494 double value0 = region[iColumn0];
1495 double value1 = region[iColumn1];
1496 double value2 = region[iColumn2];
1497 double value3 = region[iColumn3];
1498 value0 OPERATION multiplier *element[0];
1499 value1 OPERATION multiplier *element[1];
1500 value2 OPERATION multiplier *element[2];
1501 value3 OPERATION multiplier *element[3];
1502 region[iColumn0] = value0;
1503 region[iColumn1] = value1;
1504 region[iColumn2] = value2;
1505 region[iColumn3] = value3;
1506 #endif
1507 #else
1508 abort();
1509 #endif
1510 element += NEW_CHUNK_SIZE;
1511 thisColumn += NEW_CHUNK_SIZE;
1512 }
1513 #if NEW_CHUNK_SIZE == 4
1514 int iColumn0 = thisColumn[0];
1515 int iColumn1 = thisColumn[1];
1516 double value0 = region[iColumn0];
1517 double value1 = region[iColumn1];
1518 value0 OPERATION multiplier *element[0];
1519 value1 OPERATION multiplier *element[1];
1520 region[iColumn0] = value0;
1521 region[iColumn1] = value1;
1522 #endif
1523 }
functionName(ScatterUpdate4NPlus3)1524 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus3)(int numberIn, CoinFactorizationDouble multiplier,
1525 const CoinFactorizationDouble *COIN_RESTRICT element,
1526 CoinFactorizationDouble *COIN_RESTRICT region)
1527 {
1528 assert((numberIn & 3) == 3);
1529 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1530 int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1531 #if AVX2 == 1
1532 double temp[4];
1533 #endif
1534 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1535 coin_prefetch_const(element + 16);
1536 coin_prefetch_const(thisColumn + 32);
1537 #if NEW_CHUNK_SIZE == 2
1538 int iColumn0 = thisColumn[0];
1539 int iColumn1 = thisColumn[1];
1540 double value0 = region[iColumn0];
1541 double value1 = region[iColumn1];
1542 value0 OPERATION multiplier *element[0];
1543 value1 OPERATION multiplier *element[1];
1544 region[iColumn0] = value0;
1545 region[iColumn1] = value1;
1546 #elif NEW_CHUNK_SIZE == 4
1547 int iColumn0 = thisColumn[0];
1548 int iColumn1 = thisColumn[1];
1549 int iColumn2 = thisColumn[2];
1550 int iColumn3 = thisColumn[3];
1551 #if AVX2 == 1
1552 __v2df bb;
1553 set_const_v2df(bb, multiplier);
1554 temp[0] = region[iColumn0];
1555 temp[1] = region[iColumn1];
1556 temp[2] = region[iColumn2];
1557 temp[3] = region[iColumn3];
1558 __v2df v0 = __builtin_ia32_loadupd(temp);
1559 __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1560 __v2df a = __builtin_ia32_loadupd(element);
1561 a *= bb;
1562 v0 OPERATION a;
1563 a = __builtin_ia32_loadupd(element + 2);
1564 a *= bb;
1565 v1 OPERATION a;
1566 __builtin_ia32_storeupd(temp, v0);
1567 __builtin_ia32_storeupd(temp + 2, v1);
1568 region[iColumn0] = temp[0];
1569 region[iColumn1] = temp[1];
1570 region[iColumn2] = temp[2];
1571 region[iColumn3] = temp[3];
1572 #else
1573 double value0 = region[iColumn0];
1574 double value1 = region[iColumn1];
1575 double value2 = region[iColumn2];
1576 double value3 = region[iColumn3];
1577 value0 OPERATION multiplier *element[0];
1578 value1 OPERATION multiplier *element[1];
1579 value2 OPERATION multiplier *element[2];
1580 value3 OPERATION multiplier *element[3];
1581 region[iColumn0] = value0;
1582 region[iColumn1] = value1;
1583 region[iColumn2] = value2;
1584 region[iColumn3] = value3;
1585 #endif
1586 #else
1587 abort();
1588 #endif
1589 element += NEW_CHUNK_SIZE;
1590 thisColumn += NEW_CHUNK_SIZE;
1591 }
1592 #if NEW_CHUNK_SIZE == 2
1593 int iColumn0 = thisColumn[0];
1594 double value0 = region[iColumn0];
1595 value0 OPERATION multiplier *element[0];
1596 region[iColumn0] = value0;
1597 #else
1598 int iColumn0 = thisColumn[0];
1599 int iColumn1 = thisColumn[1];
1600 int iColumn2 = thisColumn[2];
1601 double value0 = region[iColumn0];
1602 double value1 = region[iColumn1];
1603 double value2 = region[iColumn2];
1604 value0 OPERATION multiplier *element[0];
1605 value1 OPERATION multiplier *element[1];
1606 value2 OPERATION multiplier *element[2];
1607 region[iColumn0] = value0;
1608 region[iColumn1] = value1;
1609 region[iColumn2] = value2;
1610 #endif
1611 }
1612 #endif
1613
1614 /* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
1615 */
1616