1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_dge_sum_mv_x(enum blas_order_type order,int m,int n,double alpha,const double * a,int lda,const double * x,int incx,double beta,const double * b,int ldb,double * y,int incy,enum blas_prec_type prec)3 void BLAS_dge_sum_mv_x(enum blas_order_type order, int m, int n,
4 double alpha, const double *a, int lda,
5 const double *x, int incx,
6 double beta, const double *b, int ldb,
7 double *y, int incy, enum blas_prec_type prec)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * Computes y = alpha * A * x + beta * B * y,
14 * where A, B are general matricies.
15 *
16 * Arguments
17 * =========
18 *
19 * order (input) blas_order_type
20 * Order of A; row or column major
21 *
22 * m (input) int
23 * Row Dimension of A, B, length of output vector y
24 *
25 * n (input) int
26 * Column Dimension of A, B and the length of vector x
27 *
28 * alpha (input) double
29 *
30 * A (input) const double*
31 *
32 * lda (input) int
33 * Leading dimension of A
34 *
35 * x (input) const double*
36 *
37 * incx (input) int
38 * The stride for vector x.
39 *
40 * beta (input) double
41 *
42 * b (input) const double*
43 *
44 * ldb (input) int
45 * Leading dimension of B
46 *
47 * y (input/output) double*
48 *
49 * incy (input) int
50 * The stride for vector y.
51 *
52 * prec (input) enum blas_prec_type
53 * Specifies the internal precision to be used.
54 * = blas_prec_single: single precision.
55 * = blas_prec_double: double precision.
56 * = blas_prec_extra : anything at least 1.5 times as accurate
57 * than double, and wider than 80-bits.
58 * We use double-double in our implementation.
59 *
60 */
61 {
62 /* Routine name */
63 static const char routine_name[] = "BLAS_dge_sum_mv";
64 switch (prec) {
65 case blas_prec_single:
66 case blas_prec_indigenous:
67 case blas_prec_double:
68 {
69 int i, j;
70 int xi, yi;
71 int x_starti, y_starti, incxi, incyi;
72 int lda_min;
73 int ai;
74 int incai;
75 int aij;
76 int incaij;
77 int bi;
78 int incbi;
79 int bij;
80 int incbij;
81
82 const double *a_i = a;
83 const double *b_i = b;
84 const double *x_i = x;
85 double *y_i = y;
86 double alpha_i = alpha;
87 double beta_i = beta;
88 double a_elem;
89 double b_elem;
90 double x_elem;
91 double prod;
92 double sumA;
93 double sumB;
94 double tmp1;
95 double tmp2;
96
97
98
99 /* m is number of rows */
100 /* n is number of columns */
101
102 if (m == 0 || n == 0)
103 return;
104
105
106 /* all error calls */
107 if (order == blas_rowmajor) {
108 lda_min = n;
109 incai = lda; /* row stride */
110 incbi = ldb;
111 incbij = incaij = 1; /* column stride */
112 } else if (order == blas_colmajor) {
113 lda_min = m;
114 incai = incbi = 1; /*row stride */
115 incaij = lda; /* column stride */
116 incbij = ldb;
117 } else {
118 /* error, order not blas_colmajor not blas_rowmajor */
119 BLAS_error(routine_name, -1, order, 0);
120 return;
121 }
122
123 if (m < 0)
124 BLAS_error(routine_name, -2, m, 0);
125 else if (n < 0)
126 BLAS_error(routine_name, -3, n, 0);
127 if (lda < lda_min)
128 BLAS_error(routine_name, -6, lda, 0);
129 else if (ldb < lda_min)
130 BLAS_error(routine_name, -11, ldb, 0);
131 else if (incx == 0)
132 BLAS_error(routine_name, -8, incx, 0);
133 else if (incy == 0)
134 BLAS_error(routine_name, -13, incy, 0);
135
136 incxi = incx;
137 incyi = incy;
138
139
140
141
142
143
144
145 if (incxi > 0)
146 x_starti = 0;
147 else
148 x_starti = (1 - n) * incxi;
149
150 if (incyi > 0)
151 y_starti = 0;
152 else
153 y_starti = (1 - m) * incyi;
154
155
156
157 if (alpha_i == 0.0) {
158 if (beta_i == 0.0) {
159 /* alpha, beta are 0.0 */
160 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
161 y_i[yi] = 0.0;
162 }
163 } else if (beta_i == 1.0) {
164 /* alpha is 0.0, beta is 1.0 */
165
166
167 bi = 0;
168 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
169
170 sumB = 0.0;
171 bij = bi;
172 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
173 x_elem = x_i[xi];
174
175 b_elem = b_i[bij];
176 prod = b_elem * x_elem;
177 sumB = sumB + prod;
178 bij += incbij;
179 }
180 /* now put the result into y_i */
181 y_i[yi] = sumB;
182
183 bi += incbi;
184 }
185 } else {
186 /* alpha is 0.0, beta not 1.0 nor 0.0 */
187
188
189 bi = 0;
190 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
191
192 sumB = 0.0;
193 bij = bi;
194 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
195 x_elem = x_i[xi];
196
197 b_elem = b_i[bij];
198 prod = b_elem * x_elem;
199 sumB = sumB + prod;
200 bij += incbij;
201 }
202 /* now put the result into y_i */
203 tmp1 = sumB * beta_i;
204 y_i[yi] = tmp1;
205
206 bi += incbi;
207 }
208 }
209 } else if (alpha_i == 1.0) {
210 if (beta_i == 0.0) {
211 /* alpha is 1.0, beta is 0.0 */
212
213 ai = 0;
214
215 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
216 sumA = 0.0;
217 aij = ai;
218
219 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
220 x_elem = x_i[xi];
221 a_elem = a_i[aij];
222 prod = a_elem * x_elem;
223 sumA = sumA + prod;
224 aij += incaij;
225
226 }
227 /* now put the result into y_i */
228 y_i[yi] = sumA;
229 ai += incai;
230
231 }
232 } else if (beta_i == 1.0) {
233 /* alpha is 1.0, beta is 1.0 */
234
235 ai = 0;
236 bi = 0;
237 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
238 sumA = 0.0;
239 aij = ai;
240 sumB = 0.0;
241 bij = bi;
242 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
243 x_elem = x_i[xi];
244 a_elem = a_i[aij];
245 prod = a_elem * x_elem;
246 sumA = sumA + prod;
247 aij += incaij;
248 b_elem = b_i[bij];
249 prod = b_elem * x_elem;
250 sumB = sumB + prod;
251 bij += incbij;
252 }
253 /* now put the result into y_i */
254 tmp1 = sumA;
255 tmp2 = sumB;
256 tmp1 = tmp1 + tmp2;
257 y_i[yi] = tmp1;
258 ai += incai;
259 bi += incbi;
260 }
261 } else {
262 /* alpha is 1.0, beta is other */
263
264 ai = 0;
265 bi = 0;
266 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
267 sumA = 0.0;
268 aij = ai;
269 sumB = 0.0;
270 bij = bi;
271 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
272 x_elem = x_i[xi];
273 a_elem = a_i[aij];
274 prod = a_elem * x_elem;
275 sumA = sumA + prod;
276 aij += incaij;
277 b_elem = b_i[bij];
278 prod = b_elem * x_elem;
279 sumB = sumB + prod;
280 bij += incbij;
281 }
282 /* now put the result into y_i */
283 tmp1 = sumA;
284 tmp2 = sumB * beta_i;
285 tmp1 = tmp1 + tmp2;
286 y_i[yi] = tmp1;
287 ai += incai;
288 bi += incbi;
289 }
290 }
291 } else {
292 if (beta_i == 0.0) {
293 /* alpha is other, beta is 0.0 */
294
295 ai = 0;
296
297 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
298 sumA = 0.0;
299 aij = ai;
300
301 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
302 x_elem = x_i[xi];
303 a_elem = a_i[aij];
304 prod = a_elem * x_elem;
305 sumA = sumA + prod;
306 aij += incaij;
307
308 }
309 /* now put the result into y_i */
310 tmp1 = sumA * alpha_i;
311 y_i[yi] = tmp1;
312 ai += incai;
313
314 }
315 } else if (beta_i == 1.0) {
316 /* alpha is other, beta is 1.0 */
317
318 ai = 0;
319 bi = 0;
320 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
321 sumA = 0.0;
322 aij = ai;
323 sumB = 0.0;
324 bij = bi;
325 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
326 x_elem = x_i[xi];
327 a_elem = a_i[aij];
328 prod = a_elem * x_elem;
329 sumA = sumA + prod;
330 aij += incaij;
331 b_elem = b_i[bij];
332 prod = b_elem * x_elem;
333 sumB = sumB + prod;
334 bij += incbij;
335 }
336 /* now put the result into y_i */
337 tmp1 = sumA * alpha_i;
338 tmp2 = sumB;
339 tmp1 = tmp1 + tmp2;
340 y_i[yi] = tmp1;
341 ai += incai;
342 bi += incbi;
343 }
344 } else {
345 /* most general form, alpha, beta are other */
346
347 ai = 0;
348 bi = 0;
349 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
350 sumA = 0.0;
351 aij = ai;
352 sumB = 0.0;
353 bij = bi;
354 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
355 x_elem = x_i[xi];
356 a_elem = a_i[aij];
357 prod = a_elem * x_elem;
358 sumA = sumA + prod;
359 aij += incaij;
360 b_elem = b_i[bij];
361 prod = b_elem * x_elem;
362 sumB = sumB + prod;
363 bij += incbij;
364 }
365 /* now put the result into y_i */
366 tmp1 = sumA * alpha_i;
367 tmp2 = sumB * beta_i;
368 tmp1 = tmp1 + tmp2;
369 y_i[yi] = tmp1;
370 ai += incai;
371 bi += incbi;
372 }
373 }
374 }
375
376 }
377 break;
378
379 case blas_prec_extra:
380 {
381 int i, j;
382 int xi, yi;
383 int x_starti, y_starti, incxi, incyi;
384 int lda_min;
385 int ai;
386 int incai;
387 int aij;
388 int incaij;
389 int bi;
390 int incbi;
391 int bij;
392 int incbij;
393
394 const double *a_i = a;
395 const double *b_i = b;
396 const double *x_i = x;
397 double *y_i = y;
398 double alpha_i = alpha;
399 double beta_i = beta;
400 double a_elem;
401 double b_elem;
402 double x_elem;
403 double head_prod, tail_prod;
404 double head_sumA, tail_sumA;
405 double head_sumB, tail_sumB;
406 double head_tmp1, tail_tmp1;
407 double head_tmp2, tail_tmp2;
408
409 FPU_FIX_DECL;
410
411 /* m is number of rows */
412 /* n is number of columns */
413
414 if (m == 0 || n == 0)
415 return;
416
417
418 /* all error calls */
419 if (order == blas_rowmajor) {
420 lda_min = n;
421 incai = lda; /* row stride */
422 incbi = ldb;
423 incbij = incaij = 1; /* column stride */
424 } else if (order == blas_colmajor) {
425 lda_min = m;
426 incai = incbi = 1; /*row stride */
427 incaij = lda; /* column stride */
428 incbij = ldb;
429 } else {
430 /* error, order not blas_colmajor not blas_rowmajor */
431 BLAS_error(routine_name, -1, order, 0);
432 return;
433 }
434
435 if (m < 0)
436 BLAS_error(routine_name, -2, m, 0);
437 else if (n < 0)
438 BLAS_error(routine_name, -3, n, 0);
439 if (lda < lda_min)
440 BLAS_error(routine_name, -6, lda, 0);
441 else if (ldb < lda_min)
442 BLAS_error(routine_name, -11, ldb, 0);
443 else if (incx == 0)
444 BLAS_error(routine_name, -8, incx, 0);
445 else if (incy == 0)
446 BLAS_error(routine_name, -13, incy, 0);
447
448 incxi = incx;
449 incyi = incy;
450
451
452
453
454
455
456
457 if (incxi > 0)
458 x_starti = 0;
459 else
460 x_starti = (1 - n) * incxi;
461
462 if (incyi > 0)
463 y_starti = 0;
464 else
465 y_starti = (1 - m) * incyi;
466
467 FPU_FIX_START;
468
469 if (alpha_i == 0.0) {
470 if (beta_i == 0.0) {
471 /* alpha, beta are 0.0 */
472 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
473 y_i[yi] = 0.0;
474 }
475 } else if (beta_i == 1.0) {
476 /* alpha is 0.0, beta is 1.0 */
477
478
479 bi = 0;
480 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
481
482 head_sumB = tail_sumB = 0.0;
483 bij = bi;
484 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
485 x_elem = x_i[xi];
486
487 b_elem = b_i[bij];
488 {
489 /* Compute double_double = double * double. */
490 double a1, a2, b1, b2, con;
491
492 con = b_elem * split;
493 a1 = con - b_elem;
494 a1 = con - a1;
495 a2 = b_elem - a1;
496 con = x_elem * split;
497 b1 = con - x_elem;
498 b1 = con - b1;
499 b2 = x_elem - b1;
500
501 head_prod = b_elem * x_elem;
502 tail_prod =
503 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
504 }
505 {
506 /* Compute double-double = double-double + double-double. */
507 double bv;
508 double s1, s2, t1, t2;
509
510 /* Add two hi words. */
511 s1 = head_sumB + head_prod;
512 bv = s1 - head_sumB;
513 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
514
515 /* Add two lo words. */
516 t1 = tail_sumB + tail_prod;
517 bv = t1 - tail_sumB;
518 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
519
520 s2 += t1;
521
522 /* Renormalize (s1, s2) to (t1, s2) */
523 t1 = s1 + s2;
524 s2 = s2 - (t1 - s1);
525
526 t2 += s2;
527
528 /* Renormalize (t1, t2) */
529 head_sumB = t1 + t2;
530 tail_sumB = t2 - (head_sumB - t1);
531 }
532 bij += incbij;
533 }
534 /* now put the result into y_i */
535 y_i[yi] = head_sumB;
536
537 bi += incbi;
538 }
539 } else {
540 /* alpha is 0.0, beta not 1.0 nor 0.0 */
541
542
543 bi = 0;
544 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
545
546 head_sumB = tail_sumB = 0.0;
547 bij = bi;
548 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
549 x_elem = x_i[xi];
550
551 b_elem = b_i[bij];
552 {
553 /* Compute double_double = double * double. */
554 double a1, a2, b1, b2, con;
555
556 con = b_elem * split;
557 a1 = con - b_elem;
558 a1 = con - a1;
559 a2 = b_elem - a1;
560 con = x_elem * split;
561 b1 = con - x_elem;
562 b1 = con - b1;
563 b2 = x_elem - b1;
564
565 head_prod = b_elem * x_elem;
566 tail_prod =
567 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
568 }
569 {
570 /* Compute double-double = double-double + double-double. */
571 double bv;
572 double s1, s2, t1, t2;
573
574 /* Add two hi words. */
575 s1 = head_sumB + head_prod;
576 bv = s1 - head_sumB;
577 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
578
579 /* Add two lo words. */
580 t1 = tail_sumB + tail_prod;
581 bv = t1 - tail_sumB;
582 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
583
584 s2 += t1;
585
586 /* Renormalize (s1, s2) to (t1, s2) */
587 t1 = s1 + s2;
588 s2 = s2 - (t1 - s1);
589
590 t2 += s2;
591
592 /* Renormalize (t1, t2) */
593 head_sumB = t1 + t2;
594 tail_sumB = t2 - (head_sumB - t1);
595 }
596 bij += incbij;
597 }
598 /* now put the result into y_i */
599 {
600 /* Compute double-double = double-double * double. */
601 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
602
603 con = head_sumB * split;
604 a11 = con - head_sumB;
605 a11 = con - a11;
606 a21 = head_sumB - a11;
607 con = beta_i * split;
608 b1 = con - beta_i;
609 b1 = con - b1;
610 b2 = beta_i - b1;
611
612 c11 = head_sumB * beta_i;
613 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
614
615 c2 = tail_sumB * beta_i;
616 t1 = c11 + c2;
617 t2 = (c2 - (t1 - c11)) + c21;
618
619 head_tmp1 = t1 + t2;
620 tail_tmp1 = t2 - (head_tmp1 - t1);
621 }
622 y_i[yi] = head_tmp1;
623
624 bi += incbi;
625 }
626 }
627 } else if (alpha_i == 1.0) {
628 if (beta_i == 0.0) {
629 /* alpha is 1.0, beta is 0.0 */
630
631 ai = 0;
632
633 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
634 head_sumA = tail_sumA = 0.0;
635 aij = ai;
636
637 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
638 x_elem = x_i[xi];
639 a_elem = a_i[aij];
640 {
641 /* Compute double_double = double * double. */
642 double a1, a2, b1, b2, con;
643
644 con = a_elem * split;
645 a1 = con - a_elem;
646 a1 = con - a1;
647 a2 = a_elem - a1;
648 con = x_elem * split;
649 b1 = con - x_elem;
650 b1 = con - b1;
651 b2 = x_elem - b1;
652
653 head_prod = a_elem * x_elem;
654 tail_prod =
655 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
656 }
657 {
658 /* Compute double-double = double-double + double-double. */
659 double bv;
660 double s1, s2, t1, t2;
661
662 /* Add two hi words. */
663 s1 = head_sumA + head_prod;
664 bv = s1 - head_sumA;
665 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
666
667 /* Add two lo words. */
668 t1 = tail_sumA + tail_prod;
669 bv = t1 - tail_sumA;
670 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
671
672 s2 += t1;
673
674 /* Renormalize (s1, s2) to (t1, s2) */
675 t1 = s1 + s2;
676 s2 = s2 - (t1 - s1);
677
678 t2 += s2;
679
680 /* Renormalize (t1, t2) */
681 head_sumA = t1 + t2;
682 tail_sumA = t2 - (head_sumA - t1);
683 }
684 aij += incaij;
685
686 }
687 /* now put the result into y_i */
688 y_i[yi] = head_sumA;
689 ai += incai;
690
691 }
692 } else if (beta_i == 1.0) {
693 /* alpha is 1.0, beta is 1.0 */
694
695 ai = 0;
696 bi = 0;
697 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
698 head_sumA = tail_sumA = 0.0;
699 aij = ai;
700 head_sumB = tail_sumB = 0.0;
701 bij = bi;
702 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
703 x_elem = x_i[xi];
704 a_elem = a_i[aij];
705 {
706 /* Compute double_double = double * double. */
707 double a1, a2, b1, b2, con;
708
709 con = a_elem * split;
710 a1 = con - a_elem;
711 a1 = con - a1;
712 a2 = a_elem - a1;
713 con = x_elem * split;
714 b1 = con - x_elem;
715 b1 = con - b1;
716 b2 = x_elem - b1;
717
718 head_prod = a_elem * x_elem;
719 tail_prod =
720 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
721 }
722 {
723 /* Compute double-double = double-double + double-double. */
724 double bv;
725 double s1, s2, t1, t2;
726
727 /* Add two hi words. */
728 s1 = head_sumA + head_prod;
729 bv = s1 - head_sumA;
730 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
731
732 /* Add two lo words. */
733 t1 = tail_sumA + tail_prod;
734 bv = t1 - tail_sumA;
735 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
736
737 s2 += t1;
738
739 /* Renormalize (s1, s2) to (t1, s2) */
740 t1 = s1 + s2;
741 s2 = s2 - (t1 - s1);
742
743 t2 += s2;
744
745 /* Renormalize (t1, t2) */
746 head_sumA = t1 + t2;
747 tail_sumA = t2 - (head_sumA - t1);
748 }
749 aij += incaij;
750 b_elem = b_i[bij];
751 {
752 /* Compute double_double = double * double. */
753 double a1, a2, b1, b2, con;
754
755 con = b_elem * split;
756 a1 = con - b_elem;
757 a1 = con - a1;
758 a2 = b_elem - a1;
759 con = x_elem * split;
760 b1 = con - x_elem;
761 b1 = con - b1;
762 b2 = x_elem - b1;
763
764 head_prod = b_elem * x_elem;
765 tail_prod =
766 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
767 }
768 {
769 /* Compute double-double = double-double + double-double. */
770 double bv;
771 double s1, s2, t1, t2;
772
773 /* Add two hi words. */
774 s1 = head_sumB + head_prod;
775 bv = s1 - head_sumB;
776 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
777
778 /* Add two lo words. */
779 t1 = tail_sumB + tail_prod;
780 bv = t1 - tail_sumB;
781 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
782
783 s2 += t1;
784
785 /* Renormalize (s1, s2) to (t1, s2) */
786 t1 = s1 + s2;
787 s2 = s2 - (t1 - s1);
788
789 t2 += s2;
790
791 /* Renormalize (t1, t2) */
792 head_sumB = t1 + t2;
793 tail_sumB = t2 - (head_sumB - t1);
794 }
795 bij += incbij;
796 }
797 /* now put the result into y_i */
798 head_tmp1 = head_sumA;
799 tail_tmp1 = tail_sumA;
800 head_tmp2 = head_sumB;
801 tail_tmp2 = tail_sumB;
802 {
803 /* Compute double-double = double-double + double-double. */
804 double bv;
805 double s1, s2, t1, t2;
806
807 /* Add two hi words. */
808 s1 = head_tmp1 + head_tmp2;
809 bv = s1 - head_tmp1;
810 s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
811
812 /* Add two lo words. */
813 t1 = tail_tmp1 + tail_tmp2;
814 bv = t1 - tail_tmp1;
815 t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
816
817 s2 += t1;
818
819 /* Renormalize (s1, s2) to (t1, s2) */
820 t1 = s1 + s2;
821 s2 = s2 - (t1 - s1);
822
823 t2 += s2;
824
825 /* Renormalize (t1, t2) */
826 head_tmp1 = t1 + t2;
827 tail_tmp1 = t2 - (head_tmp1 - t1);
828 }
829 y_i[yi] = head_tmp1;
830 ai += incai;
831 bi += incbi;
832 }
833 } else {
834 /* alpha is 1.0, beta is other */
835
836 ai = 0;
837 bi = 0;
838 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
839 head_sumA = tail_sumA = 0.0;
840 aij = ai;
841 head_sumB = tail_sumB = 0.0;
842 bij = bi;
843 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
844 x_elem = x_i[xi];
845 a_elem = a_i[aij];
846 {
847 /* Compute double_double = double * double. */
848 double a1, a2, b1, b2, con;
849
850 con = a_elem * split;
851 a1 = con - a_elem;
852 a1 = con - a1;
853 a2 = a_elem - a1;
854 con = x_elem * split;
855 b1 = con - x_elem;
856 b1 = con - b1;
857 b2 = x_elem - b1;
858
859 head_prod = a_elem * x_elem;
860 tail_prod =
861 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
862 }
863 {
864 /* Compute double-double = double-double + double-double. */
865 double bv;
866 double s1, s2, t1, t2;
867
868 /* Add two hi words. */
869 s1 = head_sumA + head_prod;
870 bv = s1 - head_sumA;
871 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
872
873 /* Add two lo words. */
874 t1 = tail_sumA + tail_prod;
875 bv = t1 - tail_sumA;
876 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
877
878 s2 += t1;
879
880 /* Renormalize (s1, s2) to (t1, s2) */
881 t1 = s1 + s2;
882 s2 = s2 - (t1 - s1);
883
884 t2 += s2;
885
886 /* Renormalize (t1, t2) */
887 head_sumA = t1 + t2;
888 tail_sumA = t2 - (head_sumA - t1);
889 }
890 aij += incaij;
891 b_elem = b_i[bij];
892 {
893 /* Compute double_double = double * double. */
894 double a1, a2, b1, b2, con;
895
896 con = b_elem * split;
897 a1 = con - b_elem;
898 a1 = con - a1;
899 a2 = b_elem - a1;
900 con = x_elem * split;
901 b1 = con - x_elem;
902 b1 = con - b1;
903 b2 = x_elem - b1;
904
905 head_prod = b_elem * x_elem;
906 tail_prod =
907 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
908 }
909 {
910 /* Compute double-double = double-double + double-double. */
911 double bv;
912 double s1, s2, t1, t2;
913
914 /* Add two hi words. */
915 s1 = head_sumB + head_prod;
916 bv = s1 - head_sumB;
917 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
918
919 /* Add two lo words. */
920 t1 = tail_sumB + tail_prod;
921 bv = t1 - tail_sumB;
922 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
923
924 s2 += t1;
925
926 /* Renormalize (s1, s2) to (t1, s2) */
927 t1 = s1 + s2;
928 s2 = s2 - (t1 - s1);
929
930 t2 += s2;
931
932 /* Renormalize (t1, t2) */
933 head_sumB = t1 + t2;
934 tail_sumB = t2 - (head_sumB - t1);
935 }
936 bij += incbij;
937 }
938 /* now put the result into y_i */
939 head_tmp1 = head_sumA;
940 tail_tmp1 = tail_sumA;
941 {
942 /* Compute double-double = double-double * double. */
943 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
944
945 con = head_sumB * split;
946 a11 = con - head_sumB;
947 a11 = con - a11;
948 a21 = head_sumB - a11;
949 con = beta_i * split;
950 b1 = con - beta_i;
951 b1 = con - b1;
952 b2 = beta_i - b1;
953
954 c11 = head_sumB * beta_i;
955 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
956
957 c2 = tail_sumB * beta_i;
958 t1 = c11 + c2;
959 t2 = (c2 - (t1 - c11)) + c21;
960
961 head_tmp2 = t1 + t2;
962 tail_tmp2 = t2 - (head_tmp2 - t1);
963 }
964 {
965 /* Compute double-double = double-double + double-double. */
966 double bv;
967 double s1, s2, t1, t2;
968
969 /* Add two hi words. */
970 s1 = head_tmp1 + head_tmp2;
971 bv = s1 - head_tmp1;
972 s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
973
974 /* Add two lo words. */
975 t1 = tail_tmp1 + tail_tmp2;
976 bv = t1 - tail_tmp1;
977 t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
978
979 s2 += t1;
980
981 /* Renormalize (s1, s2) to (t1, s2) */
982 t1 = s1 + s2;
983 s2 = s2 - (t1 - s1);
984
985 t2 += s2;
986
987 /* Renormalize (t1, t2) */
988 head_tmp1 = t1 + t2;
989 tail_tmp1 = t2 - (head_tmp1 - t1);
990 }
991 y_i[yi] = head_tmp1;
992 ai += incai;
993 bi += incbi;
994 }
995 }
996 } else {
997 if (beta_i == 0.0) {
998 /* alpha is other, beta is 0.0 */
999
1000 ai = 0;
1001
1002 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1003 head_sumA = tail_sumA = 0.0;
1004 aij = ai;
1005
1006 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1007 x_elem = x_i[xi];
1008 a_elem = a_i[aij];
1009 {
1010 /* Compute double_double = double * double. */
1011 double a1, a2, b1, b2, con;
1012
1013 con = a_elem * split;
1014 a1 = con - a_elem;
1015 a1 = con - a1;
1016 a2 = a_elem - a1;
1017 con = x_elem * split;
1018 b1 = con - x_elem;
1019 b1 = con - b1;
1020 b2 = x_elem - b1;
1021
1022 head_prod = a_elem * x_elem;
1023 tail_prod =
1024 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
1025 }
1026 {
1027 /* Compute double-double = double-double + double-double. */
1028 double bv;
1029 double s1, s2, t1, t2;
1030
1031 /* Add two hi words. */
1032 s1 = head_sumA + head_prod;
1033 bv = s1 - head_sumA;
1034 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1035
1036 /* Add two lo words. */
1037 t1 = tail_sumA + tail_prod;
1038 bv = t1 - tail_sumA;
1039 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1040
1041 s2 += t1;
1042
1043 /* Renormalize (s1, s2) to (t1, s2) */
1044 t1 = s1 + s2;
1045 s2 = s2 - (t1 - s1);
1046
1047 t2 += s2;
1048
1049 /* Renormalize (t1, t2) */
1050 head_sumA = t1 + t2;
1051 tail_sumA = t2 - (head_sumA - t1);
1052 }
1053 aij += incaij;
1054
1055 }
1056 /* now put the result into y_i */
1057 {
1058 /* Compute double-double = double-double * double. */
1059 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1060
1061 con = head_sumA * split;
1062 a11 = con - head_sumA;
1063 a11 = con - a11;
1064 a21 = head_sumA - a11;
1065 con = alpha_i * split;
1066 b1 = con - alpha_i;
1067 b1 = con - b1;
1068 b2 = alpha_i - b1;
1069
1070 c11 = head_sumA * alpha_i;
1071 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1072
1073 c2 = tail_sumA * alpha_i;
1074 t1 = c11 + c2;
1075 t2 = (c2 - (t1 - c11)) + c21;
1076
1077 head_tmp1 = t1 + t2;
1078 tail_tmp1 = t2 - (head_tmp1 - t1);
1079 }
1080 y_i[yi] = head_tmp1;
1081 ai += incai;
1082
1083 }
1084 } else if (beta_i == 1.0) {
1085 /* alpha is other, beta is 1.0 */
1086
1087 ai = 0;
1088 bi = 0;
1089 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1090 head_sumA = tail_sumA = 0.0;
1091 aij = ai;
1092 head_sumB = tail_sumB = 0.0;
1093 bij = bi;
1094 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1095 x_elem = x_i[xi];
1096 a_elem = a_i[aij];
1097 {
1098 /* Compute double_double = double * double. */
1099 double a1, a2, b1, b2, con;
1100
1101 con = a_elem * split;
1102 a1 = con - a_elem;
1103 a1 = con - a1;
1104 a2 = a_elem - a1;
1105 con = x_elem * split;
1106 b1 = con - x_elem;
1107 b1 = con - b1;
1108 b2 = x_elem - b1;
1109
1110 head_prod = a_elem * x_elem;
1111 tail_prod =
1112 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
1113 }
1114 {
1115 /* Compute double-double = double-double + double-double. */
1116 double bv;
1117 double s1, s2, t1, t2;
1118
1119 /* Add two hi words. */
1120 s1 = head_sumA + head_prod;
1121 bv = s1 - head_sumA;
1122 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1123
1124 /* Add two lo words. */
1125 t1 = tail_sumA + tail_prod;
1126 bv = t1 - tail_sumA;
1127 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1128
1129 s2 += t1;
1130
1131 /* Renormalize (s1, s2) to (t1, s2) */
1132 t1 = s1 + s2;
1133 s2 = s2 - (t1 - s1);
1134
1135 t2 += s2;
1136
1137 /* Renormalize (t1, t2) */
1138 head_sumA = t1 + t2;
1139 tail_sumA = t2 - (head_sumA - t1);
1140 }
1141 aij += incaij;
1142 b_elem = b_i[bij];
1143 {
1144 /* Compute double_double = double * double. */
1145 double a1, a2, b1, b2, con;
1146
1147 con = b_elem * split;
1148 a1 = con - b_elem;
1149 a1 = con - a1;
1150 a2 = b_elem - a1;
1151 con = x_elem * split;
1152 b1 = con - x_elem;
1153 b1 = con - b1;
1154 b2 = x_elem - b1;
1155
1156 head_prod = b_elem * x_elem;
1157 tail_prod =
1158 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
1159 }
1160 {
1161 /* Compute double-double = double-double + double-double. */
1162 double bv;
1163 double s1, s2, t1, t2;
1164
1165 /* Add two hi words. */
1166 s1 = head_sumB + head_prod;
1167 bv = s1 - head_sumB;
1168 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
1169
1170 /* Add two lo words. */
1171 t1 = tail_sumB + tail_prod;
1172 bv = t1 - tail_sumB;
1173 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
1174
1175 s2 += t1;
1176
1177 /* Renormalize (s1, s2) to (t1, s2) */
1178 t1 = s1 + s2;
1179 s2 = s2 - (t1 - s1);
1180
1181 t2 += s2;
1182
1183 /* Renormalize (t1, t2) */
1184 head_sumB = t1 + t2;
1185 tail_sumB = t2 - (head_sumB - t1);
1186 }
1187 bij += incbij;
1188 }
1189 /* now put the result into y_i */
1190 {
1191 /* Compute double-double = double-double * double. */
1192 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1193
1194 con = head_sumA * split;
1195 a11 = con - head_sumA;
1196 a11 = con - a11;
1197 a21 = head_sumA - a11;
1198 con = alpha_i * split;
1199 b1 = con - alpha_i;
1200 b1 = con - b1;
1201 b2 = alpha_i - b1;
1202
1203 c11 = head_sumA * alpha_i;
1204 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1205
1206 c2 = tail_sumA * alpha_i;
1207 t1 = c11 + c2;
1208 t2 = (c2 - (t1 - c11)) + c21;
1209
1210 head_tmp1 = t1 + t2;
1211 tail_tmp1 = t2 - (head_tmp1 - t1);
1212 }
1213 head_tmp2 = head_sumB;
1214 tail_tmp2 = tail_sumB;
1215 {
1216 /* Compute double-double = double-double + double-double. */
1217 double bv;
1218 double s1, s2, t1, t2;
1219
1220 /* Add two hi words. */
1221 s1 = head_tmp1 + head_tmp2;
1222 bv = s1 - head_tmp1;
1223 s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
1224
1225 /* Add two lo words. */
1226 t1 = tail_tmp1 + tail_tmp2;
1227 bv = t1 - tail_tmp1;
1228 t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
1229
1230 s2 += t1;
1231
1232 /* Renormalize (s1, s2) to (t1, s2) */
1233 t1 = s1 + s2;
1234 s2 = s2 - (t1 - s1);
1235
1236 t2 += s2;
1237
1238 /* Renormalize (t1, t2) */
1239 head_tmp1 = t1 + t2;
1240 tail_tmp1 = t2 - (head_tmp1 - t1);
1241 }
1242 y_i[yi] = head_tmp1;
1243 ai += incai;
1244 bi += incbi;
1245 }
1246 } else {
1247 /* most general form, alpha, beta are other */
1248
1249 ai = 0;
1250 bi = 0;
1251 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1252 head_sumA = tail_sumA = 0.0;
1253 aij = ai;
1254 head_sumB = tail_sumB = 0.0;
1255 bij = bi;
1256 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1257 x_elem = x_i[xi];
1258 a_elem = a_i[aij];
1259 {
1260 /* Compute double_double = double * double. */
1261 double a1, a2, b1, b2, con;
1262
1263 con = a_elem * split;
1264 a1 = con - a_elem;
1265 a1 = con - a1;
1266 a2 = a_elem - a1;
1267 con = x_elem * split;
1268 b1 = con - x_elem;
1269 b1 = con - b1;
1270 b2 = x_elem - b1;
1271
1272 head_prod = a_elem * x_elem;
1273 tail_prod =
1274 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
1275 }
1276 {
1277 /* Compute double-double = double-double + double-double. */
1278 double bv;
1279 double s1, s2, t1, t2;
1280
1281 /* Add two hi words. */
1282 s1 = head_sumA + head_prod;
1283 bv = s1 - head_sumA;
1284 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1285
1286 /* Add two lo words. */
1287 t1 = tail_sumA + tail_prod;
1288 bv = t1 - tail_sumA;
1289 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1290
1291 s2 += t1;
1292
1293 /* Renormalize (s1, s2) to (t1, s2) */
1294 t1 = s1 + s2;
1295 s2 = s2 - (t1 - s1);
1296
1297 t2 += s2;
1298
1299 /* Renormalize (t1, t2) */
1300 head_sumA = t1 + t2;
1301 tail_sumA = t2 - (head_sumA - t1);
1302 }
1303 aij += incaij;
1304 b_elem = b_i[bij];
1305 {
1306 /* Compute double_double = double * double. */
1307 double a1, a2, b1, b2, con;
1308
1309 con = b_elem * split;
1310 a1 = con - b_elem;
1311 a1 = con - a1;
1312 a2 = b_elem - a1;
1313 con = x_elem * split;
1314 b1 = con - x_elem;
1315 b1 = con - b1;
1316 b2 = x_elem - b1;
1317
1318 head_prod = b_elem * x_elem;
1319 tail_prod =
1320 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
1321 }
1322 {
1323 /* Compute double-double = double-double + double-double. */
1324 double bv;
1325 double s1, s2, t1, t2;
1326
1327 /* Add two hi words. */
1328 s1 = head_sumB + head_prod;
1329 bv = s1 - head_sumB;
1330 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
1331
1332 /* Add two lo words. */
1333 t1 = tail_sumB + tail_prod;
1334 bv = t1 - tail_sumB;
1335 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
1336
1337 s2 += t1;
1338
1339 /* Renormalize (s1, s2) to (t1, s2) */
1340 t1 = s1 + s2;
1341 s2 = s2 - (t1 - s1);
1342
1343 t2 += s2;
1344
1345 /* Renormalize (t1, t2) */
1346 head_sumB = t1 + t2;
1347 tail_sumB = t2 - (head_sumB - t1);
1348 }
1349 bij += incbij;
1350 }
1351 /* now put the result into y_i */
1352 {
1353 /* Compute double-double = double-double * double. */
1354 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1355
1356 con = head_sumA * split;
1357 a11 = con - head_sumA;
1358 a11 = con - a11;
1359 a21 = head_sumA - a11;
1360 con = alpha_i * split;
1361 b1 = con - alpha_i;
1362 b1 = con - b1;
1363 b2 = alpha_i - b1;
1364
1365 c11 = head_sumA * alpha_i;
1366 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1367
1368 c2 = tail_sumA * alpha_i;
1369 t1 = c11 + c2;
1370 t2 = (c2 - (t1 - c11)) + c21;
1371
1372 head_tmp1 = t1 + t2;
1373 tail_tmp1 = t2 - (head_tmp1 - t1);
1374 }
1375 {
1376 /* Compute double-double = double-double * double. */
1377 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1378
1379 con = head_sumB * split;
1380 a11 = con - head_sumB;
1381 a11 = con - a11;
1382 a21 = head_sumB - a11;
1383 con = beta_i * split;
1384 b1 = con - beta_i;
1385 b1 = con - b1;
1386 b2 = beta_i - b1;
1387
1388 c11 = head_sumB * beta_i;
1389 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1390
1391 c2 = tail_sumB * beta_i;
1392 t1 = c11 + c2;
1393 t2 = (c2 - (t1 - c11)) + c21;
1394
1395 head_tmp2 = t1 + t2;
1396 tail_tmp2 = t2 - (head_tmp2 - t1);
1397 }
1398 {
1399 /* Compute double-double = double-double + double-double. */
1400 double bv;
1401 double s1, s2, t1, t2;
1402
1403 /* Add two hi words. */
1404 s1 = head_tmp1 + head_tmp2;
1405 bv = s1 - head_tmp1;
1406 s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
1407
1408 /* Add two lo words. */
1409 t1 = tail_tmp1 + tail_tmp2;
1410 bv = t1 - tail_tmp1;
1411 t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
1412
1413 s2 += t1;
1414
1415 /* Renormalize (s1, s2) to (t1, s2) */
1416 t1 = s1 + s2;
1417 s2 = s2 - (t1 - s1);
1418
1419 t2 += s2;
1420
1421 /* Renormalize (t1, t2) */
1422 head_tmp1 = t1 + t2;
1423 tail_tmp1 = t2 - (head_tmp1 - t1);
1424 }
1425 y_i[yi] = head_tmp1;
1426 ai += incai;
1427 bi += incbi;
1428 }
1429 }
1430 }
1431 FPU_FIX_STOP;
1432 }
1433 break;
1434
1435 default:
1436 {
1437 BLAS_error(routine_name, -14, prec, 0);
1438 }
1439 break;
1440 }
1441
1442 } /* end BLAS_dge_sum_mv */
1443