1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_cge_sum_mv_s_s_x(enum blas_order_type order,int m,int n,const void * alpha,const float * a,int lda,const float * x,int incx,const void * beta,const float * b,int ldb,void * y,int incy,enum blas_prec_type prec)3 void BLAS_cge_sum_mv_s_s_x(enum blas_order_type order, int m, int n,
4 const void *alpha, const float *a, int lda,
5 const float *x, int incx,
6 const void *beta, const float *b, int ldb,
7 void *y, int incy, enum blas_prec_type prec)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * Computes y = alpha * A * x + beta * B * y,
14 * where A, B are general matricies.
15 *
16 * Arguments
17 * =========
18 *
19 * order (input) blas_order_type
20 * Order of A; row or column major
21 *
22 * m (input) int
23 * Row Dimension of A, B, length of output vector y
24 *
25 * n (input) int
26 * Column Dimension of A, B and the length of vector x
27 *
28 * alpha (input) const void*
29 *
30 * A (input) const float*
31 *
32 * lda (input) int
33 * Leading dimension of A
34 *
35 * x (input) const float*
36 *
37 * incx (input) int
38 * The stride for vector x.
39 *
40 * beta (input) const void*
41 *
42 * b (input) const float*
43 *
44 * ldb (input) int
45 * Leading dimension of B
46 *
47 * y (input/output) void*
48 *
49 * incy (input) int
50 * The stride for vector y.
51 *
52 * prec (input) enum blas_prec_type
53 * Specifies the internal precision to be used.
54 * = blas_prec_single: single precision.
55 * = blas_prec_double: double precision.
56 * = blas_prec_extra : anything at least 1.5 times as accurate
57 * than double, and wider than 80-bits.
58 * We use double-double in our implementation.
59 *
60 */
61 {
62 /* Routine name */
63 static const char routine_name[] = "BLAS_cge_sum_mv_s_s";
64 switch (prec) {
65 case blas_prec_single:{
66 int i, j;
67 int xi, yi;
68 int x_starti, y_starti, incxi, incyi;
69 int lda_min;
70 int ai;
71 int incai;
72 int aij;
73 int incaij;
74 int bi;
75 int incbi;
76 int bij;
77 int incbij;
78
79 const float *a_i = a;
80 const float *b_i = b;
81 const float *x_i = x;
82 float *y_i = (float *) y;
83 float *alpha_i = (float *) alpha;
84 float *beta_i = (float *) beta;
85 float a_elem;
86 float b_elem;
87 float x_elem;
88 float prod;
89 float sumA;
90 float sumB;
91 float tmp1[2];
92 float tmp2[2];
93
94
95
96 /* m is number of rows */
97 /* n is number of columns */
98
99 if (m == 0 || n == 0)
100 return;
101
102
103 /* all error calls */
104 if (order == blas_rowmajor) {
105 lda_min = n;
106 incai = lda; /* row stride */
107 incbi = ldb;
108 incbij = incaij = 1; /* column stride */
109 } else if (order == blas_colmajor) {
110 lda_min = m;
111 incai = incbi = 1; /*row stride */
112 incaij = lda; /* column stride */
113 incbij = ldb;
114 } else {
115 /* error, order not blas_colmajor not blas_rowmajor */
116 BLAS_error(routine_name, -1, order, 0);
117 return;
118 }
119
120 if (m < 0)
121 BLAS_error(routine_name, -2, m, 0);
122 else if (n < 0)
123 BLAS_error(routine_name, -3, n, 0);
124 if (lda < lda_min)
125 BLAS_error(routine_name, -6, lda, 0);
126 else if (ldb < lda_min)
127 BLAS_error(routine_name, -11, ldb, 0);
128 else if (incx == 0)
129 BLAS_error(routine_name, -8, incx, 0);
130 else if (incy == 0)
131 BLAS_error(routine_name, -13, incy, 0);
132
133 incxi = incx;
134 incyi = incy;
135
136 incyi *= 2;
137
138
139
140
141
142 if (incxi > 0)
143 x_starti = 0;
144 else
145 x_starti = (1 - n) * incxi;
146
147 if (incyi > 0)
148 y_starti = 0;
149 else
150 y_starti = (1 - m) * incyi;
151
152
153
154 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
155 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
156 /* alpha, beta are 0.0 */
157 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
158 y_i[yi] = 0.0;
159 y_i[yi + 1] = 0.0;
160 }
161 } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
162 /* alpha is 0.0, beta is 1.0 */
163
164
165 bi = 0;
166 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
167
168 sumB = 0.0;
169 bij = bi;
170 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
171 x_elem = x_i[xi];
172
173 b_elem = b_i[bij];
174 prod = b_elem * x_elem;
175 sumB = sumB + prod;
176 bij += incbij;
177 }
178 /* now put the result into y_i */
179 tmp1[0] = sumB;
180 tmp1[1] = 0.0;
181 y_i[yi] = tmp1[0];
182 y_i[yi + 1] = tmp1[1];
183
184 bi += incbi;
185 }
186 } else {
187 /* alpha is 0.0, beta not 1.0 nor 0.0 */
188
189
190 bi = 0;
191 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
192
193 sumB = 0.0;
194 bij = bi;
195 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
196 x_elem = x_i[xi];
197
198 b_elem = b_i[bij];
199 prod = b_elem * x_elem;
200 sumB = sumB + prod;
201 bij += incbij;
202 }
203 /* now put the result into y_i */
204 {
205 tmp1[0] = beta_i[0] * sumB;
206 tmp1[1] = beta_i[1] * sumB;
207 }
208 y_i[yi] = tmp1[0];
209 y_i[yi + 1] = tmp1[1];
210
211 bi += incbi;
212 }
213 }
214 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
215 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
216 /* alpha is 1.0, beta is 0.0 */
217
218 ai = 0;
219
220 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
221 sumA = 0.0;
222 aij = ai;
223
224 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
225 x_elem = x_i[xi];
226 a_elem = a_i[aij];
227 prod = a_elem * x_elem;
228 sumA = sumA + prod;
229 aij += incaij;
230
231 }
232 /* now put the result into y_i */
233 tmp1[0] = sumA;
234 tmp1[1] = 0.0;
235 y_i[yi] = tmp1[0];
236 y_i[yi + 1] = tmp1[1];
237 ai += incai;
238
239 }
240 } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
241 /* alpha is 1.0, beta is 1.0 */
242
243 ai = 0;
244 bi = 0;
245 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
246 sumA = 0.0;
247 aij = ai;
248 sumB = 0.0;
249 bij = bi;
250 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
251 x_elem = x_i[xi];
252 a_elem = a_i[aij];
253 prod = a_elem * x_elem;
254 sumA = sumA + prod;
255 aij += incaij;
256 b_elem = b_i[bij];
257 prod = b_elem * x_elem;
258 sumB = sumB + prod;
259 bij += incbij;
260 }
261 /* now put the result into y_i */
262 tmp1[0] = sumA;
263 tmp1[1] = 0.0;
264 tmp2[0] = sumB;
265 tmp2[1] = 0.0;
266 tmp1[0] = tmp1[0] + tmp2[0];
267 tmp1[1] = tmp1[1] + tmp2[1];
268 y_i[yi] = tmp1[0];
269 y_i[yi + 1] = tmp1[1];
270 ai += incai;
271 bi += incbi;
272 }
273 } else {
274 /* alpha is 1.0, beta is other */
275
276 ai = 0;
277 bi = 0;
278 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
279 sumA = 0.0;
280 aij = ai;
281 sumB = 0.0;
282 bij = bi;
283 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
284 x_elem = x_i[xi];
285 a_elem = a_i[aij];
286 prod = a_elem * x_elem;
287 sumA = sumA + prod;
288 aij += incaij;
289 b_elem = b_i[bij];
290 prod = b_elem * x_elem;
291 sumB = sumB + prod;
292 bij += incbij;
293 }
294 /* now put the result into y_i */
295 tmp1[0] = sumA;
296 tmp1[1] = 0.0;
297 {
298 tmp2[0] = beta_i[0] * sumB;
299 tmp2[1] = beta_i[1] * sumB;
300 }
301 tmp1[0] = tmp1[0] + tmp2[0];
302 tmp1[1] = tmp1[1] + tmp2[1];
303 y_i[yi] = tmp1[0];
304 y_i[yi + 1] = tmp1[1];
305 ai += incai;
306 bi += incbi;
307 }
308 }
309 } else {
310 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
311 /* alpha is other, beta is 0.0 */
312
313 ai = 0;
314
315 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
316 sumA = 0.0;
317 aij = ai;
318
319 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
320 x_elem = x_i[xi];
321 a_elem = a_i[aij];
322 prod = a_elem * x_elem;
323 sumA = sumA + prod;
324 aij += incaij;
325
326 }
327 /* now put the result into y_i */
328 {
329 tmp1[0] = alpha_i[0] * sumA;
330 tmp1[1] = alpha_i[1] * sumA;
331 }
332 y_i[yi] = tmp1[0];
333 y_i[yi + 1] = tmp1[1];
334 ai += incai;
335
336 }
337 } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
338 /* alpha is other, beta is 1.0 */
339
340 ai = 0;
341 bi = 0;
342 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
343 sumA = 0.0;
344 aij = ai;
345 sumB = 0.0;
346 bij = bi;
347 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
348 x_elem = x_i[xi];
349 a_elem = a_i[aij];
350 prod = a_elem * x_elem;
351 sumA = sumA + prod;
352 aij += incaij;
353 b_elem = b_i[bij];
354 prod = b_elem * x_elem;
355 sumB = sumB + prod;
356 bij += incbij;
357 }
358 /* now put the result into y_i */
359 {
360 tmp1[0] = alpha_i[0] * sumA;
361 tmp1[1] = alpha_i[1] * sumA;
362 }
363 tmp2[0] = sumB;
364 tmp2[1] = 0.0;
365 tmp1[0] = tmp1[0] + tmp2[0];
366 tmp1[1] = tmp1[1] + tmp2[1];
367 y_i[yi] = tmp1[0];
368 y_i[yi + 1] = tmp1[1];
369 ai += incai;
370 bi += incbi;
371 }
372 } else {
373 /* most general form, alpha, beta are other */
374
375 ai = 0;
376 bi = 0;
377 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
378 sumA = 0.0;
379 aij = ai;
380 sumB = 0.0;
381 bij = bi;
382 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
383 x_elem = x_i[xi];
384 a_elem = a_i[aij];
385 prod = a_elem * x_elem;
386 sumA = sumA + prod;
387 aij += incaij;
388 b_elem = b_i[bij];
389 prod = b_elem * x_elem;
390 sumB = sumB + prod;
391 bij += incbij;
392 }
393 /* now put the result into y_i */
394 {
395 tmp1[0] = alpha_i[0] * sumA;
396 tmp1[1] = alpha_i[1] * sumA;
397 }
398 {
399 tmp2[0] = beta_i[0] * sumB;
400 tmp2[1] = beta_i[1] * sumB;
401 }
402 tmp1[0] = tmp1[0] + tmp2[0];
403 tmp1[1] = tmp1[1] + tmp2[1];
404 y_i[yi] = tmp1[0];
405 y_i[yi + 1] = tmp1[1];
406 ai += incai;
407 bi += incbi;
408 }
409 }
410 }
411
412
413 break;
414 }
415 case blas_prec_indigenous:
416 case blas_prec_double:
417 {
418 int i, j;
419 int xi, yi;
420 int x_starti, y_starti, incxi, incyi;
421 int lda_min;
422 int ai;
423 int incai;
424 int aij;
425 int incaij;
426 int bi;
427 int incbi;
428 int bij;
429 int incbij;
430
431 const float *a_i = a;
432 const float *b_i = b;
433 const float *x_i = x;
434 float *y_i = (float *) y;
435 float *alpha_i = (float *) alpha;
436 float *beta_i = (float *) beta;
437 float a_elem;
438 float b_elem;
439 float x_elem;
440 double prod;
441 double sumA;
442 double sumB;
443 double tmp1[2];
444 double tmp2[2];
445
446
447
448 /* m is number of rows */
449 /* n is number of columns */
450
451 if (m == 0 || n == 0)
452 return;
453
454
455 /* all error calls */
456 if (order == blas_rowmajor) {
457 lda_min = n;
458 incai = lda; /* row stride */
459 incbi = ldb;
460 incbij = incaij = 1; /* column stride */
461 } else if (order == blas_colmajor) {
462 lda_min = m;
463 incai = incbi = 1; /*row stride */
464 incaij = lda; /* column stride */
465 incbij = ldb;
466 } else {
467 /* error, order not blas_colmajor not blas_rowmajor */
468 BLAS_error(routine_name, -1, order, 0);
469 return;
470 }
471
472 if (m < 0)
473 BLAS_error(routine_name, -2, m, 0);
474 else if (n < 0)
475 BLAS_error(routine_name, -3, n, 0);
476 if (lda < lda_min)
477 BLAS_error(routine_name, -6, lda, 0);
478 else if (ldb < lda_min)
479 BLAS_error(routine_name, -11, ldb, 0);
480 else if (incx == 0)
481 BLAS_error(routine_name, -8, incx, 0);
482 else if (incy == 0)
483 BLAS_error(routine_name, -13, incy, 0);
484
485 incxi = incx;
486 incyi = incy;
487
488 incyi *= 2;
489
490
491
492
493
494 if (incxi > 0)
495 x_starti = 0;
496 else
497 x_starti = (1 - n) * incxi;
498
499 if (incyi > 0)
500 y_starti = 0;
501 else
502 y_starti = (1 - m) * incyi;
503
504
505
506 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
507 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
508 /* alpha, beta are 0.0 */
509 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
510 y_i[yi] = 0.0;
511 y_i[yi + 1] = 0.0;
512 }
513 } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
514 /* alpha is 0.0, beta is 1.0 */
515
516
517 bi = 0;
518 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
519
520 sumB = 0.0;
521 bij = bi;
522 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
523 x_elem = x_i[xi];
524
525 b_elem = b_i[bij];
526 prod = (double) b_elem *x_elem;
527 sumB = sumB + prod;
528 bij += incbij;
529 }
530 /* now put the result into y_i */
531 tmp1[0] = sumB;
532 tmp1[1] = 0.0;
533 y_i[yi] = tmp1[0];
534 y_i[yi + 1] = tmp1[1];
535
536 bi += incbi;
537 }
538 } else {
539 /* alpha is 0.0, beta not 1.0 nor 0.0 */
540
541
542 bi = 0;
543 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
544
545 sumB = 0.0;
546 bij = bi;
547 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
548 x_elem = x_i[xi];
549
550 b_elem = b_i[bij];
551 prod = (double) b_elem *x_elem;
552 sumB = sumB + prod;
553 bij += incbij;
554 }
555 /* now put the result into y_i */
556 {
557 tmp1[0] = beta_i[0] * sumB;
558 tmp1[1] = beta_i[1] * sumB;
559 }
560 y_i[yi] = tmp1[0];
561 y_i[yi + 1] = tmp1[1];
562
563 bi += incbi;
564 }
565 }
566 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
567 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
568 /* alpha is 1.0, beta is 0.0 */
569
570 ai = 0;
571
572 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
573 sumA = 0.0;
574 aij = ai;
575
576 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
577 x_elem = x_i[xi];
578 a_elem = a_i[aij];
579 prod = (double) a_elem *x_elem;
580 sumA = sumA + prod;
581 aij += incaij;
582
583 }
584 /* now put the result into y_i */
585 tmp1[0] = sumA;
586 tmp1[1] = 0.0;
587 y_i[yi] = tmp1[0];
588 y_i[yi + 1] = tmp1[1];
589 ai += incai;
590
591 }
592 } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
593 /* alpha is 1.0, beta is 1.0 */
594
595 ai = 0;
596 bi = 0;
597 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
598 sumA = 0.0;
599 aij = ai;
600 sumB = 0.0;
601 bij = bi;
602 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
603 x_elem = x_i[xi];
604 a_elem = a_i[aij];
605 prod = (double) a_elem *x_elem;
606 sumA = sumA + prod;
607 aij += incaij;
608 b_elem = b_i[bij];
609 prod = (double) b_elem *x_elem;
610 sumB = sumB + prod;
611 bij += incbij;
612 }
613 /* now put the result into y_i */
614 tmp1[0] = sumA;
615 tmp1[1] = 0.0;
616 tmp2[0] = sumB;
617 tmp2[1] = 0.0;
618 tmp1[0] = tmp1[0] + tmp2[0];
619 tmp1[1] = tmp1[1] + tmp2[1];
620 y_i[yi] = tmp1[0];
621 y_i[yi + 1] = tmp1[1];
622 ai += incai;
623 bi += incbi;
624 }
625 } else {
626 /* alpha is 1.0, beta is other */
627
628 ai = 0;
629 bi = 0;
630 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
631 sumA = 0.0;
632 aij = ai;
633 sumB = 0.0;
634 bij = bi;
635 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
636 x_elem = x_i[xi];
637 a_elem = a_i[aij];
638 prod = (double) a_elem *x_elem;
639 sumA = sumA + prod;
640 aij += incaij;
641 b_elem = b_i[bij];
642 prod = (double) b_elem *x_elem;
643 sumB = sumB + prod;
644 bij += incbij;
645 }
646 /* now put the result into y_i */
647 tmp1[0] = sumA;
648 tmp1[1] = 0.0;
649 {
650 tmp2[0] = beta_i[0] * sumB;
651 tmp2[1] = beta_i[1] * sumB;
652 }
653 tmp1[0] = tmp1[0] + tmp2[0];
654 tmp1[1] = tmp1[1] + tmp2[1];
655 y_i[yi] = tmp1[0];
656 y_i[yi + 1] = tmp1[1];
657 ai += incai;
658 bi += incbi;
659 }
660 }
661 } else {
662 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
663 /* alpha is other, beta is 0.0 */
664
665 ai = 0;
666
667 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
668 sumA = 0.0;
669 aij = ai;
670
671 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
672 x_elem = x_i[xi];
673 a_elem = a_i[aij];
674 prod = (double) a_elem *x_elem;
675 sumA = sumA + prod;
676 aij += incaij;
677
678 }
679 /* now put the result into y_i */
680 {
681 tmp1[0] = alpha_i[0] * sumA;
682 tmp1[1] = alpha_i[1] * sumA;
683 }
684 y_i[yi] = tmp1[0];
685 y_i[yi + 1] = tmp1[1];
686 ai += incai;
687
688 }
689 } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
690 /* alpha is other, beta is 1.0 */
691
692 ai = 0;
693 bi = 0;
694 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
695 sumA = 0.0;
696 aij = ai;
697 sumB = 0.0;
698 bij = bi;
699 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
700 x_elem = x_i[xi];
701 a_elem = a_i[aij];
702 prod = (double) a_elem *x_elem;
703 sumA = sumA + prod;
704 aij += incaij;
705 b_elem = b_i[bij];
706 prod = (double) b_elem *x_elem;
707 sumB = sumB + prod;
708 bij += incbij;
709 }
710 /* now put the result into y_i */
711 {
712 tmp1[0] = alpha_i[0] * sumA;
713 tmp1[1] = alpha_i[1] * sumA;
714 }
715 tmp2[0] = sumB;
716 tmp2[1] = 0.0;
717 tmp1[0] = tmp1[0] + tmp2[0];
718 tmp1[1] = tmp1[1] + tmp2[1];
719 y_i[yi] = tmp1[0];
720 y_i[yi + 1] = tmp1[1];
721 ai += incai;
722 bi += incbi;
723 }
724 } else {
725 /* most general form, alpha, beta are other */
726
727 ai = 0;
728 bi = 0;
729 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
730 sumA = 0.0;
731 aij = ai;
732 sumB = 0.0;
733 bij = bi;
734 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
735 x_elem = x_i[xi];
736 a_elem = a_i[aij];
737 prod = (double) a_elem *x_elem;
738 sumA = sumA + prod;
739 aij += incaij;
740 b_elem = b_i[bij];
741 prod = (double) b_elem *x_elem;
742 sumB = sumB + prod;
743 bij += incbij;
744 }
745 /* now put the result into y_i */
746 {
747 tmp1[0] = alpha_i[0] * sumA;
748 tmp1[1] = alpha_i[1] * sumA;
749 }
750 {
751 tmp2[0] = beta_i[0] * sumB;
752 tmp2[1] = beta_i[1] * sumB;
753 }
754 tmp1[0] = tmp1[0] + tmp2[0];
755 tmp1[1] = tmp1[1] + tmp2[1];
756 y_i[yi] = tmp1[0];
757 y_i[yi + 1] = tmp1[1];
758 ai += incai;
759 bi += incbi;
760 }
761 }
762 }
763
764 }
765 break;
766
767 case blas_prec_extra:
768 {
769 int i, j;
770 int xi, yi;
771 int x_starti, y_starti, incxi, incyi;
772 int lda_min;
773 int ai;
774 int incai;
775 int aij;
776 int incaij;
777 int bi;
778 int incbi;
779 int bij;
780 int incbij;
781
782 const float *a_i = a;
783 const float *b_i = b;
784 const float *x_i = x;
785 float *y_i = (float *) y;
786 float *alpha_i = (float *) alpha;
787 float *beta_i = (float *) beta;
788 float a_elem;
789 float b_elem;
790 float x_elem;
791 double head_prod, tail_prod;
792 double head_sumA, tail_sumA;
793 double head_sumB, tail_sumB;
794 double head_tmp1[2], tail_tmp1[2];
795 double head_tmp2[2], tail_tmp2[2];
796
797 FPU_FIX_DECL;
798
799 /* m is number of rows */
800 /* n is number of columns */
801
802 if (m == 0 || n == 0)
803 return;
804
805
806 /* all error calls */
807 if (order == blas_rowmajor) {
808 lda_min = n;
809 incai = lda; /* row stride */
810 incbi = ldb;
811 incbij = incaij = 1; /* column stride */
812 } else if (order == blas_colmajor) {
813 lda_min = m;
814 incai = incbi = 1; /*row stride */
815 incaij = lda; /* column stride */
816 incbij = ldb;
817 } else {
818 /* error, order not blas_colmajor not blas_rowmajor */
819 BLAS_error(routine_name, -1, order, 0);
820 return;
821 }
822
823 if (m < 0)
824 BLAS_error(routine_name, -2, m, 0);
825 else if (n < 0)
826 BLAS_error(routine_name, -3, n, 0);
827 if (lda < lda_min)
828 BLAS_error(routine_name, -6, lda, 0);
829 else if (ldb < lda_min)
830 BLAS_error(routine_name, -11, ldb, 0);
831 else if (incx == 0)
832 BLAS_error(routine_name, -8, incx, 0);
833 else if (incy == 0)
834 BLAS_error(routine_name, -13, incy, 0);
835
836 incxi = incx;
837 incyi = incy;
838
839 incyi *= 2;
840
841
842
843
844
845 if (incxi > 0)
846 x_starti = 0;
847 else
848 x_starti = (1 - n) * incxi;
849
850 if (incyi > 0)
851 y_starti = 0;
852 else
853 y_starti = (1 - m) * incyi;
854
855 FPU_FIX_START;
856
857 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
858 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
859 /* alpha, beta are 0.0 */
860 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
861 y_i[yi] = 0.0;
862 y_i[yi + 1] = 0.0;
863 }
864 } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
865 /* alpha is 0.0, beta is 1.0 */
866
867
868 bi = 0;
869 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
870
871 head_sumB = tail_sumB = 0.0;
872 bij = bi;
873 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
874 x_elem = x_i[xi];
875
876 b_elem = b_i[bij];
877 head_prod = (double) b_elem *x_elem;
878 tail_prod = 0.0;
879 {
880 /* Compute double-double = double-double + double-double. */
881 double bv;
882 double s1, s2, t1, t2;
883
884 /* Add two hi words. */
885 s1 = head_sumB + head_prod;
886 bv = s1 - head_sumB;
887 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
888
889 /* Add two lo words. */
890 t1 = tail_sumB + tail_prod;
891 bv = t1 - tail_sumB;
892 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
893
894 s2 += t1;
895
896 /* Renormalize (s1, s2) to (t1, s2) */
897 t1 = s1 + s2;
898 s2 = s2 - (t1 - s1);
899
900 t2 += s2;
901
902 /* Renormalize (t1, t2) */
903 head_sumB = t1 + t2;
904 tail_sumB = t2 - (head_sumB - t1);
905 }
906 bij += incbij;
907 }
908 /* now put the result into y_i */
909 head_tmp1[0] = head_sumB;
910 tail_tmp1[0] = tail_sumB;
911 head_tmp1[1] = tail_tmp1[1] = 0.0;
912 y_i[yi] = head_tmp1[0];
913 y_i[yi + 1] = head_tmp1[1];
914
915 bi += incbi;
916 }
917 } else {
918 /* alpha is 0.0, beta not 1.0 nor 0.0 */
919
920
921 bi = 0;
922 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
923
924 head_sumB = tail_sumB = 0.0;
925 bij = bi;
926 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
927 x_elem = x_i[xi];
928
929 b_elem = b_i[bij];
930 head_prod = (double) b_elem *x_elem;
931 tail_prod = 0.0;
932 {
933 /* Compute double-double = double-double + double-double. */
934 double bv;
935 double s1, s2, t1, t2;
936
937 /* Add two hi words. */
938 s1 = head_sumB + head_prod;
939 bv = s1 - head_sumB;
940 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
941
942 /* Add two lo words. */
943 t1 = tail_sumB + tail_prod;
944 bv = t1 - tail_sumB;
945 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
946
947 s2 += t1;
948
949 /* Renormalize (s1, s2) to (t1, s2) */
950 t1 = s1 + s2;
951 s2 = s2 - (t1 - s1);
952
953 t2 += s2;
954
955 /* Renormalize (t1, t2) */
956 head_sumB = t1 + t2;
957 tail_sumB = t2 - (head_sumB - t1);
958 }
959 bij += incbij;
960 }
961 /* now put the result into y_i */
962 {
963 double head_e1, tail_e1;
964 double dt;
965 dt = (double) beta_i[0];
966 {
967 /* Compute double-double = double-double * double. */
968 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
969
970 con = head_sumB * split;
971 a11 = con - head_sumB;
972 a11 = con - a11;
973 a21 = head_sumB - a11;
974 con = dt * split;
975 b1 = con - dt;
976 b1 = con - b1;
977 b2 = dt - b1;
978
979 c11 = head_sumB * dt;
980 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
981
982 c2 = tail_sumB * dt;
983 t1 = c11 + c2;
984 t2 = (c2 - (t1 - c11)) + c21;
985
986 head_e1 = t1 + t2;
987 tail_e1 = t2 - (head_e1 - t1);
988 }
989 head_tmp1[0] = head_e1;
990 tail_tmp1[0] = tail_e1;
991 dt = (double) beta_i[1];
992 {
993 /* Compute double-double = double-double * double. */
994 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
995
996 con = head_sumB * split;
997 a11 = con - head_sumB;
998 a11 = con - a11;
999 a21 = head_sumB - a11;
1000 con = dt * split;
1001 b1 = con - dt;
1002 b1 = con - b1;
1003 b2 = dt - b1;
1004
1005 c11 = head_sumB * dt;
1006 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1007
1008 c2 = tail_sumB * dt;
1009 t1 = c11 + c2;
1010 t2 = (c2 - (t1 - c11)) + c21;
1011
1012 head_e1 = t1 + t2;
1013 tail_e1 = t2 - (head_e1 - t1);
1014 }
1015 head_tmp1[1] = head_e1;
1016 tail_tmp1[1] = tail_e1;
1017 }
1018 y_i[yi] = head_tmp1[0];
1019 y_i[yi + 1] = head_tmp1[1];
1020
1021 bi += incbi;
1022 }
1023 }
1024 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
1025 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
1026 /* alpha is 1.0, beta is 0.0 */
1027
1028 ai = 0;
1029
1030 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1031 head_sumA = tail_sumA = 0.0;
1032 aij = ai;
1033
1034 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1035 x_elem = x_i[xi];
1036 a_elem = a_i[aij];
1037 head_prod = (double) a_elem *x_elem;
1038 tail_prod = 0.0;
1039 {
1040 /* Compute double-double = double-double + double-double. */
1041 double bv;
1042 double s1, s2, t1, t2;
1043
1044 /* Add two hi words. */
1045 s1 = head_sumA + head_prod;
1046 bv = s1 - head_sumA;
1047 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1048
1049 /* Add two lo words. */
1050 t1 = tail_sumA + tail_prod;
1051 bv = t1 - tail_sumA;
1052 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1053
1054 s2 += t1;
1055
1056 /* Renormalize (s1, s2) to (t1, s2) */
1057 t1 = s1 + s2;
1058 s2 = s2 - (t1 - s1);
1059
1060 t2 += s2;
1061
1062 /* Renormalize (t1, t2) */
1063 head_sumA = t1 + t2;
1064 tail_sumA = t2 - (head_sumA - t1);
1065 }
1066 aij += incaij;
1067
1068 }
1069 /* now put the result into y_i */
1070 head_tmp1[0] = head_sumA;
1071 tail_tmp1[0] = tail_sumA;
1072 head_tmp1[1] = tail_tmp1[1] = 0.0;
1073 y_i[yi] = head_tmp1[0];
1074 y_i[yi + 1] = head_tmp1[1];
1075 ai += incai;
1076
1077 }
1078 } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
1079 /* alpha is 1.0, beta is 1.0 */
1080
1081 ai = 0;
1082 bi = 0;
1083 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1084 head_sumA = tail_sumA = 0.0;
1085 aij = ai;
1086 head_sumB = tail_sumB = 0.0;
1087 bij = bi;
1088 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1089 x_elem = x_i[xi];
1090 a_elem = a_i[aij];
1091 head_prod = (double) a_elem *x_elem;
1092 tail_prod = 0.0;
1093 {
1094 /* Compute double-double = double-double + double-double. */
1095 double bv;
1096 double s1, s2, t1, t2;
1097
1098 /* Add two hi words. */
1099 s1 = head_sumA + head_prod;
1100 bv = s1 - head_sumA;
1101 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1102
1103 /* Add two lo words. */
1104 t1 = tail_sumA + tail_prod;
1105 bv = t1 - tail_sumA;
1106 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1107
1108 s2 += t1;
1109
1110 /* Renormalize (s1, s2) to (t1, s2) */
1111 t1 = s1 + s2;
1112 s2 = s2 - (t1 - s1);
1113
1114 t2 += s2;
1115
1116 /* Renormalize (t1, t2) */
1117 head_sumA = t1 + t2;
1118 tail_sumA = t2 - (head_sumA - t1);
1119 }
1120 aij += incaij;
1121 b_elem = b_i[bij];
1122 head_prod = (double) b_elem *x_elem;
1123 tail_prod = 0.0;
1124 {
1125 /* Compute double-double = double-double + double-double. */
1126 double bv;
1127 double s1, s2, t1, t2;
1128
1129 /* Add two hi words. */
1130 s1 = head_sumB + head_prod;
1131 bv = s1 - head_sumB;
1132 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
1133
1134 /* Add two lo words. */
1135 t1 = tail_sumB + tail_prod;
1136 bv = t1 - tail_sumB;
1137 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
1138
1139 s2 += t1;
1140
1141 /* Renormalize (s1, s2) to (t1, s2) */
1142 t1 = s1 + s2;
1143 s2 = s2 - (t1 - s1);
1144
1145 t2 += s2;
1146
1147 /* Renormalize (t1, t2) */
1148 head_sumB = t1 + t2;
1149 tail_sumB = t2 - (head_sumB - t1);
1150 }
1151 bij += incbij;
1152 }
1153 /* now put the result into y_i */
1154 head_tmp1[0] = head_sumA;
1155 tail_tmp1[0] = tail_sumA;
1156 head_tmp1[1] = tail_tmp1[1] = 0.0;
1157 head_tmp2[0] = head_sumB;
1158 tail_tmp2[0] = tail_sumB;
1159 head_tmp2[1] = tail_tmp2[1] = 0.0;
1160 {
1161 double head_t, tail_t;
1162 double head_a, tail_a;
1163 double head_b, tail_b;
1164 /* Real part */
1165 head_a = head_tmp1[0];
1166 tail_a = tail_tmp1[0];
1167 head_b = head_tmp2[0];
1168 tail_b = tail_tmp2[0];
1169 {
1170 /* Compute double-double = double-double + double-double. */
1171 double bv;
1172 double s1, s2, t1, t2;
1173
1174 /* Add two hi words. */
1175 s1 = head_a + head_b;
1176 bv = s1 - head_a;
1177 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1178
1179 /* Add two lo words. */
1180 t1 = tail_a + tail_b;
1181 bv = t1 - tail_a;
1182 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1183
1184 s2 += t1;
1185
1186 /* Renormalize (s1, s2) to (t1, s2) */
1187 t1 = s1 + s2;
1188 s2 = s2 - (t1 - s1);
1189
1190 t2 += s2;
1191
1192 /* Renormalize (t1, t2) */
1193 head_t = t1 + t2;
1194 tail_t = t2 - (head_t - t1);
1195 }
1196 head_tmp1[0] = head_t;
1197 tail_tmp1[0] = tail_t;
1198 /* Imaginary part */
1199 head_a = head_tmp1[1];
1200 tail_a = tail_tmp1[1];
1201 head_b = head_tmp2[1];
1202 tail_b = tail_tmp2[1];
1203 {
1204 /* Compute double-double = double-double + double-double. */
1205 double bv;
1206 double s1, s2, t1, t2;
1207
1208 /* Add two hi words. */
1209 s1 = head_a + head_b;
1210 bv = s1 - head_a;
1211 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1212
1213 /* Add two lo words. */
1214 t1 = tail_a + tail_b;
1215 bv = t1 - tail_a;
1216 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1217
1218 s2 += t1;
1219
1220 /* Renormalize (s1, s2) to (t1, s2) */
1221 t1 = s1 + s2;
1222 s2 = s2 - (t1 - s1);
1223
1224 t2 += s2;
1225
1226 /* Renormalize (t1, t2) */
1227 head_t = t1 + t2;
1228 tail_t = t2 - (head_t - t1);
1229 }
1230 head_tmp1[1] = head_t;
1231 tail_tmp1[1] = tail_t;
1232 }
1233 y_i[yi] = head_tmp1[0];
1234 y_i[yi + 1] = head_tmp1[1];
1235 ai += incai;
1236 bi += incbi;
1237 }
1238 } else {
1239 /* alpha is 1.0, beta is other */
1240
1241 ai = 0;
1242 bi = 0;
1243 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1244 head_sumA = tail_sumA = 0.0;
1245 aij = ai;
1246 head_sumB = tail_sumB = 0.0;
1247 bij = bi;
1248 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1249 x_elem = x_i[xi];
1250 a_elem = a_i[aij];
1251 head_prod = (double) a_elem *x_elem;
1252 tail_prod = 0.0;
1253 {
1254 /* Compute double-double = double-double + double-double. */
1255 double bv;
1256 double s1, s2, t1, t2;
1257
1258 /* Add two hi words. */
1259 s1 = head_sumA + head_prod;
1260 bv = s1 - head_sumA;
1261 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1262
1263 /* Add two lo words. */
1264 t1 = tail_sumA + tail_prod;
1265 bv = t1 - tail_sumA;
1266 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1267
1268 s2 += t1;
1269
1270 /* Renormalize (s1, s2) to (t1, s2) */
1271 t1 = s1 + s2;
1272 s2 = s2 - (t1 - s1);
1273
1274 t2 += s2;
1275
1276 /* Renormalize (t1, t2) */
1277 head_sumA = t1 + t2;
1278 tail_sumA = t2 - (head_sumA - t1);
1279 }
1280 aij += incaij;
1281 b_elem = b_i[bij];
1282 head_prod = (double) b_elem *x_elem;
1283 tail_prod = 0.0;
1284 {
1285 /* Compute double-double = double-double + double-double. */
1286 double bv;
1287 double s1, s2, t1, t2;
1288
1289 /* Add two hi words. */
1290 s1 = head_sumB + head_prod;
1291 bv = s1 - head_sumB;
1292 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
1293
1294 /* Add two lo words. */
1295 t1 = tail_sumB + tail_prod;
1296 bv = t1 - tail_sumB;
1297 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
1298
1299 s2 += t1;
1300
1301 /* Renormalize (s1, s2) to (t1, s2) */
1302 t1 = s1 + s2;
1303 s2 = s2 - (t1 - s1);
1304
1305 t2 += s2;
1306
1307 /* Renormalize (t1, t2) */
1308 head_sumB = t1 + t2;
1309 tail_sumB = t2 - (head_sumB - t1);
1310 }
1311 bij += incbij;
1312 }
1313 /* now put the result into y_i */
1314 head_tmp1[0] = head_sumA;
1315 tail_tmp1[0] = tail_sumA;
1316 head_tmp1[1] = tail_tmp1[1] = 0.0;
1317 {
1318 double head_e1, tail_e1;
1319 double dt;
1320 dt = (double) beta_i[0];
1321 {
1322 /* Compute double-double = double-double * double. */
1323 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1324
1325 con = head_sumB * split;
1326 a11 = con - head_sumB;
1327 a11 = con - a11;
1328 a21 = head_sumB - a11;
1329 con = dt * split;
1330 b1 = con - dt;
1331 b1 = con - b1;
1332 b2 = dt - b1;
1333
1334 c11 = head_sumB * dt;
1335 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1336
1337 c2 = tail_sumB * dt;
1338 t1 = c11 + c2;
1339 t2 = (c2 - (t1 - c11)) + c21;
1340
1341 head_e1 = t1 + t2;
1342 tail_e1 = t2 - (head_e1 - t1);
1343 }
1344 head_tmp2[0] = head_e1;
1345 tail_tmp2[0] = tail_e1;
1346 dt = (double) beta_i[1];
1347 {
1348 /* Compute double-double = double-double * double. */
1349 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1350
1351 con = head_sumB * split;
1352 a11 = con - head_sumB;
1353 a11 = con - a11;
1354 a21 = head_sumB - a11;
1355 con = dt * split;
1356 b1 = con - dt;
1357 b1 = con - b1;
1358 b2 = dt - b1;
1359
1360 c11 = head_sumB * dt;
1361 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1362
1363 c2 = tail_sumB * dt;
1364 t1 = c11 + c2;
1365 t2 = (c2 - (t1 - c11)) + c21;
1366
1367 head_e1 = t1 + t2;
1368 tail_e1 = t2 - (head_e1 - t1);
1369 }
1370 head_tmp2[1] = head_e1;
1371 tail_tmp2[1] = tail_e1;
1372 }
1373 {
1374 double head_t, tail_t;
1375 double head_a, tail_a;
1376 double head_b, tail_b;
1377 /* Real part */
1378 head_a = head_tmp1[0];
1379 tail_a = tail_tmp1[0];
1380 head_b = head_tmp2[0];
1381 tail_b = tail_tmp2[0];
1382 {
1383 /* Compute double-double = double-double + double-double. */
1384 double bv;
1385 double s1, s2, t1, t2;
1386
1387 /* Add two hi words. */
1388 s1 = head_a + head_b;
1389 bv = s1 - head_a;
1390 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1391
1392 /* Add two lo words. */
1393 t1 = tail_a + tail_b;
1394 bv = t1 - tail_a;
1395 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1396
1397 s2 += t1;
1398
1399 /* Renormalize (s1, s2) to (t1, s2) */
1400 t1 = s1 + s2;
1401 s2 = s2 - (t1 - s1);
1402
1403 t2 += s2;
1404
1405 /* Renormalize (t1, t2) */
1406 head_t = t1 + t2;
1407 tail_t = t2 - (head_t - t1);
1408 }
1409 head_tmp1[0] = head_t;
1410 tail_tmp1[0] = tail_t;
1411 /* Imaginary part */
1412 head_a = head_tmp1[1];
1413 tail_a = tail_tmp1[1];
1414 head_b = head_tmp2[1];
1415 tail_b = tail_tmp2[1];
1416 {
1417 /* Compute double-double = double-double + double-double. */
1418 double bv;
1419 double s1, s2, t1, t2;
1420
1421 /* Add two hi words. */
1422 s1 = head_a + head_b;
1423 bv = s1 - head_a;
1424 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1425
1426 /* Add two lo words. */
1427 t1 = tail_a + tail_b;
1428 bv = t1 - tail_a;
1429 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1430
1431 s2 += t1;
1432
1433 /* Renormalize (s1, s2) to (t1, s2) */
1434 t1 = s1 + s2;
1435 s2 = s2 - (t1 - s1);
1436
1437 t2 += s2;
1438
1439 /* Renormalize (t1, t2) */
1440 head_t = t1 + t2;
1441 tail_t = t2 - (head_t - t1);
1442 }
1443 head_tmp1[1] = head_t;
1444 tail_tmp1[1] = tail_t;
1445 }
1446 y_i[yi] = head_tmp1[0];
1447 y_i[yi + 1] = head_tmp1[1];
1448 ai += incai;
1449 bi += incbi;
1450 }
1451 }
1452 } else {
1453 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
1454 /* alpha is other, beta is 0.0 */
1455
1456 ai = 0;
1457
1458 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1459 head_sumA = tail_sumA = 0.0;
1460 aij = ai;
1461
1462 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1463 x_elem = x_i[xi];
1464 a_elem = a_i[aij];
1465 head_prod = (double) a_elem *x_elem;
1466 tail_prod = 0.0;
1467 {
1468 /* Compute double-double = double-double + double-double. */
1469 double bv;
1470 double s1, s2, t1, t2;
1471
1472 /* Add two hi words. */
1473 s1 = head_sumA + head_prod;
1474 bv = s1 - head_sumA;
1475 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1476
1477 /* Add two lo words. */
1478 t1 = tail_sumA + tail_prod;
1479 bv = t1 - tail_sumA;
1480 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1481
1482 s2 += t1;
1483
1484 /* Renormalize (s1, s2) to (t1, s2) */
1485 t1 = s1 + s2;
1486 s2 = s2 - (t1 - s1);
1487
1488 t2 += s2;
1489
1490 /* Renormalize (t1, t2) */
1491 head_sumA = t1 + t2;
1492 tail_sumA = t2 - (head_sumA - t1);
1493 }
1494 aij += incaij;
1495
1496 }
1497 /* now put the result into y_i */
1498 {
1499 double head_e1, tail_e1;
1500 double dt;
1501 dt = (double) alpha_i[0];
1502 {
1503 /* Compute double-double = double-double * double. */
1504 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1505
1506 con = head_sumA * split;
1507 a11 = con - head_sumA;
1508 a11 = con - a11;
1509 a21 = head_sumA - a11;
1510 con = dt * split;
1511 b1 = con - dt;
1512 b1 = con - b1;
1513 b2 = dt - b1;
1514
1515 c11 = head_sumA * dt;
1516 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1517
1518 c2 = tail_sumA * dt;
1519 t1 = c11 + c2;
1520 t2 = (c2 - (t1 - c11)) + c21;
1521
1522 head_e1 = t1 + t2;
1523 tail_e1 = t2 - (head_e1 - t1);
1524 }
1525 head_tmp1[0] = head_e1;
1526 tail_tmp1[0] = tail_e1;
1527 dt = (double) alpha_i[1];
1528 {
1529 /* Compute double-double = double-double * double. */
1530 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1531
1532 con = head_sumA * split;
1533 a11 = con - head_sumA;
1534 a11 = con - a11;
1535 a21 = head_sumA - a11;
1536 con = dt * split;
1537 b1 = con - dt;
1538 b1 = con - b1;
1539 b2 = dt - b1;
1540
1541 c11 = head_sumA * dt;
1542 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1543
1544 c2 = tail_sumA * dt;
1545 t1 = c11 + c2;
1546 t2 = (c2 - (t1 - c11)) + c21;
1547
1548 head_e1 = t1 + t2;
1549 tail_e1 = t2 - (head_e1 - t1);
1550 }
1551 head_tmp1[1] = head_e1;
1552 tail_tmp1[1] = tail_e1;
1553 }
1554 y_i[yi] = head_tmp1[0];
1555 y_i[yi + 1] = head_tmp1[1];
1556 ai += incai;
1557
1558 }
1559 } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
1560 /* alpha is other, beta is 1.0 */
1561
1562 ai = 0;
1563 bi = 0;
1564 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1565 head_sumA = tail_sumA = 0.0;
1566 aij = ai;
1567 head_sumB = tail_sumB = 0.0;
1568 bij = bi;
1569 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1570 x_elem = x_i[xi];
1571 a_elem = a_i[aij];
1572 head_prod = (double) a_elem *x_elem;
1573 tail_prod = 0.0;
1574 {
1575 /* Compute double-double = double-double + double-double. */
1576 double bv;
1577 double s1, s2, t1, t2;
1578
1579 /* Add two hi words. */
1580 s1 = head_sumA + head_prod;
1581 bv = s1 - head_sumA;
1582 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1583
1584 /* Add two lo words. */
1585 t1 = tail_sumA + tail_prod;
1586 bv = t1 - tail_sumA;
1587 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1588
1589 s2 += t1;
1590
1591 /* Renormalize (s1, s2) to (t1, s2) */
1592 t1 = s1 + s2;
1593 s2 = s2 - (t1 - s1);
1594
1595 t2 += s2;
1596
1597 /* Renormalize (t1, t2) */
1598 head_sumA = t1 + t2;
1599 tail_sumA = t2 - (head_sumA - t1);
1600 }
1601 aij += incaij;
1602 b_elem = b_i[bij];
1603 head_prod = (double) b_elem *x_elem;
1604 tail_prod = 0.0;
1605 {
1606 /* Compute double-double = double-double + double-double. */
1607 double bv;
1608 double s1, s2, t1, t2;
1609
1610 /* Add two hi words. */
1611 s1 = head_sumB + head_prod;
1612 bv = s1 - head_sumB;
1613 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
1614
1615 /* Add two lo words. */
1616 t1 = tail_sumB + tail_prod;
1617 bv = t1 - tail_sumB;
1618 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
1619
1620 s2 += t1;
1621
1622 /* Renormalize (s1, s2) to (t1, s2) */
1623 t1 = s1 + s2;
1624 s2 = s2 - (t1 - s1);
1625
1626 t2 += s2;
1627
1628 /* Renormalize (t1, t2) */
1629 head_sumB = t1 + t2;
1630 tail_sumB = t2 - (head_sumB - t1);
1631 }
1632 bij += incbij;
1633 }
1634 /* now put the result into y_i */
1635 {
1636 double head_e1, tail_e1;
1637 double dt;
1638 dt = (double) alpha_i[0];
1639 {
1640 /* Compute double-double = double-double * double. */
1641 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1642
1643 con = head_sumA * split;
1644 a11 = con - head_sumA;
1645 a11 = con - a11;
1646 a21 = head_sumA - a11;
1647 con = dt * split;
1648 b1 = con - dt;
1649 b1 = con - b1;
1650 b2 = dt - b1;
1651
1652 c11 = head_sumA * dt;
1653 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1654
1655 c2 = tail_sumA * dt;
1656 t1 = c11 + c2;
1657 t2 = (c2 - (t1 - c11)) + c21;
1658
1659 head_e1 = t1 + t2;
1660 tail_e1 = t2 - (head_e1 - t1);
1661 }
1662 head_tmp1[0] = head_e1;
1663 tail_tmp1[0] = tail_e1;
1664 dt = (double) alpha_i[1];
1665 {
1666 /* Compute double-double = double-double * double. */
1667 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1668
1669 con = head_sumA * split;
1670 a11 = con - head_sumA;
1671 a11 = con - a11;
1672 a21 = head_sumA - a11;
1673 con = dt * split;
1674 b1 = con - dt;
1675 b1 = con - b1;
1676 b2 = dt - b1;
1677
1678 c11 = head_sumA * dt;
1679 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1680
1681 c2 = tail_sumA * dt;
1682 t1 = c11 + c2;
1683 t2 = (c2 - (t1 - c11)) + c21;
1684
1685 head_e1 = t1 + t2;
1686 tail_e1 = t2 - (head_e1 - t1);
1687 }
1688 head_tmp1[1] = head_e1;
1689 tail_tmp1[1] = tail_e1;
1690 }
1691 head_tmp2[0] = head_sumB;
1692 tail_tmp2[0] = tail_sumB;
1693 head_tmp2[1] = tail_tmp2[1] = 0.0;
1694 {
1695 double head_t, tail_t;
1696 double head_a, tail_a;
1697 double head_b, tail_b;
1698 /* Real part */
1699 head_a = head_tmp1[0];
1700 tail_a = tail_tmp1[0];
1701 head_b = head_tmp2[0];
1702 tail_b = tail_tmp2[0];
1703 {
1704 /* Compute double-double = double-double + double-double. */
1705 double bv;
1706 double s1, s2, t1, t2;
1707
1708 /* Add two hi words. */
1709 s1 = head_a + head_b;
1710 bv = s1 - head_a;
1711 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1712
1713 /* Add two lo words. */
1714 t1 = tail_a + tail_b;
1715 bv = t1 - tail_a;
1716 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1717
1718 s2 += t1;
1719
1720 /* Renormalize (s1, s2) to (t1, s2) */
1721 t1 = s1 + s2;
1722 s2 = s2 - (t1 - s1);
1723
1724 t2 += s2;
1725
1726 /* Renormalize (t1, t2) */
1727 head_t = t1 + t2;
1728 tail_t = t2 - (head_t - t1);
1729 }
1730 head_tmp1[0] = head_t;
1731 tail_tmp1[0] = tail_t;
1732 /* Imaginary part */
1733 head_a = head_tmp1[1];
1734 tail_a = tail_tmp1[1];
1735 head_b = head_tmp2[1];
1736 tail_b = tail_tmp2[1];
1737 {
1738 /* Compute double-double = double-double + double-double. */
1739 double bv;
1740 double s1, s2, t1, t2;
1741
1742 /* Add two hi words. */
1743 s1 = head_a + head_b;
1744 bv = s1 - head_a;
1745 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1746
1747 /* Add two lo words. */
1748 t1 = tail_a + tail_b;
1749 bv = t1 - tail_a;
1750 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1751
1752 s2 += t1;
1753
1754 /* Renormalize (s1, s2) to (t1, s2) */
1755 t1 = s1 + s2;
1756 s2 = s2 - (t1 - s1);
1757
1758 t2 += s2;
1759
1760 /* Renormalize (t1, t2) */
1761 head_t = t1 + t2;
1762 tail_t = t2 - (head_t - t1);
1763 }
1764 head_tmp1[1] = head_t;
1765 tail_tmp1[1] = tail_t;
1766 }
1767 y_i[yi] = head_tmp1[0];
1768 y_i[yi + 1] = head_tmp1[1];
1769 ai += incai;
1770 bi += incbi;
1771 }
1772 } else {
1773 /* most general form, alpha, beta are other */
1774
1775 ai = 0;
1776 bi = 0;
1777 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1778 head_sumA = tail_sumA = 0.0;
1779 aij = ai;
1780 head_sumB = tail_sumB = 0.0;
1781 bij = bi;
1782 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1783 x_elem = x_i[xi];
1784 a_elem = a_i[aij];
1785 head_prod = (double) a_elem *x_elem;
1786 tail_prod = 0.0;
1787 {
1788 /* Compute double-double = double-double + double-double. */
1789 double bv;
1790 double s1, s2, t1, t2;
1791
1792 /* Add two hi words. */
1793 s1 = head_sumA + head_prod;
1794 bv = s1 - head_sumA;
1795 s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1796
1797 /* Add two lo words. */
1798 t1 = tail_sumA + tail_prod;
1799 bv = t1 - tail_sumA;
1800 t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1801
1802 s2 += t1;
1803
1804 /* Renormalize (s1, s2) to (t1, s2) */
1805 t1 = s1 + s2;
1806 s2 = s2 - (t1 - s1);
1807
1808 t2 += s2;
1809
1810 /* Renormalize (t1, t2) */
1811 head_sumA = t1 + t2;
1812 tail_sumA = t2 - (head_sumA - t1);
1813 }
1814 aij += incaij;
1815 b_elem = b_i[bij];
1816 head_prod = (double) b_elem *x_elem;
1817 tail_prod = 0.0;
1818 {
1819 /* Compute double-double = double-double + double-double. */
1820 double bv;
1821 double s1, s2, t1, t2;
1822
1823 /* Add two hi words. */
1824 s1 = head_sumB + head_prod;
1825 bv = s1 - head_sumB;
1826 s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
1827
1828 /* Add two lo words. */
1829 t1 = tail_sumB + tail_prod;
1830 bv = t1 - tail_sumB;
1831 t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
1832
1833 s2 += t1;
1834
1835 /* Renormalize (s1, s2) to (t1, s2) */
1836 t1 = s1 + s2;
1837 s2 = s2 - (t1 - s1);
1838
1839 t2 += s2;
1840
1841 /* Renormalize (t1, t2) */
1842 head_sumB = t1 + t2;
1843 tail_sumB = t2 - (head_sumB - t1);
1844 }
1845 bij += incbij;
1846 }
1847 /* now put the result into y_i */
1848 {
1849 double head_e1, tail_e1;
1850 double dt;
1851 dt = (double) alpha_i[0];
1852 {
1853 /* Compute double-double = double-double * double. */
1854 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1855
1856 con = head_sumA * split;
1857 a11 = con - head_sumA;
1858 a11 = con - a11;
1859 a21 = head_sumA - a11;
1860 con = dt * split;
1861 b1 = con - dt;
1862 b1 = con - b1;
1863 b2 = dt - b1;
1864
1865 c11 = head_sumA * dt;
1866 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1867
1868 c2 = tail_sumA * dt;
1869 t1 = c11 + c2;
1870 t2 = (c2 - (t1 - c11)) + c21;
1871
1872 head_e1 = t1 + t2;
1873 tail_e1 = t2 - (head_e1 - t1);
1874 }
1875 head_tmp1[0] = head_e1;
1876 tail_tmp1[0] = tail_e1;
1877 dt = (double) alpha_i[1];
1878 {
1879 /* Compute double-double = double-double * double. */
1880 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1881
1882 con = head_sumA * split;
1883 a11 = con - head_sumA;
1884 a11 = con - a11;
1885 a21 = head_sumA - a11;
1886 con = dt * split;
1887 b1 = con - dt;
1888 b1 = con - b1;
1889 b2 = dt - b1;
1890
1891 c11 = head_sumA * dt;
1892 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1893
1894 c2 = tail_sumA * dt;
1895 t1 = c11 + c2;
1896 t2 = (c2 - (t1 - c11)) + c21;
1897
1898 head_e1 = t1 + t2;
1899 tail_e1 = t2 - (head_e1 - t1);
1900 }
1901 head_tmp1[1] = head_e1;
1902 tail_tmp1[1] = tail_e1;
1903 }
1904 {
1905 double head_e1, tail_e1;
1906 double dt;
1907 dt = (double) beta_i[0];
1908 {
1909 /* Compute double-double = double-double * double. */
1910 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1911
1912 con = head_sumB * split;
1913 a11 = con - head_sumB;
1914 a11 = con - a11;
1915 a21 = head_sumB - a11;
1916 con = dt * split;
1917 b1 = con - dt;
1918 b1 = con - b1;
1919 b2 = dt - b1;
1920
1921 c11 = head_sumB * dt;
1922 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1923
1924 c2 = tail_sumB * dt;
1925 t1 = c11 + c2;
1926 t2 = (c2 - (t1 - c11)) + c21;
1927
1928 head_e1 = t1 + t2;
1929 tail_e1 = t2 - (head_e1 - t1);
1930 }
1931 head_tmp2[0] = head_e1;
1932 tail_tmp2[0] = tail_e1;
1933 dt = (double) beta_i[1];
1934 {
1935 /* Compute double-double = double-double * double. */
1936 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1937
1938 con = head_sumB * split;
1939 a11 = con - head_sumB;
1940 a11 = con - a11;
1941 a21 = head_sumB - a11;
1942 con = dt * split;
1943 b1 = con - dt;
1944 b1 = con - b1;
1945 b2 = dt - b1;
1946
1947 c11 = head_sumB * dt;
1948 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1949
1950 c2 = tail_sumB * dt;
1951 t1 = c11 + c2;
1952 t2 = (c2 - (t1 - c11)) + c21;
1953
1954 head_e1 = t1 + t2;
1955 tail_e1 = t2 - (head_e1 - t1);
1956 }
1957 head_tmp2[1] = head_e1;
1958 tail_tmp2[1] = tail_e1;
1959 }
1960 {
1961 double head_t, tail_t;
1962 double head_a, tail_a;
1963 double head_b, tail_b;
1964 /* Real part */
1965 head_a = head_tmp1[0];
1966 tail_a = tail_tmp1[0];
1967 head_b = head_tmp2[0];
1968 tail_b = tail_tmp2[0];
1969 {
1970 /* Compute double-double = double-double + double-double. */
1971 double bv;
1972 double s1, s2, t1, t2;
1973
1974 /* Add two hi words. */
1975 s1 = head_a + head_b;
1976 bv = s1 - head_a;
1977 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1978
1979 /* Add two lo words. */
1980 t1 = tail_a + tail_b;
1981 bv = t1 - tail_a;
1982 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1983
1984 s2 += t1;
1985
1986 /* Renormalize (s1, s2) to (t1, s2) */
1987 t1 = s1 + s2;
1988 s2 = s2 - (t1 - s1);
1989
1990 t2 += s2;
1991
1992 /* Renormalize (t1, t2) */
1993 head_t = t1 + t2;
1994 tail_t = t2 - (head_t - t1);
1995 }
1996 head_tmp1[0] = head_t;
1997 tail_tmp1[0] = tail_t;
1998 /* Imaginary part */
1999 head_a = head_tmp1[1];
2000 tail_a = tail_tmp1[1];
2001 head_b = head_tmp2[1];
2002 tail_b = tail_tmp2[1];
2003 {
2004 /* Compute double-double = double-double + double-double. */
2005 double bv;
2006 double s1, s2, t1, t2;
2007
2008 /* Add two hi words. */
2009 s1 = head_a + head_b;
2010 bv = s1 - head_a;
2011 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
2012
2013 /* Add two lo words. */
2014 t1 = tail_a + tail_b;
2015 bv = t1 - tail_a;
2016 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
2017
2018 s2 += t1;
2019
2020 /* Renormalize (s1, s2) to (t1, s2) */
2021 t1 = s1 + s2;
2022 s2 = s2 - (t1 - s1);
2023
2024 t2 += s2;
2025
2026 /* Renormalize (t1, t2) */
2027 head_t = t1 + t2;
2028 tail_t = t2 - (head_t - t1);
2029 }
2030 head_tmp1[1] = head_t;
2031 tail_tmp1[1] = tail_t;
2032 }
2033 y_i[yi] = head_tmp1[0];
2034 y_i[yi + 1] = head_tmp1[1];
2035 ai += incai;
2036 bi += incbi;
2037 }
2038 }
2039 }
2040 FPU_FIX_STOP;
2041 }
2042 break;
2043
2044 default:
2045 {
2046 BLAS_error(routine_name, -14, prec, 0);
2047 }
2048 break;
2049 }
2050
2051 } /* end BLAS_cge_sum_mv_s_s */
2052