1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_ssymm_x(enum blas_order_type order,enum blas_side_type side,enum blas_uplo_type uplo,int m,int n,float alpha,const float * a,int lda,const float * b,int ldb,float beta,float * c,int ldc,enum blas_prec_type prec)3 void BLAS_ssymm_x(enum blas_order_type order, enum blas_side_type side,
4 enum blas_uplo_type uplo, int m, int n,
5 float alpha, const float *a, int lda,
6 const float *b, int ldb, float beta,
7 float *c, int ldc, enum blas_prec_type prec)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * This routines computes one of the matrix product:
14 *
15 * C <- alpha * A * B + beta * C
16 * C <- alpha * B * A + beta * C
17 *
18 * where A is a symmetric matrix.
19 *
20 * Arguments
21 * =========
22 *
23 * order (input) enum blas_order_type
24 * Storage format of input matrices A, B, and C.
25 *
26 * side (input) enum blas_side_type
27 * Determines which side of matrix B is matrix A is multiplied.
28 *
29 * uplo (input) enum blas_uplo_type
30 * Determines which half of matrix A (upper or lower triangle)
31 * is accessed.
32 *
33 * m n (input) int
34 * Size of matrices A, B, and C.
35 * Matrix A is m-by-m if it is multiplied on the left,
36 * n-by-n otherwise.
37 * Matrices B and C are m-by-n.
38 *
39 * alpha (input) float
40 *
41 * a (input) const float*
42 * Matrix A.
43 *
44 * lda (input) int
45 * Leading dimension of matrix A.
46 *
47 * b (input) const float*
48 * Matrix B.
49 *
50 * ldb (input) int
51 * Leading dimension of matrix B.
52 *
53 * beta (input) float
54 *
55 * c (input/output) float*
56 * Matrix C.
57 *
58 * ldc (input) int
59 * Leading dimension of matrix C.
60 *
61 * prec (input) enum blas_prec_type
62 * Specifies the internal precision to be used.
63 * = blas_prec_single: single precision.
64 * = blas_prec_double: double precision.
65 * = blas_prec_extra : anything at least 1.5 times as accurate
66 * than double, and wider than 80-bits.
67 * We use double-double in our implementation.
68 *
69 */
70 {
71 switch (prec) {
72
73 case blas_prec_single:{
74
75 /* Integer Index Variables */
76 int i, j, k;
77
78 int ai, bj, ci;
79 int aik, bkj, cij;
80
81 int incai, incbj, incci;
82 int incaik1, incaik2, incbkj, inccij;
83
84 int m_i, n_i;
85
86 /* Input Matrices */
87 const float *a_i = a;
88 const float *b_i = b;
89
90 /* Output Matrix */
91 float *c_i = c;
92
93 /* Input Scalars */
94 float alpha_i = alpha;
95 float beta_i = beta;
96
97 /* Temporary Floating-Point Variables */
98 float a_elem;
99 float b_elem;
100 float c_elem;
101 float prod;
102 float sum;
103 float tmp1;
104 float tmp2;
105
106
107
108 /* Check for error conditions. */
109 if (m <= 0 || n <= 0) {
110 return;
111 }
112
113 if (order == blas_colmajor && (ldb < m || ldc < m)) {
114 return;
115 }
116 if (order == blas_rowmajor && (ldb < n || ldc < n)) {
117 return;
118 }
119 if (side == blas_left_side && lda < m) {
120 return;
121 }
122 if (side == blas_right_side && lda < n) {
123 return;
124 }
125
126 /* Test for no-op */
127 if (alpha_i == 0.0 && beta_i == 1.0) {
128 return;
129 }
130
131 /* Set Index Parameters */
132 if (side == blas_left_side) {
133 m_i = m;
134 n_i = n;
135 } else {
136 m_i = n;
137 n_i = m;
138 }
139
140 if ((order == blas_colmajor && side == blas_left_side) ||
141 (order == blas_rowmajor && side == blas_right_side)) {
142 incbj = ldb;
143 incbkj = 1;
144 incci = 1;
145 inccij = ldc;
146 } else {
147 incbj = 1;
148 incbkj = ldb;
149 incci = ldc;
150 inccij = 1;
151 }
152
153 if ((order == blas_colmajor && uplo == blas_upper) ||
154 (order == blas_rowmajor && uplo == blas_lower)) {
155 incai = lda;
156 incaik1 = 1;
157 incaik2 = lda;
158 } else {
159 incai = 1;
160 incaik1 = lda;
161 incaik2 = 1;
162 }
163
164
165
166 /* Adjustment to increments (if any) */
167
168
169
170
171
172
173
174
175 /* alpha = 0. In this case, just return beta * C */
176 if (alpha_i == 0.0) {
177 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
178 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
179 c_elem = c_i[cij];
180 tmp1 = c_elem * beta_i;
181 c_i[cij] = tmp1;
182 }
183 }
184 } else if (alpha_i == 1.0) {
185
186 /* Case alpha == 1. */
187
188 if (beta_i == 0.0) {
189 /* Case alpha = 1, beta = 0. We compute C <--- A * B or B * A */
190 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
191 for (j = 0, cij = ci, bj = 0; j < n_i;
192 j++, cij += inccij, bj += incbj) {
193
194 sum = 0.0;
195
196 for (k = 0, aik = ai, bkj = bj; k < i;
197 k++, aik += incaik1, bkj += incbkj) {
198 a_elem = a_i[aik];
199 b_elem = b_i[bkj];
200 prod = a_elem * b_elem;
201 sum = sum + prod;
202 }
203 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
204 a_elem = a_i[aik];
205 b_elem = b_i[bkj];
206 prod = a_elem * b_elem;
207 sum = sum + prod;
208 }
209 c_i[cij] = sum;
210 }
211 }
212 } else {
213 /* Case alpha = 1, but beta != 0.
214 We compute C <--- A * B + beta * C
215 or C <--- B * A + beta * C */
216
217 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
218 for (j = 0, cij = ci, bj = 0; j < n_i;
219 j++, cij += inccij, bj += incbj) {
220
221 sum = 0.0;
222
223 for (k = 0, aik = ai, bkj = bj; k < i;
224 k++, aik += incaik1, bkj += incbkj) {
225 a_elem = a_i[aik];
226 b_elem = b_i[bkj];
227 prod = a_elem * b_elem;
228 sum = sum + prod;
229 }
230 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
231 a_elem = a_i[aik];
232 b_elem = b_i[bkj];
233 prod = a_elem * b_elem;
234 sum = sum + prod;
235 }
236 c_elem = c_i[cij];
237 tmp2 = c_elem * beta_i;
238 tmp1 = sum;
239 tmp1 = tmp2 + tmp1;
240 c_i[cij] = tmp1;
241 }
242 }
243 }
244
245 } else {
246 /* The most general form, C <--- alpha * A * B + beta * C
247 or C <--- alpha * B * A + beta * C */
248
249 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
250 for (j = 0, cij = ci, bj = 0; j < n_i;
251 j++, cij += inccij, bj += incbj) {
252
253 sum = 0.0;
254
255 for (k = 0, aik = ai, bkj = bj; k < i;
256 k++, aik += incaik1, bkj += incbkj) {
257 a_elem = a_i[aik];
258 b_elem = b_i[bkj];
259 prod = a_elem * b_elem;
260 sum = sum + prod;
261 }
262 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
263 a_elem = a_i[aik];
264 b_elem = b_i[bkj];
265 prod = a_elem * b_elem;
266 sum = sum + prod;
267 }
268 tmp1 = sum * alpha_i;
269 c_elem = c_i[cij];
270 tmp2 = c_elem * beta_i;
271 tmp1 = tmp1 + tmp2;
272 c_i[cij] = tmp1;
273 }
274 }
275 }
276
277
278
279 break;
280 }
281 case blas_prec_double:
282 case blas_prec_indigenous:{
283
284 /* Integer Index Variables */
285 int i, j, k;
286
287 int ai, bj, ci;
288 int aik, bkj, cij;
289
290 int incai, incbj, incci;
291 int incaik1, incaik2, incbkj, inccij;
292
293 int m_i, n_i;
294
295 /* Input Matrices */
296 const float *a_i = a;
297 const float *b_i = b;
298
299 /* Output Matrix */
300 float *c_i = c;
301
302 /* Input Scalars */
303 float alpha_i = alpha;
304 float beta_i = beta;
305
306 /* Temporary Floating-Point Variables */
307 float a_elem;
308 float b_elem;
309 float c_elem;
310 double prod;
311 double sum;
312 double tmp1;
313 double tmp2;
314
315
316
317 /* Check for error conditions. */
318 if (m <= 0 || n <= 0) {
319 return;
320 }
321
322 if (order == blas_colmajor && (ldb < m || ldc < m)) {
323 return;
324 }
325 if (order == blas_rowmajor && (ldb < n || ldc < n)) {
326 return;
327 }
328 if (side == blas_left_side && lda < m) {
329 return;
330 }
331 if (side == blas_right_side && lda < n) {
332 return;
333 }
334
335 /* Test for no-op */
336 if (alpha_i == 0.0 && beta_i == 1.0) {
337 return;
338 }
339
340 /* Set Index Parameters */
341 if (side == blas_left_side) {
342 m_i = m;
343 n_i = n;
344 } else {
345 m_i = n;
346 n_i = m;
347 }
348
349 if ((order == blas_colmajor && side == blas_left_side) ||
350 (order == blas_rowmajor && side == blas_right_side)) {
351 incbj = ldb;
352 incbkj = 1;
353 incci = 1;
354 inccij = ldc;
355 } else {
356 incbj = 1;
357 incbkj = ldb;
358 incci = ldc;
359 inccij = 1;
360 }
361
362 if ((order == blas_colmajor && uplo == blas_upper) ||
363 (order == blas_rowmajor && uplo == blas_lower)) {
364 incai = lda;
365 incaik1 = 1;
366 incaik2 = lda;
367 } else {
368 incai = 1;
369 incaik1 = lda;
370 incaik2 = 1;
371 }
372
373
374
375 /* Adjustment to increments (if any) */
376
377
378
379
380
381
382
383
384 /* alpha = 0. In this case, just return beta * C */
385 if (alpha_i == 0.0) {
386 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
387 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
388 c_elem = c_i[cij];
389 tmp1 = (double) c_elem *beta_i;
390 c_i[cij] = tmp1;
391 }
392 }
393 } else if (alpha_i == 1.0) {
394
395 /* Case alpha == 1. */
396
397 if (beta_i == 0.0) {
398 /* Case alpha = 1, beta = 0. We compute C <--- A * B or B * A */
399 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
400 for (j = 0, cij = ci, bj = 0; j < n_i;
401 j++, cij += inccij, bj += incbj) {
402
403 sum = 0.0;
404
405 for (k = 0, aik = ai, bkj = bj; k < i;
406 k++, aik += incaik1, bkj += incbkj) {
407 a_elem = a_i[aik];
408 b_elem = b_i[bkj];
409 prod = (double) a_elem *b_elem;
410 sum = sum + prod;
411 }
412 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
413 a_elem = a_i[aik];
414 b_elem = b_i[bkj];
415 prod = (double) a_elem *b_elem;
416 sum = sum + prod;
417 }
418 c_i[cij] = sum;
419 }
420 }
421 } else {
422 /* Case alpha = 1, but beta != 0.
423 We compute C <--- A * B + beta * C
424 or C <--- B * A + beta * C */
425
426 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
427 for (j = 0, cij = ci, bj = 0; j < n_i;
428 j++, cij += inccij, bj += incbj) {
429
430 sum = 0.0;
431
432 for (k = 0, aik = ai, bkj = bj; k < i;
433 k++, aik += incaik1, bkj += incbkj) {
434 a_elem = a_i[aik];
435 b_elem = b_i[bkj];
436 prod = (double) a_elem *b_elem;
437 sum = sum + prod;
438 }
439 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
440 a_elem = a_i[aik];
441 b_elem = b_i[bkj];
442 prod = (double) a_elem *b_elem;
443 sum = sum + prod;
444 }
445 c_elem = c_i[cij];
446 tmp2 = (double) c_elem *beta_i;
447 tmp1 = sum;
448 tmp1 = tmp2 + tmp1;
449 c_i[cij] = tmp1;
450 }
451 }
452 }
453
454 } else {
455 /* The most general form, C <--- alpha * A * B + beta * C
456 or C <--- alpha * B * A + beta * C */
457
458 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
459 for (j = 0, cij = ci, bj = 0; j < n_i;
460 j++, cij += inccij, bj += incbj) {
461
462 sum = 0.0;
463
464 for (k = 0, aik = ai, bkj = bj; k < i;
465 k++, aik += incaik1, bkj += incbkj) {
466 a_elem = a_i[aik];
467 b_elem = b_i[bkj];
468 prod = (double) a_elem *b_elem;
469 sum = sum + prod;
470 }
471 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
472 a_elem = a_i[aik];
473 b_elem = b_i[bkj];
474 prod = (double) a_elem *b_elem;
475 sum = sum + prod;
476 }
477 tmp1 = sum * alpha_i;
478 c_elem = c_i[cij];
479 tmp2 = (double) c_elem *beta_i;
480 tmp1 = tmp1 + tmp2;
481 c_i[cij] = tmp1;
482 }
483 }
484 }
485
486
487
488 break;
489 }
490
491 case blas_prec_extra:{
492
493 /* Integer Index Variables */
494 int i, j, k;
495
496 int ai, bj, ci;
497 int aik, bkj, cij;
498
499 int incai, incbj, incci;
500 int incaik1, incaik2, incbkj, inccij;
501
502 int m_i, n_i;
503
504 /* Input Matrices */
505 const float *a_i = a;
506 const float *b_i = b;
507
508 /* Output Matrix */
509 float *c_i = c;
510
511 /* Input Scalars */
512 float alpha_i = alpha;
513 float beta_i = beta;
514
515 /* Temporary Floating-Point Variables */
516 float a_elem;
517 float b_elem;
518 float c_elem;
519 double head_prod, tail_prod;
520 double head_sum, tail_sum;
521 double head_tmp1, tail_tmp1;
522 double head_tmp2, tail_tmp2;
523
524 FPU_FIX_DECL;
525
526 /* Check for error conditions. */
527 if (m <= 0 || n <= 0) {
528 return;
529 }
530
531 if (order == blas_colmajor && (ldb < m || ldc < m)) {
532 return;
533 }
534 if (order == blas_rowmajor && (ldb < n || ldc < n)) {
535 return;
536 }
537 if (side == blas_left_side && lda < m) {
538 return;
539 }
540 if (side == blas_right_side && lda < n) {
541 return;
542 }
543
544 /* Test for no-op */
545 if (alpha_i == 0.0 && beta_i == 1.0) {
546 return;
547 }
548
549 /* Set Index Parameters */
550 if (side == blas_left_side) {
551 m_i = m;
552 n_i = n;
553 } else {
554 m_i = n;
555 n_i = m;
556 }
557
558 if ((order == blas_colmajor && side == blas_left_side) ||
559 (order == blas_rowmajor && side == blas_right_side)) {
560 incbj = ldb;
561 incbkj = 1;
562 incci = 1;
563 inccij = ldc;
564 } else {
565 incbj = 1;
566 incbkj = ldb;
567 incci = ldc;
568 inccij = 1;
569 }
570
571 if ((order == blas_colmajor && uplo == blas_upper) ||
572 (order == blas_rowmajor && uplo == blas_lower)) {
573 incai = lda;
574 incaik1 = 1;
575 incaik2 = lda;
576 } else {
577 incai = 1;
578 incaik1 = lda;
579 incaik2 = 1;
580 }
581
582 FPU_FIX_START;
583
584 /* Adjustment to increments (if any) */
585
586
587
588
589
590
591
592
593 /* alpha = 0. In this case, just return beta * C */
594 if (alpha_i == 0.0) {
595 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
596 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
597 c_elem = c_i[cij];
598 head_tmp1 = (double) c_elem *beta_i;
599 tail_tmp1 = 0.0;
600 c_i[cij] = head_tmp1;
601 }
602 }
603 } else if (alpha_i == 1.0) {
604
605 /* Case alpha == 1. */
606
607 if (beta_i == 0.0) {
608 /* Case alpha = 1, beta = 0. We compute C <--- A * B or B * A */
609 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
610 for (j = 0, cij = ci, bj = 0; j < n_i;
611 j++, cij += inccij, bj += incbj) {
612
613 head_sum = tail_sum = 0.0;
614
615 for (k = 0, aik = ai, bkj = bj; k < i;
616 k++, aik += incaik1, bkj += incbkj) {
617 a_elem = a_i[aik];
618 b_elem = b_i[bkj];
619 head_prod = (double) a_elem *b_elem;
620 tail_prod = 0.0;
621 {
622 /* Compute double-double = double-double + double-double. */
623 double bv;
624 double s1, s2, t1, t2;
625
626 /* Add two hi words. */
627 s1 = head_sum + head_prod;
628 bv = s1 - head_sum;
629 s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
630
631 /* Add two lo words. */
632 t1 = tail_sum + tail_prod;
633 bv = t1 - tail_sum;
634 t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
635
636 s2 += t1;
637
638 /* Renormalize (s1, s2) to (t1, s2) */
639 t1 = s1 + s2;
640 s2 = s2 - (t1 - s1);
641
642 t2 += s2;
643
644 /* Renormalize (t1, t2) */
645 head_sum = t1 + t2;
646 tail_sum = t2 - (head_sum - t1);
647 }
648 }
649 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
650 a_elem = a_i[aik];
651 b_elem = b_i[bkj];
652 head_prod = (double) a_elem *b_elem;
653 tail_prod = 0.0;
654 {
655 /* Compute double-double = double-double + double-double. */
656 double bv;
657 double s1, s2, t1, t2;
658
659 /* Add two hi words. */
660 s1 = head_sum + head_prod;
661 bv = s1 - head_sum;
662 s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
663
664 /* Add two lo words. */
665 t1 = tail_sum + tail_prod;
666 bv = t1 - tail_sum;
667 t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
668
669 s2 += t1;
670
671 /* Renormalize (s1, s2) to (t1, s2) */
672 t1 = s1 + s2;
673 s2 = s2 - (t1 - s1);
674
675 t2 += s2;
676
677 /* Renormalize (t1, t2) */
678 head_sum = t1 + t2;
679 tail_sum = t2 - (head_sum - t1);
680 }
681 }
682 c_i[cij] = head_sum;
683 }
684 }
685 } else {
686 /* Case alpha = 1, but beta != 0.
687 We compute C <--- A * B + beta * C
688 or C <--- B * A + beta * C */
689
690 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
691 for (j = 0, cij = ci, bj = 0; j < n_i;
692 j++, cij += inccij, bj += incbj) {
693
694 head_sum = tail_sum = 0.0;
695
696 for (k = 0, aik = ai, bkj = bj; k < i;
697 k++, aik += incaik1, bkj += incbkj) {
698 a_elem = a_i[aik];
699 b_elem = b_i[bkj];
700 head_prod = (double) a_elem *b_elem;
701 tail_prod = 0.0;
702 {
703 /* Compute double-double = double-double + double-double. */
704 double bv;
705 double s1, s2, t1, t2;
706
707 /* Add two hi words. */
708 s1 = head_sum + head_prod;
709 bv = s1 - head_sum;
710 s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
711
712 /* Add two lo words. */
713 t1 = tail_sum + tail_prod;
714 bv = t1 - tail_sum;
715 t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
716
717 s2 += t1;
718
719 /* Renormalize (s1, s2) to (t1, s2) */
720 t1 = s1 + s2;
721 s2 = s2 - (t1 - s1);
722
723 t2 += s2;
724
725 /* Renormalize (t1, t2) */
726 head_sum = t1 + t2;
727 tail_sum = t2 - (head_sum - t1);
728 }
729 }
730 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
731 a_elem = a_i[aik];
732 b_elem = b_i[bkj];
733 head_prod = (double) a_elem *b_elem;
734 tail_prod = 0.0;
735 {
736 /* Compute double-double = double-double + double-double. */
737 double bv;
738 double s1, s2, t1, t2;
739
740 /* Add two hi words. */
741 s1 = head_sum + head_prod;
742 bv = s1 - head_sum;
743 s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
744
745 /* Add two lo words. */
746 t1 = tail_sum + tail_prod;
747 bv = t1 - tail_sum;
748 t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
749
750 s2 += t1;
751
752 /* Renormalize (s1, s2) to (t1, s2) */
753 t1 = s1 + s2;
754 s2 = s2 - (t1 - s1);
755
756 t2 += s2;
757
758 /* Renormalize (t1, t2) */
759 head_sum = t1 + t2;
760 tail_sum = t2 - (head_sum - t1);
761 }
762 }
763 c_elem = c_i[cij];
764 head_tmp2 = (double) c_elem *beta_i;
765 tail_tmp2 = 0.0;
766 head_tmp1 = head_sum;
767 tail_tmp1 = tail_sum;
768 {
769 /* Compute double-double = double-double + double-double. */
770 double bv;
771 double s1, s2, t1, t2;
772
773 /* Add two hi words. */
774 s1 = head_tmp2 + head_tmp1;
775 bv = s1 - head_tmp2;
776 s2 = ((head_tmp1 - bv) + (head_tmp2 - (s1 - bv)));
777
778 /* Add two lo words. */
779 t1 = tail_tmp2 + tail_tmp1;
780 bv = t1 - tail_tmp2;
781 t2 = ((tail_tmp1 - bv) + (tail_tmp2 - (t1 - bv)));
782
783 s2 += t1;
784
785 /* Renormalize (s1, s2) to (t1, s2) */
786 t1 = s1 + s2;
787 s2 = s2 - (t1 - s1);
788
789 t2 += s2;
790
791 /* Renormalize (t1, t2) */
792 head_tmp1 = t1 + t2;
793 tail_tmp1 = t2 - (head_tmp1 - t1);
794 }
795 c_i[cij] = head_tmp1;
796 }
797 }
798 }
799
800 } else {
801 /* The most general form, C <--- alpha * A * B + beta * C
802 or C <--- alpha * B * A + beta * C */
803
804 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
805 for (j = 0, cij = ci, bj = 0; j < n_i;
806 j++, cij += inccij, bj += incbj) {
807
808 head_sum = tail_sum = 0.0;
809
810 for (k = 0, aik = ai, bkj = bj; k < i;
811 k++, aik += incaik1, bkj += incbkj) {
812 a_elem = a_i[aik];
813 b_elem = b_i[bkj];
814 head_prod = (double) a_elem *b_elem;
815 tail_prod = 0.0;
816 {
817 /* Compute double-double = double-double + double-double. */
818 double bv;
819 double s1, s2, t1, t2;
820
821 /* Add two hi words. */
822 s1 = head_sum + head_prod;
823 bv = s1 - head_sum;
824 s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
825
826 /* Add two lo words. */
827 t1 = tail_sum + tail_prod;
828 bv = t1 - tail_sum;
829 t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
830
831 s2 += t1;
832
833 /* Renormalize (s1, s2) to (t1, s2) */
834 t1 = s1 + s2;
835 s2 = s2 - (t1 - s1);
836
837 t2 += s2;
838
839 /* Renormalize (t1, t2) */
840 head_sum = t1 + t2;
841 tail_sum = t2 - (head_sum - t1);
842 }
843 }
844 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
845 a_elem = a_i[aik];
846 b_elem = b_i[bkj];
847 head_prod = (double) a_elem *b_elem;
848 tail_prod = 0.0;
849 {
850 /* Compute double-double = double-double + double-double. */
851 double bv;
852 double s1, s2, t1, t2;
853
854 /* Add two hi words. */
855 s1 = head_sum + head_prod;
856 bv = s1 - head_sum;
857 s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
858
859 /* Add two lo words. */
860 t1 = tail_sum + tail_prod;
861 bv = t1 - tail_sum;
862 t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
863
864 s2 += t1;
865
866 /* Renormalize (s1, s2) to (t1, s2) */
867 t1 = s1 + s2;
868 s2 = s2 - (t1 - s1);
869
870 t2 += s2;
871
872 /* Renormalize (t1, t2) */
873 head_sum = t1 + t2;
874 tail_sum = t2 - (head_sum - t1);
875 }
876 }
877 {
878 double dt = (double) alpha_i;
879 {
880 /* Compute double-double = double-double * double. */
881 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
882
883 con = head_sum * split;
884 a11 = con - head_sum;
885 a11 = con - a11;
886 a21 = head_sum - a11;
887 con = dt * split;
888 b1 = con - dt;
889 b1 = con - b1;
890 b2 = dt - b1;
891
892 c11 = head_sum * dt;
893 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
894
895 c2 = tail_sum * dt;
896 t1 = c11 + c2;
897 t2 = (c2 - (t1 - c11)) + c21;
898
899 head_tmp1 = t1 + t2;
900 tail_tmp1 = t2 - (head_tmp1 - t1);
901 }
902 }
903 c_elem = c_i[cij];
904 head_tmp2 = (double) c_elem *beta_i;
905 tail_tmp2 = 0.0;
906 {
907 /* Compute double-double = double-double + double-double. */
908 double bv;
909 double s1, s2, t1, t2;
910
911 /* Add two hi words. */
912 s1 = head_tmp1 + head_tmp2;
913 bv = s1 - head_tmp1;
914 s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
915
916 /* Add two lo words. */
917 t1 = tail_tmp1 + tail_tmp2;
918 bv = t1 - tail_tmp1;
919 t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
920
921 s2 += t1;
922
923 /* Renormalize (s1, s2) to (t1, s2) */
924 t1 = s1 + s2;
925 s2 = s2 - (t1 - s1);
926
927 t2 += s2;
928
929 /* Renormalize (t1, t2) */
930 head_tmp1 = t1 + t2;
931 tail_tmp1 = t2 - (head_tmp1 - t1);
932 }
933 c_i[cij] = head_tmp1;
934 }
935 }
936 }
937
938 FPU_FIX_STOP;
939
940 break;
941 }
942 }
943 }
944