1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_dgemm_x(enum blas_order_type order,enum blas_trans_type transa,enum blas_trans_type transb,int m,int n,int k,double alpha,const double * a,int lda,const double * b,int ldb,double beta,double * c,int ldc,enum blas_prec_type prec)3 void BLAS_dgemm_x(enum blas_order_type order, enum blas_trans_type transa,
4 enum blas_trans_type transb, int m, int n, int k,
5 double alpha, const double *a, int lda, const double *b,
6 int ldb, double beta, double *c, int ldc,
7 enum blas_prec_type prec)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * This routine computes the matrix product:
14 *
15 * C <- alpha * op(A) * op(B) + beta * C .
16 *
17 * where op(M) represents either M, M transpose,
18 * or M conjugate transpose.
19 *
20 * Arguments
21 * =========
22 *
23 * order (input) enum blas_order_type
24 * Storage format of input matrices A, B, and C.
25 *
26 * transa (input) enum blas_trans_type
27 * Operation to be done on matrix A before multiplication.
28 * Can be no operation, transposition, or conjugate transposition.
29 *
30 * transb (input) enum blas_trans_type
31 * Operation to be done on matrix B before multiplication.
32 * Can be no operation, transposition, or conjugate transposition.
33 *
34 * m n k (input) int
35 * The dimensions of matrices A, B, and C.
36 * Matrix C is m-by-n matrix.
37 * Matrix A is m-by-k if A is not transposed,
38 * k-by-m otherwise.
39 * Matrix B is k-by-n if B is not transposed,
40 * n-by-k otherwise.
41 *
42 * alpha (input) double
43 *
44 * a (input) const double*
45 * matrix A.
46 *
47 * lda (input) int
48 * leading dimension of A.
49 *
50 * b (input) const double*
51 * matrix B
52 *
53 * ldb (input) int
54 * leading dimension of B.
55 *
56 * beta (input) double
57 *
58 * c (input/output) double*
59 * matrix C
60 *
61 * ldc (input) int
62 * leading dimension of C.
63 *
64 * prec (input) enum blas_prec_type
65 * Specifies the internal precision to be used.
66 * = blas_prec_single: single precision.
67 * = blas_prec_double: double precision.
68 * = blas_prec_extra : anything at least 1.5 times as accurate
69 * than double, and wider than 80-bits.
70 * We use double-double in our implementation.
71 *
72 */
73 {
74 static const char routine_name[] = "BLAS_dgemm_x";
75 switch (prec) {
76
77 case blas_prec_single:
78 case blas_prec_double:
79 case blas_prec_indigenous:{
80
81
82 /* Integer Index Variables */
83 int i, j, h;
84
85 int ai, bj, ci;
86 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
87
88 int incai, incaih; /* Index increments for matrix a */
89 int incbj, incbhj; /* Index increments for matrix b */
90 int incci, inccij; /* Index increments for matrix c */
91
92 /* Input Matrices */
93 const double *a_i = a;
94 const double *b_i = b;
95
96 /* Output Matrix */
97 double *c_i = c;
98
99 /* Input Scalars */
100 double alpha_i = alpha;
101 double beta_i = beta;
102
103 /* Temporary Floating-Point Variables */
104 double a_elem;
105 double b_elem;
106 double c_elem;
107 double prod;
108 double sum;
109 double tmp1;
110 double tmp2;
111
112
113
114 /* Test for error conditions */
115 if (m < 0)
116 BLAS_error(routine_name, -4, m, NULL);
117 if (n < 0)
118 BLAS_error(routine_name, -5, n, NULL);
119 if (k < 0)
120 BLAS_error(routine_name, -6, k, NULL);
121
122 if (order == blas_colmajor) {
123
124 if (ldc < m)
125 BLAS_error(routine_name, -14, ldc, NULL);
126
127 if (transa == blas_no_trans) {
128 if (lda < m)
129 BLAS_error(routine_name, -9, lda, NULL);
130 } else {
131 if (lda < k)
132 BLAS_error(routine_name, -9, lda, NULL);
133 }
134
135 if (transb == blas_no_trans) {
136 if (ldb < k)
137 BLAS_error(routine_name, -11, ldb, NULL);
138 } else {
139 if (ldb < n)
140 BLAS_error(routine_name, -11, ldb, NULL);
141 }
142
143 } else {
144 /* row major */
145 if (ldc < n)
146 BLAS_error(routine_name, -14, ldc, NULL);
147
148 if (transa == blas_no_trans) {
149 if (lda < k)
150 BLAS_error(routine_name, -9, lda, NULL);
151 } else {
152 if (lda < m)
153 BLAS_error(routine_name, -9, lda, NULL);
154 }
155
156 if (transb == blas_no_trans) {
157 if (ldb < n)
158 BLAS_error(routine_name, -11, ldb, NULL);
159 } else {
160 if (ldb < k)
161 BLAS_error(routine_name, -11, ldb, NULL);
162 }
163 }
164
165 /* Test for no-op */
166 if (n == 0 || m == 0 || k == 0)
167 return;
168 if (alpha_i == 0.0 && beta_i == 1.0) {
169 return;
170 }
171
172 /* Set Index Parameters */
173 if (order == blas_colmajor) {
174 incci = 1;
175 inccij = ldc;
176
177 if (transa == blas_no_trans) {
178 incai = 1;
179 incaih = lda;
180 } else {
181 incai = lda;
182 incaih = 1;
183 }
184
185 if (transb == blas_no_trans) {
186 incbj = ldb;
187 incbhj = 1;
188 } else {
189 incbj = 1;
190 incbhj = ldb;
191 }
192
193 } else {
194 /* row major */
195 incci = ldc;
196 inccij = 1;
197
198 if (transa == blas_no_trans) {
199 incai = lda;
200 incaih = 1;
201 } else {
202 incai = 1;
203 incaih = lda;
204 }
205
206 if (transb == blas_no_trans) {
207 incbj = 1;
208 incbhj = ldb;
209 } else {
210 incbj = ldb;
211 incbhj = 1;
212 }
213
214 }
215
216
217
218 /* Ajustment to increments */
219
220
221
222
223
224
225
226 /* alpha = 0. In this case, just return beta * C */
227 if (alpha_i == 0.0) {
228
229 ci = 0;
230 for (i = 0; i < m; i++, ci += incci) {
231 cij = ci;
232 for (j = 0; j < n; j++, cij += inccij) {
233 c_elem = c_i[cij];
234 tmp1 = c_elem * beta_i;
235 c_i[cij] = tmp1;
236 }
237 }
238
239 } else if (alpha_i == 1.0) {
240
241 /* Case alpha == 1. */
242
243 if (beta_i == 0.0) {
244 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
245
246 ci = 0;
247 ai = 0;
248 for (i = 0; i < m; i++, ci += incci, ai += incai) {
249
250 cij = ci;
251 bj = 0;
252
253 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
254
255 aih = ai;
256 bhj = bj;
257
258 sum = 0.0;
259
260 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
261 a_elem = a_i[aih];
262 b_elem = b_i[bhj];
263 if (transa == blas_conj_trans) {
264
265 }
266 if (transb == blas_conj_trans) {
267
268 }
269 prod = a_elem * b_elem;
270 sum = sum + prod;
271 }
272 c_i[cij] = sum;
273 }
274 }
275
276 } else {
277 /* Case alpha == 1, but beta != 0.
278 We compute C <--- A * B + beta * C */
279
280 ci = 0;
281 ai = 0;
282 for (i = 0; i < m; i++, ci += incci, ai += incai) {
283
284 cij = ci;
285 bj = 0;
286
287 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
288
289 aih = ai;
290 bhj = bj;
291
292 sum = 0.0;
293
294 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
295 a_elem = a_i[aih];
296 b_elem = b_i[bhj];
297 if (transa == blas_conj_trans) {
298
299 }
300 if (transb == blas_conj_trans) {
301
302 }
303 prod = a_elem * b_elem;
304 sum = sum + prod;
305 }
306
307 c_elem = c_i[cij];
308 tmp2 = c_elem * beta_i;
309 tmp1 = sum;
310 tmp1 = tmp2 + tmp1;
311 c_i[cij] = tmp1;
312 }
313 }
314 }
315
316 } else {
317
318 /* The most general form, C <-- alpha * A * B + beta * C */
319 ci = 0;
320 ai = 0;
321 for (i = 0; i < m; i++, ci += incci, ai += incai) {
322
323 cij = ci;
324 bj = 0;
325
326 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
327
328 aih = ai;
329 bhj = bj;
330
331 sum = 0.0;
332
333 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
334 a_elem = a_i[aih];
335 b_elem = b_i[bhj];
336 if (transa == blas_conj_trans) {
337
338 }
339 if (transb == blas_conj_trans) {
340
341 }
342 prod = a_elem * b_elem;
343 sum = sum + prod;
344 }
345
346 tmp1 = sum * alpha_i;
347 c_elem = c_i[cij];
348 tmp2 = c_elem * beta_i;
349 tmp1 = tmp1 + tmp2;
350 c_i[cij] = tmp1;
351 }
352 }
353
354 }
355
356
357
358 break;
359 }
360
361 case blas_prec_extra:{
362
363
364 /* Integer Index Variables */
365 int i, j, h;
366
367 int ai, bj, ci;
368 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
369
370 int incai, incaih; /* Index increments for matrix a */
371 int incbj, incbhj; /* Index increments for matrix b */
372 int incci, inccij; /* Index increments for matrix c */
373
374 /* Input Matrices */
375 const double *a_i = a;
376 const double *b_i = b;
377
378 /* Output Matrix */
379 double *c_i = c;
380
381 /* Input Scalars */
382 double alpha_i = alpha;
383 double beta_i = beta;
384
385 /* Temporary Floating-Point Variables */
386 double a_elem;
387 double b_elem;
388 double c_elem;
389 double head_prod, tail_prod;
390 double head_sum, tail_sum;
391 double head_tmp1, tail_tmp1;
392 double head_tmp2, tail_tmp2;
393
394 FPU_FIX_DECL;
395
396 /* Test for error conditions */
397 if (m < 0)
398 BLAS_error(routine_name, -4, m, NULL);
399 if (n < 0)
400 BLAS_error(routine_name, -5, n, NULL);
401 if (k < 0)
402 BLAS_error(routine_name, -6, k, NULL);
403
404 if (order == blas_colmajor) {
405
406 if (ldc < m)
407 BLAS_error(routine_name, -14, ldc, NULL);
408
409 if (transa == blas_no_trans) {
410 if (lda < m)
411 BLAS_error(routine_name, -9, lda, NULL);
412 } else {
413 if (lda < k)
414 BLAS_error(routine_name, -9, lda, NULL);
415 }
416
417 if (transb == blas_no_trans) {
418 if (ldb < k)
419 BLAS_error(routine_name, -11, ldb, NULL);
420 } else {
421 if (ldb < n)
422 BLAS_error(routine_name, -11, ldb, NULL);
423 }
424
425 } else {
426 /* row major */
427 if (ldc < n)
428 BLAS_error(routine_name, -14, ldc, NULL);
429
430 if (transa == blas_no_trans) {
431 if (lda < k)
432 BLAS_error(routine_name, -9, lda, NULL);
433 } else {
434 if (lda < m)
435 BLAS_error(routine_name, -9, lda, NULL);
436 }
437
438 if (transb == blas_no_trans) {
439 if (ldb < n)
440 BLAS_error(routine_name, -11, ldb, NULL);
441 } else {
442 if (ldb < k)
443 BLAS_error(routine_name, -11, ldb, NULL);
444 }
445 }
446
447 /* Test for no-op */
448 if (n == 0 || m == 0 || k == 0)
449 return;
450 if (alpha_i == 0.0 && beta_i == 1.0) {
451 return;
452 }
453
454 /* Set Index Parameters */
455 if (order == blas_colmajor) {
456 incci = 1;
457 inccij = ldc;
458
459 if (transa == blas_no_trans) {
460 incai = 1;
461 incaih = lda;
462 } else {
463 incai = lda;
464 incaih = 1;
465 }
466
467 if (transb == blas_no_trans) {
468 incbj = ldb;
469 incbhj = 1;
470 } else {
471 incbj = 1;
472 incbhj = ldb;
473 }
474
475 } else {
476 /* row major */
477 incci = ldc;
478 inccij = 1;
479
480 if (transa == blas_no_trans) {
481 incai = lda;
482 incaih = 1;
483 } else {
484 incai = 1;
485 incaih = lda;
486 }
487
488 if (transb == blas_no_trans) {
489 incbj = 1;
490 incbhj = ldb;
491 } else {
492 incbj = ldb;
493 incbhj = 1;
494 }
495
496 }
497
498 FPU_FIX_START;
499
500 /* Ajustment to increments */
501
502
503
504
505
506
507
508 /* alpha = 0. In this case, just return beta * C */
509 if (alpha_i == 0.0) {
510
511 ci = 0;
512 for (i = 0; i < m; i++, ci += incci) {
513 cij = ci;
514 for (j = 0; j < n; j++, cij += inccij) {
515 c_elem = c_i[cij];
516 {
517 /* Compute double_double = double * double. */
518 double a1, a2, b1, b2, con;
519
520 con = c_elem * split;
521 a1 = con - c_elem;
522 a1 = con - a1;
523 a2 = c_elem - a1;
524 con = beta_i * split;
525 b1 = con - beta_i;
526 b1 = con - b1;
527 b2 = beta_i - b1;
528
529 head_tmp1 = c_elem * beta_i;
530 tail_tmp1 =
531 (((a1 * b1 - head_tmp1) + a1 * b2) + a2 * b1) + a2 * b2;
532 }
533 c_i[cij] = head_tmp1;
534 }
535 }
536
537 } else if (alpha_i == 1.0) {
538
539 /* Case alpha == 1. */
540
541 if (beta_i == 0.0) {
542 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
543
544 ci = 0;
545 ai = 0;
546 for (i = 0; i < m; i++, ci += incci, ai += incai) {
547
548 cij = ci;
549 bj = 0;
550
551 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
552
553 aih = ai;
554 bhj = bj;
555
556 head_sum = tail_sum = 0.0;
557
558 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
559 a_elem = a_i[aih];
560 b_elem = b_i[bhj];
561 if (transa == blas_conj_trans) {
562
563 }
564 if (transb == blas_conj_trans) {
565
566 }
567 {
568 /* Compute double_double = double * double. */
569 double a1, a2, b1, b2, con;
570
571 con = a_elem * split;
572 a1 = con - a_elem;
573 a1 = con - a1;
574 a2 = a_elem - a1;
575 con = b_elem * split;
576 b1 = con - b_elem;
577 b1 = con - b1;
578 b2 = b_elem - b1;
579
580 head_prod = a_elem * b_elem;
581 tail_prod =
582 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
583 }
584 {
585 /* Compute double-double = double-double + double-double. */
586 double bv;
587 double s1, s2, t1, t2;
588
589 /* Add two hi words. */
590 s1 = head_sum + head_prod;
591 bv = s1 - head_sum;
592 s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
593
594 /* Add two lo words. */
595 t1 = tail_sum + tail_prod;
596 bv = t1 - tail_sum;
597 t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
598
599 s2 += t1;
600
601 /* Renormalize (s1, s2) to (t1, s2) */
602 t1 = s1 + s2;
603 s2 = s2 - (t1 - s1);
604
605 t2 += s2;
606
607 /* Renormalize (t1, t2) */
608 head_sum = t1 + t2;
609 tail_sum = t2 - (head_sum - t1);
610 }
611 }
612 c_i[cij] = head_sum;
613 }
614 }
615
616 } else {
617 /* Case alpha == 1, but beta != 0.
618 We compute C <--- A * B + beta * C */
619
620 ci = 0;
621 ai = 0;
622 for (i = 0; i < m; i++, ci += incci, ai += incai) {
623
624 cij = ci;
625 bj = 0;
626
627 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
628
629 aih = ai;
630 bhj = bj;
631
632 head_sum = tail_sum = 0.0;
633
634 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
635 a_elem = a_i[aih];
636 b_elem = b_i[bhj];
637 if (transa == blas_conj_trans) {
638
639 }
640 if (transb == blas_conj_trans) {
641
642 }
643 {
644 /* Compute double_double = double * double. */
645 double a1, a2, b1, b2, con;
646
647 con = a_elem * split;
648 a1 = con - a_elem;
649 a1 = con - a1;
650 a2 = a_elem - a1;
651 con = b_elem * split;
652 b1 = con - b_elem;
653 b1 = con - b1;
654 b2 = b_elem - b1;
655
656 head_prod = a_elem * b_elem;
657 tail_prod =
658 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
659 }
660 {
661 /* Compute double-double = double-double + double-double. */
662 double bv;
663 double s1, s2, t1, t2;
664
665 /* Add two hi words. */
666 s1 = head_sum + head_prod;
667 bv = s1 - head_sum;
668 s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
669
670 /* Add two lo words. */
671 t1 = tail_sum + tail_prod;
672 bv = t1 - tail_sum;
673 t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
674
675 s2 += t1;
676
677 /* Renormalize (s1, s2) to (t1, s2) */
678 t1 = s1 + s2;
679 s2 = s2 - (t1 - s1);
680
681 t2 += s2;
682
683 /* Renormalize (t1, t2) */
684 head_sum = t1 + t2;
685 tail_sum = t2 - (head_sum - t1);
686 }
687 }
688
689 c_elem = c_i[cij];
690 {
691 /* Compute double_double = double * double. */
692 double a1, a2, b1, b2, con;
693
694 con = c_elem * split;
695 a1 = con - c_elem;
696 a1 = con - a1;
697 a2 = c_elem - a1;
698 con = beta_i * split;
699 b1 = con - beta_i;
700 b1 = con - b1;
701 b2 = beta_i - b1;
702
703 head_tmp2 = c_elem * beta_i;
704 tail_tmp2 =
705 (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
706 }
707 head_tmp1 = head_sum;
708 tail_tmp1 = tail_sum;
709 {
710 /* Compute double-double = double-double + double-double. */
711 double bv;
712 double s1, s2, t1, t2;
713
714 /* Add two hi words. */
715 s1 = head_tmp2 + head_tmp1;
716 bv = s1 - head_tmp2;
717 s2 = ((head_tmp1 - bv) + (head_tmp2 - (s1 - bv)));
718
719 /* Add two lo words. */
720 t1 = tail_tmp2 + tail_tmp1;
721 bv = t1 - tail_tmp2;
722 t2 = ((tail_tmp1 - bv) + (tail_tmp2 - (t1 - bv)));
723
724 s2 += t1;
725
726 /* Renormalize (s1, s2) to (t1, s2) */
727 t1 = s1 + s2;
728 s2 = s2 - (t1 - s1);
729
730 t2 += s2;
731
732 /* Renormalize (t1, t2) */
733 head_tmp1 = t1 + t2;
734 tail_tmp1 = t2 - (head_tmp1 - t1);
735 }
736 c_i[cij] = head_tmp1;
737 }
738 }
739 }
740
741 } else {
742
743 /* The most general form, C <-- alpha * A * B + beta * C */
744 ci = 0;
745 ai = 0;
746 for (i = 0; i < m; i++, ci += incci, ai += incai) {
747
748 cij = ci;
749 bj = 0;
750
751 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
752
753 aih = ai;
754 bhj = bj;
755
756 head_sum = tail_sum = 0.0;
757
758 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
759 a_elem = a_i[aih];
760 b_elem = b_i[bhj];
761 if (transa == blas_conj_trans) {
762
763 }
764 if (transb == blas_conj_trans) {
765
766 }
767 {
768 /* Compute double_double = double * double. */
769 double a1, a2, b1, b2, con;
770
771 con = a_elem * split;
772 a1 = con - a_elem;
773 a1 = con - a1;
774 a2 = a_elem - a1;
775 con = b_elem * split;
776 b1 = con - b_elem;
777 b1 = con - b1;
778 b2 = b_elem - b1;
779
780 head_prod = a_elem * b_elem;
781 tail_prod =
782 (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
783 }
784 {
785 /* Compute double-double = double-double + double-double. */
786 double bv;
787 double s1, s2, t1, t2;
788
789 /* Add two hi words. */
790 s1 = head_sum + head_prod;
791 bv = s1 - head_sum;
792 s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
793
794 /* Add two lo words. */
795 t1 = tail_sum + tail_prod;
796 bv = t1 - tail_sum;
797 t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
798
799 s2 += t1;
800
801 /* Renormalize (s1, s2) to (t1, s2) */
802 t1 = s1 + s2;
803 s2 = s2 - (t1 - s1);
804
805 t2 += s2;
806
807 /* Renormalize (t1, t2) */
808 head_sum = t1 + t2;
809 tail_sum = t2 - (head_sum - t1);
810 }
811 }
812
813 {
814 /* Compute double-double = double-double * double. */
815 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
816
817 con = head_sum * split;
818 a11 = con - head_sum;
819 a11 = con - a11;
820 a21 = head_sum - a11;
821 con = alpha_i * split;
822 b1 = con - alpha_i;
823 b1 = con - b1;
824 b2 = alpha_i - b1;
825
826 c11 = head_sum * alpha_i;
827 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
828
829 c2 = tail_sum * alpha_i;
830 t1 = c11 + c2;
831 t2 = (c2 - (t1 - c11)) + c21;
832
833 head_tmp1 = t1 + t2;
834 tail_tmp1 = t2 - (head_tmp1 - t1);
835 }
836 c_elem = c_i[cij];
837 {
838 /* Compute double_double = double * double. */
839 double a1, a2, b1, b2, con;
840
841 con = c_elem * split;
842 a1 = con - c_elem;
843 a1 = con - a1;
844 a2 = c_elem - a1;
845 con = beta_i * split;
846 b1 = con - beta_i;
847 b1 = con - b1;
848 b2 = beta_i - b1;
849
850 head_tmp2 = c_elem * beta_i;
851 tail_tmp2 =
852 (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
853 }
854 {
855 /* Compute double-double = double-double + double-double. */
856 double bv;
857 double s1, s2, t1, t2;
858
859 /* Add two hi words. */
860 s1 = head_tmp1 + head_tmp2;
861 bv = s1 - head_tmp1;
862 s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
863
864 /* Add two lo words. */
865 t1 = tail_tmp1 + tail_tmp2;
866 bv = t1 - tail_tmp1;
867 t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
868
869 s2 += t1;
870
871 /* Renormalize (s1, s2) to (t1, s2) */
872 t1 = s1 + s2;
873 s2 = s2 - (t1 - s1);
874
875 t2 += s2;
876
877 /* Renormalize (t1, t2) */
878 head_tmp1 = t1 + t2;
879 tail_tmp1 = t2 - (head_tmp1 - t1);
880 }
881 c_i[cij] = head_tmp1;
882 }
883 }
884
885 }
886
887 FPU_FIX_STOP;
888
889 break;
890 }
891 }
892 }
893