1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_cgemm_s_c_x(enum blas_order_type order,enum blas_trans_type transa,enum blas_trans_type transb,int m,int n,int k,const void * alpha,const float * a,int lda,const void * b,int ldb,const void * beta,void * c,int ldc,enum blas_prec_type prec)3 void BLAS_cgemm_s_c_x(enum blas_order_type order, enum blas_trans_type transa,
4 enum blas_trans_type transb, int m, int n, int k,
5 const void *alpha, const float *a, int lda,
6 const void *b, int ldb, const void *beta, void *c,
7 int ldc, enum blas_prec_type prec)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * This routine computes the matrix product:
14 *
15 * C <- alpha * op(A) * op(B) + beta * C .
16 *
17 * where op(M) represents either M, M transpose,
18 * or M conjugate transpose.
19 *
20 * Arguments
21 * =========
22 *
23 * order (input) enum blas_order_type
24 * Storage format of input matrices A, B, and C.
25 *
26 * transa (input) enum blas_trans_type
27 * Operation to be done on matrix A before multiplication.
28 * Can be no operation, transposition, or conjugate transposition.
29 *
30 * transb (input) enum blas_trans_type
31 * Operation to be done on matrix B before multiplication.
32 * Can be no operation, transposition, or conjugate transposition.
33 *
34 * m n k (input) int
35 * The dimensions of matrices A, B, and C.
36 * Matrix C is m-by-n matrix.
37 * Matrix A is m-by-k if A is not transposed,
38 * k-by-m otherwise.
39 * Matrix B is k-by-n if B is not transposed,
40 * n-by-k otherwise.
41 *
42 * alpha (input) const void*
43 *
44 * a (input) const float*
45 * matrix A.
46 *
47 * lda (input) int
48 * leading dimension of A.
49 *
50 * b (input) const void*
51 * matrix B
52 *
53 * ldb (input) int
54 * leading dimension of B.
55 *
56 * beta (input) const void*
57 *
58 * c (input/output) void*
59 * matrix C
60 *
61 * ldc (input) int
62 * leading dimension of C.
63 *
64 * prec (input) enum blas_prec_type
65 * Specifies the internal precision to be used.
66 * = blas_prec_single: single precision.
67 * = blas_prec_double: double precision.
68 * = blas_prec_extra : anything at least 1.5 times as accurate
69 * than double, and wider than 80-bits.
70 * We use double-double in our implementation.
71 *
72 */
73 {
74 static const char routine_name[] = "BLAS_cgemm_s_c_x";
75 switch (prec) {
76
77 case blas_prec_single:{
78
79
80 /* Integer Index Variables */
81 int i, j, h;
82
83 int ai, bj, ci;
84 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
85
86 int incai, incaih; /* Index increments for matrix a */
87 int incbj, incbhj; /* Index increments for matrix b */
88 int incci, inccij; /* Index increments for matrix c */
89
90 /* Input Matrices */
91 const float *a_i = a;
92 const float *b_i = (float *) b;
93
94 /* Output Matrix */
95 float *c_i = (float *) c;
96
97 /* Input Scalars */
98 float *alpha_i = (float *) alpha;
99 float *beta_i = (float *) beta;
100
101 /* Temporary Floating-Point Variables */
102 float a_elem;
103 float b_elem[2];
104 float c_elem[2];
105 float prod[2];
106 float sum[2];
107 float tmp1[2];
108 float tmp2[2];
109
110
111
112 /* Test for error conditions */
113 if (m < 0)
114 BLAS_error(routine_name, -4, m, NULL);
115 if (n < 0)
116 BLAS_error(routine_name, -5, n, NULL);
117 if (k < 0)
118 BLAS_error(routine_name, -6, k, NULL);
119
120 if (order == blas_colmajor) {
121
122 if (ldc < m)
123 BLAS_error(routine_name, -14, ldc, NULL);
124
125 if (transa == blas_no_trans) {
126 if (lda < m)
127 BLAS_error(routine_name, -9, lda, NULL);
128 } else {
129 if (lda < k)
130 BLAS_error(routine_name, -9, lda, NULL);
131 }
132
133 if (transb == blas_no_trans) {
134 if (ldb < k)
135 BLAS_error(routine_name, -11, ldb, NULL);
136 } else {
137 if (ldb < n)
138 BLAS_error(routine_name, -11, ldb, NULL);
139 }
140
141 } else {
142 /* row major */
143 if (ldc < n)
144 BLAS_error(routine_name, -14, ldc, NULL);
145
146 if (transa == blas_no_trans) {
147 if (lda < k)
148 BLAS_error(routine_name, -9, lda, NULL);
149 } else {
150 if (lda < m)
151 BLAS_error(routine_name, -9, lda, NULL);
152 }
153
154 if (transb == blas_no_trans) {
155 if (ldb < n)
156 BLAS_error(routine_name, -11, ldb, NULL);
157 } else {
158 if (ldb < k)
159 BLAS_error(routine_name, -11, ldb, NULL);
160 }
161 }
162
163 /* Test for no-op */
164 if (n == 0 || m == 0 || k == 0)
165 return;
166 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
167 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
168 return;
169 }
170
171 /* Set Index Parameters */
172 if (order == blas_colmajor) {
173 incci = 1;
174 inccij = ldc;
175
176 if (transa == blas_no_trans) {
177 incai = 1;
178 incaih = lda;
179 } else {
180 incai = lda;
181 incaih = 1;
182 }
183
184 if (transb == blas_no_trans) {
185 incbj = ldb;
186 incbhj = 1;
187 } else {
188 incbj = 1;
189 incbhj = ldb;
190 }
191
192 } else {
193 /* row major */
194 incci = ldc;
195 inccij = 1;
196
197 if (transa == blas_no_trans) {
198 incai = lda;
199 incaih = 1;
200 } else {
201 incai = 1;
202 incaih = lda;
203 }
204
205 if (transb == blas_no_trans) {
206 incbj = 1;
207 incbhj = ldb;
208 } else {
209 incbj = ldb;
210 incbhj = 1;
211 }
212
213 }
214
215
216
217 /* Ajustment to increments */
218 incci *= 2;
219 inccij *= 2;
220
221
222 incbj *= 2;
223 incbhj *= 2;
224
225 /* alpha = 0. In this case, just return beta * C */
226 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
227
228 ci = 0;
229 for (i = 0; i < m; i++, ci += incci) {
230 cij = ci;
231 for (j = 0; j < n; j++, cij += inccij) {
232 c_elem[0] = c_i[cij];
233 c_elem[1] = c_i[cij + 1];
234 {
235 tmp1[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
236 tmp1[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
237 }
238
239 c_i[cij] = tmp1[0];
240 c_i[cij + 1] = tmp1[1];
241 }
242 }
243
244 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
245
246 /* Case alpha == 1. */
247
248 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
249 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
250
251 ci = 0;
252 ai = 0;
253 for (i = 0; i < m; i++, ci += incci, ai += incai) {
254
255 cij = ci;
256 bj = 0;
257
258 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
259
260 aih = ai;
261 bhj = bj;
262
263 sum[0] = sum[1] = 0.0;
264
265 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
266 a_elem = a_i[aih];
267 b_elem[0] = b_i[bhj];
268 b_elem[1] = b_i[bhj + 1];
269 if (transa == blas_conj_trans) {
270
271 }
272 if (transb == blas_conj_trans) {
273 b_elem[1] = -b_elem[1];
274 }
275 {
276 prod[0] = b_elem[0] * a_elem;
277 prod[1] = b_elem[1] * a_elem;
278 }
279 sum[0] = sum[0] + prod[0];
280 sum[1] = sum[1] + prod[1];
281 }
282 c_i[cij] = sum[0];
283 c_i[cij + 1] = sum[1];
284 }
285 }
286
287 } else {
288 /* Case alpha == 1, but beta != 0.
289 We compute C <--- A * B + beta * C */
290
291 ci = 0;
292 ai = 0;
293 for (i = 0; i < m; i++, ci += incci, ai += incai) {
294
295 cij = ci;
296 bj = 0;
297
298 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
299
300 aih = ai;
301 bhj = bj;
302
303 sum[0] = sum[1] = 0.0;
304
305 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
306 a_elem = a_i[aih];
307 b_elem[0] = b_i[bhj];
308 b_elem[1] = b_i[bhj + 1];
309 if (transa == blas_conj_trans) {
310
311 }
312 if (transb == blas_conj_trans) {
313 b_elem[1] = -b_elem[1];
314 }
315 {
316 prod[0] = b_elem[0] * a_elem;
317 prod[1] = b_elem[1] * a_elem;
318 }
319 sum[0] = sum[0] + prod[0];
320 sum[1] = sum[1] + prod[1];
321 }
322
323 c_elem[0] = c_i[cij];
324 c_elem[1] = c_i[cij + 1];
325 {
326 tmp2[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
327 tmp2[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
328 }
329
330 tmp1[0] = sum[0];
331 tmp1[1] = sum[1];
332 tmp1[0] = tmp2[0] + tmp1[0];
333 tmp1[1] = tmp2[1] + tmp1[1];
334 c_i[cij] = tmp1[0];
335 c_i[cij + 1] = tmp1[1];
336 }
337 }
338 }
339
340 } else {
341
342 /* The most general form, C <-- alpha * A * B + beta * C */
343 ci = 0;
344 ai = 0;
345 for (i = 0; i < m; i++, ci += incci, ai += incai) {
346
347 cij = ci;
348 bj = 0;
349
350 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
351
352 aih = ai;
353 bhj = bj;
354
355 sum[0] = sum[1] = 0.0;
356
357 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
358 a_elem = a_i[aih];
359 b_elem[0] = b_i[bhj];
360 b_elem[1] = b_i[bhj + 1];
361 if (transa == blas_conj_trans) {
362
363 }
364 if (transb == blas_conj_trans) {
365 b_elem[1] = -b_elem[1];
366 }
367 {
368 prod[0] = b_elem[0] * a_elem;
369 prod[1] = b_elem[1] * a_elem;
370 }
371 sum[0] = sum[0] + prod[0];
372 sum[1] = sum[1] + prod[1];
373 }
374
375 {
376 tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
377 tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
378 }
379
380 c_elem[0] = c_i[cij];
381 c_elem[1] = c_i[cij + 1];
382 {
383 tmp2[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
384 tmp2[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
385 }
386
387 tmp1[0] = tmp1[0] + tmp2[0];
388 tmp1[1] = tmp1[1] + tmp2[1];
389 c_i[cij] = tmp1[0];
390 c_i[cij + 1] = tmp1[1];
391 }
392 }
393
394 }
395
396
397
398 break;
399 }
400 case blas_prec_double:
401 case blas_prec_indigenous:{
402
403
404 /* Integer Index Variables */
405 int i, j, h;
406
407 int ai, bj, ci;
408 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
409
410 int incai, incaih; /* Index increments for matrix a */
411 int incbj, incbhj; /* Index increments for matrix b */
412 int incci, inccij; /* Index increments for matrix c */
413
414 /* Input Matrices */
415 const float *a_i = a;
416 const float *b_i = (float *) b;
417
418 /* Output Matrix */
419 float *c_i = (float *) c;
420
421 /* Input Scalars */
422 float *alpha_i = (float *) alpha;
423 float *beta_i = (float *) beta;
424
425 /* Temporary Floating-Point Variables */
426 float a_elem;
427 float b_elem[2];
428 float c_elem[2];
429 double prod[2];
430 double sum[2];
431 double tmp1[2];
432 double tmp2[2];
433
434
435
436 /* Test for error conditions */
437 if (m < 0)
438 BLAS_error(routine_name, -4, m, NULL);
439 if (n < 0)
440 BLAS_error(routine_name, -5, n, NULL);
441 if (k < 0)
442 BLAS_error(routine_name, -6, k, NULL);
443
444 if (order == blas_colmajor) {
445
446 if (ldc < m)
447 BLAS_error(routine_name, -14, ldc, NULL);
448
449 if (transa == blas_no_trans) {
450 if (lda < m)
451 BLAS_error(routine_name, -9, lda, NULL);
452 } else {
453 if (lda < k)
454 BLAS_error(routine_name, -9, lda, NULL);
455 }
456
457 if (transb == blas_no_trans) {
458 if (ldb < k)
459 BLAS_error(routine_name, -11, ldb, NULL);
460 } else {
461 if (ldb < n)
462 BLAS_error(routine_name, -11, ldb, NULL);
463 }
464
465 } else {
466 /* row major */
467 if (ldc < n)
468 BLAS_error(routine_name, -14, ldc, NULL);
469
470 if (transa == blas_no_trans) {
471 if (lda < k)
472 BLAS_error(routine_name, -9, lda, NULL);
473 } else {
474 if (lda < m)
475 BLAS_error(routine_name, -9, lda, NULL);
476 }
477
478 if (transb == blas_no_trans) {
479 if (ldb < n)
480 BLAS_error(routine_name, -11, ldb, NULL);
481 } else {
482 if (ldb < k)
483 BLAS_error(routine_name, -11, ldb, NULL);
484 }
485 }
486
487 /* Test for no-op */
488 if (n == 0 || m == 0 || k == 0)
489 return;
490 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
491 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
492 return;
493 }
494
495 /* Set Index Parameters */
496 if (order == blas_colmajor) {
497 incci = 1;
498 inccij = ldc;
499
500 if (transa == blas_no_trans) {
501 incai = 1;
502 incaih = lda;
503 } else {
504 incai = lda;
505 incaih = 1;
506 }
507
508 if (transb == blas_no_trans) {
509 incbj = ldb;
510 incbhj = 1;
511 } else {
512 incbj = 1;
513 incbhj = ldb;
514 }
515
516 } else {
517 /* row major */
518 incci = ldc;
519 inccij = 1;
520
521 if (transa == blas_no_trans) {
522 incai = lda;
523 incaih = 1;
524 } else {
525 incai = 1;
526 incaih = lda;
527 }
528
529 if (transb == blas_no_trans) {
530 incbj = 1;
531 incbhj = ldb;
532 } else {
533 incbj = ldb;
534 incbhj = 1;
535 }
536
537 }
538
539
540
541 /* Ajustment to increments */
542 incci *= 2;
543 inccij *= 2;
544
545
546 incbj *= 2;
547 incbhj *= 2;
548
549 /* alpha = 0. In this case, just return beta * C */
550 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
551
552 ci = 0;
553 for (i = 0; i < m; i++, ci += incci) {
554 cij = ci;
555 for (j = 0; j < n; j++, cij += inccij) {
556 c_elem[0] = c_i[cij];
557 c_elem[1] = c_i[cij + 1];
558 {
559 tmp1[0] =
560 (double) c_elem[0] * beta_i[0] -
561 (double) c_elem[1] * beta_i[1];
562 tmp1[1] =
563 (double) c_elem[0] * beta_i[1] +
564 (double) c_elem[1] * beta_i[0];
565 }
566 c_i[cij] = tmp1[0];
567 c_i[cij + 1] = tmp1[1];
568 }
569 }
570
571 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
572
573 /* Case alpha == 1. */
574
575 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
576 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
577
578 ci = 0;
579 ai = 0;
580 for (i = 0; i < m; i++, ci += incci, ai += incai) {
581
582 cij = ci;
583 bj = 0;
584
585 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
586
587 aih = ai;
588 bhj = bj;
589
590 sum[0] = sum[1] = 0.0;
591
592 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
593 a_elem = a_i[aih];
594 b_elem[0] = b_i[bhj];
595 b_elem[1] = b_i[bhj + 1];
596 if (transa == blas_conj_trans) {
597
598 }
599 if (transb == blas_conj_trans) {
600 b_elem[1] = -b_elem[1];
601 }
602 {
603 prod[0] = (double) b_elem[0] * a_elem;
604 prod[1] = (double) b_elem[1] * a_elem;
605 }
606 sum[0] = sum[0] + prod[0];
607 sum[1] = sum[1] + prod[1];
608 }
609 c_i[cij] = sum[0];
610 c_i[cij + 1] = sum[1];
611 }
612 }
613
614 } else {
615 /* Case alpha == 1, but beta != 0.
616 We compute C <--- A * B + beta * C */
617
618 ci = 0;
619 ai = 0;
620 for (i = 0; i < m; i++, ci += incci, ai += incai) {
621
622 cij = ci;
623 bj = 0;
624
625 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
626
627 aih = ai;
628 bhj = bj;
629
630 sum[0] = sum[1] = 0.0;
631
632 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
633 a_elem = a_i[aih];
634 b_elem[0] = b_i[bhj];
635 b_elem[1] = b_i[bhj + 1];
636 if (transa == blas_conj_trans) {
637
638 }
639 if (transb == blas_conj_trans) {
640 b_elem[1] = -b_elem[1];
641 }
642 {
643 prod[0] = (double) b_elem[0] * a_elem;
644 prod[1] = (double) b_elem[1] * a_elem;
645 }
646 sum[0] = sum[0] + prod[0];
647 sum[1] = sum[1] + prod[1];
648 }
649
650 c_elem[0] = c_i[cij];
651 c_elem[1] = c_i[cij + 1];
652 {
653 tmp2[0] =
654 (double) c_elem[0] * beta_i[0] -
655 (double) c_elem[1] * beta_i[1];
656 tmp2[1] =
657 (double) c_elem[0] * beta_i[1] +
658 (double) c_elem[1] * beta_i[0];
659 }
660 tmp1[0] = sum[0];
661 tmp1[1] = sum[1];
662 tmp1[0] = tmp2[0] + tmp1[0];
663 tmp1[1] = tmp2[1] + tmp1[1];
664 c_i[cij] = tmp1[0];
665 c_i[cij + 1] = tmp1[1];
666 }
667 }
668 }
669
670 } else {
671
672 /* The most general form, C <-- alpha * A * B + beta * C */
673 ci = 0;
674 ai = 0;
675 for (i = 0; i < m; i++, ci += incci, ai += incai) {
676
677 cij = ci;
678 bj = 0;
679
680 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
681
682 aih = ai;
683 bhj = bj;
684
685 sum[0] = sum[1] = 0.0;
686
687 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
688 a_elem = a_i[aih];
689 b_elem[0] = b_i[bhj];
690 b_elem[1] = b_i[bhj + 1];
691 if (transa == blas_conj_trans) {
692
693 }
694 if (transb == blas_conj_trans) {
695 b_elem[1] = -b_elem[1];
696 }
697 {
698 prod[0] = (double) b_elem[0] * a_elem;
699 prod[1] = (double) b_elem[1] * a_elem;
700 }
701 sum[0] = sum[0] + prod[0];
702 sum[1] = sum[1] + prod[1];
703 }
704
705 {
706 tmp1[0] =
707 (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
708 tmp1[1] =
709 (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
710 }
711 c_elem[0] = c_i[cij];
712 c_elem[1] = c_i[cij + 1];
713 {
714 tmp2[0] =
715 (double) c_elem[0] * beta_i[0] -
716 (double) c_elem[1] * beta_i[1];
717 tmp2[1] =
718 (double) c_elem[0] * beta_i[1] +
719 (double) c_elem[1] * beta_i[0];
720 }
721 tmp1[0] = tmp1[0] + tmp2[0];
722 tmp1[1] = tmp1[1] + tmp2[1];
723 c_i[cij] = tmp1[0];
724 c_i[cij + 1] = tmp1[1];
725 }
726 }
727
728 }
729
730
731
732 break;
733 }
734
735 case blas_prec_extra:{
736
737
738 /* Integer Index Variables */
739 int i, j, h;
740
741 int ai, bj, ci;
742 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
743
744 int incai, incaih; /* Index increments for matrix a */
745 int incbj, incbhj; /* Index increments for matrix b */
746 int incci, inccij; /* Index increments for matrix c */
747
748 /* Input Matrices */
749 const float *a_i = a;
750 const float *b_i = (float *) b;
751
752 /* Output Matrix */
753 float *c_i = (float *) c;
754
755 /* Input Scalars */
756 float *alpha_i = (float *) alpha;
757 float *beta_i = (float *) beta;
758
759 /* Temporary Floating-Point Variables */
760 float a_elem;
761 float b_elem[2];
762 float c_elem[2];
763 double head_prod[2], tail_prod[2];
764 double head_sum[2], tail_sum[2];
765 double head_tmp1[2], tail_tmp1[2];
766 double head_tmp2[2], tail_tmp2[2];
767
768 FPU_FIX_DECL;
769
770 /* Test for error conditions */
771 if (m < 0)
772 BLAS_error(routine_name, -4, m, NULL);
773 if (n < 0)
774 BLAS_error(routine_name, -5, n, NULL);
775 if (k < 0)
776 BLAS_error(routine_name, -6, k, NULL);
777
778 if (order == blas_colmajor) {
779
780 if (ldc < m)
781 BLAS_error(routine_name, -14, ldc, NULL);
782
783 if (transa == blas_no_trans) {
784 if (lda < m)
785 BLAS_error(routine_name, -9, lda, NULL);
786 } else {
787 if (lda < k)
788 BLAS_error(routine_name, -9, lda, NULL);
789 }
790
791 if (transb == blas_no_trans) {
792 if (ldb < k)
793 BLAS_error(routine_name, -11, ldb, NULL);
794 } else {
795 if (ldb < n)
796 BLAS_error(routine_name, -11, ldb, NULL);
797 }
798
799 } else {
800 /* row major */
801 if (ldc < n)
802 BLAS_error(routine_name, -14, ldc, NULL);
803
804 if (transa == blas_no_trans) {
805 if (lda < k)
806 BLAS_error(routine_name, -9, lda, NULL);
807 } else {
808 if (lda < m)
809 BLAS_error(routine_name, -9, lda, NULL);
810 }
811
812 if (transb == blas_no_trans) {
813 if (ldb < n)
814 BLAS_error(routine_name, -11, ldb, NULL);
815 } else {
816 if (ldb < k)
817 BLAS_error(routine_name, -11, ldb, NULL);
818 }
819 }
820
821 /* Test for no-op */
822 if (n == 0 || m == 0 || k == 0)
823 return;
824 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
825 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
826 return;
827 }
828
829 /* Set Index Parameters */
830 if (order == blas_colmajor) {
831 incci = 1;
832 inccij = ldc;
833
834 if (transa == blas_no_trans) {
835 incai = 1;
836 incaih = lda;
837 } else {
838 incai = lda;
839 incaih = 1;
840 }
841
842 if (transb == blas_no_trans) {
843 incbj = ldb;
844 incbhj = 1;
845 } else {
846 incbj = 1;
847 incbhj = ldb;
848 }
849
850 } else {
851 /* row major */
852 incci = ldc;
853 inccij = 1;
854
855 if (transa == blas_no_trans) {
856 incai = lda;
857 incaih = 1;
858 } else {
859 incai = 1;
860 incaih = lda;
861 }
862
863 if (transb == blas_no_trans) {
864 incbj = 1;
865 incbhj = ldb;
866 } else {
867 incbj = ldb;
868 incbhj = 1;
869 }
870
871 }
872
873 FPU_FIX_START;
874
875 /* Ajustment to increments */
876 incci *= 2;
877 inccij *= 2;
878
879
880 incbj *= 2;
881 incbhj *= 2;
882
883 /* alpha = 0. In this case, just return beta * C */
884 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
885
886 ci = 0;
887 for (i = 0; i < m; i++, ci += incci) {
888 cij = ci;
889 for (j = 0; j < n; j++, cij += inccij) {
890 c_elem[0] = c_i[cij];
891 c_elem[1] = c_i[cij + 1];
892 {
893 double head_e1, tail_e1;
894 double d1;
895 double d2;
896 /* Real part */
897 d1 = (double) c_elem[0] * beta_i[0];
898 d2 = (double) -c_elem[1] * beta_i[1];
899 {
900 /* Compute double-double = double + double. */
901 double e, t1, t2;
902
903 /* Knuth trick. */
904 t1 = d1 + d2;
905 e = t1 - d1;
906 t2 = ((d2 - e) + (d1 - (t1 - e)));
907
908 /* The result is t1 + t2, after normalization. */
909 head_e1 = t1 + t2;
910 tail_e1 = t2 - (head_e1 - t1);
911 }
912 head_tmp1[0] = head_e1;
913 tail_tmp1[0] = tail_e1;
914 /* imaginary part */
915 d1 = (double) c_elem[0] * beta_i[1];
916 d2 = (double) c_elem[1] * beta_i[0];
917 {
918 /* Compute double-double = double + double. */
919 double e, t1, t2;
920
921 /* Knuth trick. */
922 t1 = d1 + d2;
923 e = t1 - d1;
924 t2 = ((d2 - e) + (d1 - (t1 - e)));
925
926 /* The result is t1 + t2, after normalization. */
927 head_e1 = t1 + t2;
928 tail_e1 = t2 - (head_e1 - t1);
929 }
930 head_tmp1[1] = head_e1;
931 tail_tmp1[1] = tail_e1;
932 }
933 c_i[cij] = head_tmp1[0];
934 c_i[cij + 1] = head_tmp1[1];
935 }
936 }
937
938 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
939
940 /* Case alpha == 1. */
941
942 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
943 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
944
945 ci = 0;
946 ai = 0;
947 for (i = 0; i < m; i++, ci += incci, ai += incai) {
948
949 cij = ci;
950 bj = 0;
951
952 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
953
954 aih = ai;
955 bhj = bj;
956
957 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
958
959 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
960 a_elem = a_i[aih];
961 b_elem[0] = b_i[bhj];
962 b_elem[1] = b_i[bhj + 1];
963 if (transa == blas_conj_trans) {
964
965 }
966 if (transb == blas_conj_trans) {
967 b_elem[1] = -b_elem[1];
968 }
969 {
970 head_prod[0] = (double) b_elem[0] * a_elem;
971 tail_prod[0] = 0.0;
972 head_prod[1] = (double) b_elem[1] * a_elem;
973 tail_prod[1] = 0.0;
974 }
975 {
976 double head_t, tail_t;
977 double head_a, tail_a;
978 double head_b, tail_b;
979 /* Real part */
980 head_a = head_sum[0];
981 tail_a = tail_sum[0];
982 head_b = head_prod[0];
983 tail_b = tail_prod[0];
984 {
985 /* Compute double-double = double-double + double-double. */
986 double bv;
987 double s1, s2, t1, t2;
988
989 /* Add two hi words. */
990 s1 = head_a + head_b;
991 bv = s1 - head_a;
992 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
993
994 /* Add two lo words. */
995 t1 = tail_a + tail_b;
996 bv = t1 - tail_a;
997 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
998
999 s2 += t1;
1000
1001 /* Renormalize (s1, s2) to (t1, s2) */
1002 t1 = s1 + s2;
1003 s2 = s2 - (t1 - s1);
1004
1005 t2 += s2;
1006
1007 /* Renormalize (t1, t2) */
1008 head_t = t1 + t2;
1009 tail_t = t2 - (head_t - t1);
1010 }
1011 head_sum[0] = head_t;
1012 tail_sum[0] = tail_t;
1013 /* Imaginary part */
1014 head_a = head_sum[1];
1015 tail_a = tail_sum[1];
1016 head_b = head_prod[1];
1017 tail_b = tail_prod[1];
1018 {
1019 /* Compute double-double = double-double + double-double. */
1020 double bv;
1021 double s1, s2, t1, t2;
1022
1023 /* Add two hi words. */
1024 s1 = head_a + head_b;
1025 bv = s1 - head_a;
1026 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1027
1028 /* Add two lo words. */
1029 t1 = tail_a + tail_b;
1030 bv = t1 - tail_a;
1031 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1032
1033 s2 += t1;
1034
1035 /* Renormalize (s1, s2) to (t1, s2) */
1036 t1 = s1 + s2;
1037 s2 = s2 - (t1 - s1);
1038
1039 t2 += s2;
1040
1041 /* Renormalize (t1, t2) */
1042 head_t = t1 + t2;
1043 tail_t = t2 - (head_t - t1);
1044 }
1045 head_sum[1] = head_t;
1046 tail_sum[1] = tail_t;
1047 }
1048 }
1049 c_i[cij] = head_sum[0];
1050 c_i[cij + 1] = head_sum[1];
1051 }
1052 }
1053
1054 } else {
1055 /* Case alpha == 1, but beta != 0.
1056 We compute C <--- A * B + beta * C */
1057
1058 ci = 0;
1059 ai = 0;
1060 for (i = 0; i < m; i++, ci += incci, ai += incai) {
1061
1062 cij = ci;
1063 bj = 0;
1064
1065 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
1066
1067 aih = ai;
1068 bhj = bj;
1069
1070 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
1071
1072 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
1073 a_elem = a_i[aih];
1074 b_elem[0] = b_i[bhj];
1075 b_elem[1] = b_i[bhj + 1];
1076 if (transa == blas_conj_trans) {
1077
1078 }
1079 if (transb == blas_conj_trans) {
1080 b_elem[1] = -b_elem[1];
1081 }
1082 {
1083 head_prod[0] = (double) b_elem[0] * a_elem;
1084 tail_prod[0] = 0.0;
1085 head_prod[1] = (double) b_elem[1] * a_elem;
1086 tail_prod[1] = 0.0;
1087 }
1088 {
1089 double head_t, tail_t;
1090 double head_a, tail_a;
1091 double head_b, tail_b;
1092 /* Real part */
1093 head_a = head_sum[0];
1094 tail_a = tail_sum[0];
1095 head_b = head_prod[0];
1096 tail_b = tail_prod[0];
1097 {
1098 /* Compute double-double = double-double + double-double. */
1099 double bv;
1100 double s1, s2, t1, t2;
1101
1102 /* Add two hi words. */
1103 s1 = head_a + head_b;
1104 bv = s1 - head_a;
1105 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1106
1107 /* Add two lo words. */
1108 t1 = tail_a + tail_b;
1109 bv = t1 - tail_a;
1110 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1111
1112 s2 += t1;
1113
1114 /* Renormalize (s1, s2) to (t1, s2) */
1115 t1 = s1 + s2;
1116 s2 = s2 - (t1 - s1);
1117
1118 t2 += s2;
1119
1120 /* Renormalize (t1, t2) */
1121 head_t = t1 + t2;
1122 tail_t = t2 - (head_t - t1);
1123 }
1124 head_sum[0] = head_t;
1125 tail_sum[0] = tail_t;
1126 /* Imaginary part */
1127 head_a = head_sum[1];
1128 tail_a = tail_sum[1];
1129 head_b = head_prod[1];
1130 tail_b = tail_prod[1];
1131 {
1132 /* Compute double-double = double-double + double-double. */
1133 double bv;
1134 double s1, s2, t1, t2;
1135
1136 /* Add two hi words. */
1137 s1 = head_a + head_b;
1138 bv = s1 - head_a;
1139 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1140
1141 /* Add two lo words. */
1142 t1 = tail_a + tail_b;
1143 bv = t1 - tail_a;
1144 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1145
1146 s2 += t1;
1147
1148 /* Renormalize (s1, s2) to (t1, s2) */
1149 t1 = s1 + s2;
1150 s2 = s2 - (t1 - s1);
1151
1152 t2 += s2;
1153
1154 /* Renormalize (t1, t2) */
1155 head_t = t1 + t2;
1156 tail_t = t2 - (head_t - t1);
1157 }
1158 head_sum[1] = head_t;
1159 tail_sum[1] = tail_t;
1160 }
1161 }
1162
1163 c_elem[0] = c_i[cij];
1164 c_elem[1] = c_i[cij + 1];
1165 {
1166 double head_e1, tail_e1;
1167 double d1;
1168 double d2;
1169 /* Real part */
1170 d1 = (double) c_elem[0] * beta_i[0];
1171 d2 = (double) -c_elem[1] * beta_i[1];
1172 {
1173 /* Compute double-double = double + double. */
1174 double e, t1, t2;
1175
1176 /* Knuth trick. */
1177 t1 = d1 + d2;
1178 e = t1 - d1;
1179 t2 = ((d2 - e) + (d1 - (t1 - e)));
1180
1181 /* The result is t1 + t2, after normalization. */
1182 head_e1 = t1 + t2;
1183 tail_e1 = t2 - (head_e1 - t1);
1184 }
1185 head_tmp2[0] = head_e1;
1186 tail_tmp2[0] = tail_e1;
1187 /* imaginary part */
1188 d1 = (double) c_elem[0] * beta_i[1];
1189 d2 = (double) c_elem[1] * beta_i[0];
1190 {
1191 /* Compute double-double = double + double. */
1192 double e, t1, t2;
1193
1194 /* Knuth trick. */
1195 t1 = d1 + d2;
1196 e = t1 - d1;
1197 t2 = ((d2 - e) + (d1 - (t1 - e)));
1198
1199 /* The result is t1 + t2, after normalization. */
1200 head_e1 = t1 + t2;
1201 tail_e1 = t2 - (head_e1 - t1);
1202 }
1203 head_tmp2[1] = head_e1;
1204 tail_tmp2[1] = tail_e1;
1205 }
1206 head_tmp1[0] = head_sum[0];
1207 tail_tmp1[0] = tail_sum[0];
1208 head_tmp1[1] = head_sum[1];
1209 tail_tmp1[1] = tail_sum[1];
1210 {
1211 double head_t, tail_t;
1212 double head_a, tail_a;
1213 double head_b, tail_b;
1214 /* Real part */
1215 head_a = head_tmp2[0];
1216 tail_a = tail_tmp2[0];
1217 head_b = head_tmp1[0];
1218 tail_b = tail_tmp1[0];
1219 {
1220 /* Compute double-double = double-double + double-double. */
1221 double bv;
1222 double s1, s2, t1, t2;
1223
1224 /* Add two hi words. */
1225 s1 = head_a + head_b;
1226 bv = s1 - head_a;
1227 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1228
1229 /* Add two lo words. */
1230 t1 = tail_a + tail_b;
1231 bv = t1 - tail_a;
1232 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1233
1234 s2 += t1;
1235
1236 /* Renormalize (s1, s2) to (t1, s2) */
1237 t1 = s1 + s2;
1238 s2 = s2 - (t1 - s1);
1239
1240 t2 += s2;
1241
1242 /* Renormalize (t1, t2) */
1243 head_t = t1 + t2;
1244 tail_t = t2 - (head_t - t1);
1245 }
1246 head_tmp1[0] = head_t;
1247 tail_tmp1[0] = tail_t;
1248 /* Imaginary part */
1249 head_a = head_tmp2[1];
1250 tail_a = tail_tmp2[1];
1251 head_b = head_tmp1[1];
1252 tail_b = tail_tmp1[1];
1253 {
1254 /* Compute double-double = double-double + double-double. */
1255 double bv;
1256 double s1, s2, t1, t2;
1257
1258 /* Add two hi words. */
1259 s1 = head_a + head_b;
1260 bv = s1 - head_a;
1261 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1262
1263 /* Add two lo words. */
1264 t1 = tail_a + tail_b;
1265 bv = t1 - tail_a;
1266 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1267
1268 s2 += t1;
1269
1270 /* Renormalize (s1, s2) to (t1, s2) */
1271 t1 = s1 + s2;
1272 s2 = s2 - (t1 - s1);
1273
1274 t2 += s2;
1275
1276 /* Renormalize (t1, t2) */
1277 head_t = t1 + t2;
1278 tail_t = t2 - (head_t - t1);
1279 }
1280 head_tmp1[1] = head_t;
1281 tail_tmp1[1] = tail_t;
1282 }
1283 c_i[cij] = head_tmp1[0];
1284 c_i[cij + 1] = head_tmp1[1];
1285 }
1286 }
1287 }
1288
1289 } else {
1290
1291 /* The most general form, C <-- alpha * A * B + beta * C */
1292 ci = 0;
1293 ai = 0;
1294 for (i = 0; i < m; i++, ci += incci, ai += incai) {
1295
1296 cij = ci;
1297 bj = 0;
1298
1299 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
1300
1301 aih = ai;
1302 bhj = bj;
1303
1304 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
1305
1306 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
1307 a_elem = a_i[aih];
1308 b_elem[0] = b_i[bhj];
1309 b_elem[1] = b_i[bhj + 1];
1310 if (transa == blas_conj_trans) {
1311
1312 }
1313 if (transb == blas_conj_trans) {
1314 b_elem[1] = -b_elem[1];
1315 }
1316 {
1317 head_prod[0] = (double) b_elem[0] * a_elem;
1318 tail_prod[0] = 0.0;
1319 head_prod[1] = (double) b_elem[1] * a_elem;
1320 tail_prod[1] = 0.0;
1321 }
1322 {
1323 double head_t, tail_t;
1324 double head_a, tail_a;
1325 double head_b, tail_b;
1326 /* Real part */
1327 head_a = head_sum[0];
1328 tail_a = tail_sum[0];
1329 head_b = head_prod[0];
1330 tail_b = tail_prod[0];
1331 {
1332 /* Compute double-double = double-double + double-double. */
1333 double bv;
1334 double s1, s2, t1, t2;
1335
1336 /* Add two hi words. */
1337 s1 = head_a + head_b;
1338 bv = s1 - head_a;
1339 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1340
1341 /* Add two lo words. */
1342 t1 = tail_a + tail_b;
1343 bv = t1 - tail_a;
1344 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1345
1346 s2 += t1;
1347
1348 /* Renormalize (s1, s2) to (t1, s2) */
1349 t1 = s1 + s2;
1350 s2 = s2 - (t1 - s1);
1351
1352 t2 += s2;
1353
1354 /* Renormalize (t1, t2) */
1355 head_t = t1 + t2;
1356 tail_t = t2 - (head_t - t1);
1357 }
1358 head_sum[0] = head_t;
1359 tail_sum[0] = tail_t;
1360 /* Imaginary part */
1361 head_a = head_sum[1];
1362 tail_a = tail_sum[1];
1363 head_b = head_prod[1];
1364 tail_b = tail_prod[1];
1365 {
1366 /* Compute double-double = double-double + double-double. */
1367 double bv;
1368 double s1, s2, t1, t2;
1369
1370 /* Add two hi words. */
1371 s1 = head_a + head_b;
1372 bv = s1 - head_a;
1373 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1374
1375 /* Add two lo words. */
1376 t1 = tail_a + tail_b;
1377 bv = t1 - tail_a;
1378 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1379
1380 s2 += t1;
1381
1382 /* Renormalize (s1, s2) to (t1, s2) */
1383 t1 = s1 + s2;
1384 s2 = s2 - (t1 - s1);
1385
1386 t2 += s2;
1387
1388 /* Renormalize (t1, t2) */
1389 head_t = t1 + t2;
1390 tail_t = t2 - (head_t - t1);
1391 }
1392 head_sum[1] = head_t;
1393 tail_sum[1] = tail_t;
1394 }
1395 }
1396
1397 {
1398 double cd[2];
1399 cd[0] = (double) alpha_i[0];
1400 cd[1] = (double) alpha_i[1];
1401 {
1402 /* Compute complex-extra = complex-extra * complex-double. */
1403 double head_a0, tail_a0;
1404 double head_a1, tail_a1;
1405 double head_t1, tail_t1;
1406 double head_t2, tail_t2;
1407 head_a0 = head_sum[0];
1408 tail_a0 = tail_sum[0];
1409 head_a1 = head_sum[1];
1410 tail_a1 = tail_sum[1];
1411 /* real part */
1412 {
1413 /* Compute double-double = double-double * double. */
1414 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1415
1416 con = head_a0 * split;
1417 a11 = con - head_a0;
1418 a11 = con - a11;
1419 a21 = head_a0 - a11;
1420 con = cd[0] * split;
1421 b1 = con - cd[0];
1422 b1 = con - b1;
1423 b2 = cd[0] - b1;
1424
1425 c11 = head_a0 * cd[0];
1426 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1427
1428 c2 = tail_a0 * cd[0];
1429 t1 = c11 + c2;
1430 t2 = (c2 - (t1 - c11)) + c21;
1431
1432 head_t1 = t1 + t2;
1433 tail_t1 = t2 - (head_t1 - t1);
1434 }
1435 {
1436 /* Compute double-double = double-double * double. */
1437 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1438
1439 con = head_a1 * split;
1440 a11 = con - head_a1;
1441 a11 = con - a11;
1442 a21 = head_a1 - a11;
1443 con = cd[1] * split;
1444 b1 = con - cd[1];
1445 b1 = con - b1;
1446 b2 = cd[1] - b1;
1447
1448 c11 = head_a1 * cd[1];
1449 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1450
1451 c2 = tail_a1 * cd[1];
1452 t1 = c11 + c2;
1453 t2 = (c2 - (t1 - c11)) + c21;
1454
1455 head_t2 = t1 + t2;
1456 tail_t2 = t2 - (head_t2 - t1);
1457 }
1458 head_t2 = -head_t2;
1459 tail_t2 = -tail_t2;
1460 {
1461 /* Compute double-double = double-double + double-double. */
1462 double bv;
1463 double s1, s2, t1, t2;
1464
1465 /* Add two hi words. */
1466 s1 = head_t1 + head_t2;
1467 bv = s1 - head_t1;
1468 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1469
1470 /* Add two lo words. */
1471 t1 = tail_t1 + tail_t2;
1472 bv = t1 - tail_t1;
1473 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1474
1475 s2 += t1;
1476
1477 /* Renormalize (s1, s2) to (t1, s2) */
1478 t1 = s1 + s2;
1479 s2 = s2 - (t1 - s1);
1480
1481 t2 += s2;
1482
1483 /* Renormalize (t1, t2) */
1484 head_t1 = t1 + t2;
1485 tail_t1 = t2 - (head_t1 - t1);
1486 }
1487 head_tmp1[0] = head_t1;
1488 tail_tmp1[0] = tail_t1;
1489 /* imaginary part */
1490 {
1491 /* Compute double-double = double-double * double. */
1492 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1493
1494 con = head_a1 * split;
1495 a11 = con - head_a1;
1496 a11 = con - a11;
1497 a21 = head_a1 - a11;
1498 con = cd[0] * split;
1499 b1 = con - cd[0];
1500 b1 = con - b1;
1501 b2 = cd[0] - b1;
1502
1503 c11 = head_a1 * cd[0];
1504 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1505
1506 c2 = tail_a1 * cd[0];
1507 t1 = c11 + c2;
1508 t2 = (c2 - (t1 - c11)) + c21;
1509
1510 head_t1 = t1 + t2;
1511 tail_t1 = t2 - (head_t1 - t1);
1512 }
1513 {
1514 /* Compute double-double = double-double * double. */
1515 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1516
1517 con = head_a0 * split;
1518 a11 = con - head_a0;
1519 a11 = con - a11;
1520 a21 = head_a0 - a11;
1521 con = cd[1] * split;
1522 b1 = con - cd[1];
1523 b1 = con - b1;
1524 b2 = cd[1] - b1;
1525
1526 c11 = head_a0 * cd[1];
1527 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1528
1529 c2 = tail_a0 * cd[1];
1530 t1 = c11 + c2;
1531 t2 = (c2 - (t1 - c11)) + c21;
1532
1533 head_t2 = t1 + t2;
1534 tail_t2 = t2 - (head_t2 - t1);
1535 }
1536 {
1537 /* Compute double-double = double-double + double-double. */
1538 double bv;
1539 double s1, s2, t1, t2;
1540
1541 /* Add two hi words. */
1542 s1 = head_t1 + head_t2;
1543 bv = s1 - head_t1;
1544 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1545
1546 /* Add two lo words. */
1547 t1 = tail_t1 + tail_t2;
1548 bv = t1 - tail_t1;
1549 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1550
1551 s2 += t1;
1552
1553 /* Renormalize (s1, s2) to (t1, s2) */
1554 t1 = s1 + s2;
1555 s2 = s2 - (t1 - s1);
1556
1557 t2 += s2;
1558
1559 /* Renormalize (t1, t2) */
1560 head_t1 = t1 + t2;
1561 tail_t1 = t2 - (head_t1 - t1);
1562 }
1563 head_tmp1[1] = head_t1;
1564 tail_tmp1[1] = tail_t1;
1565 }
1566
1567 }
1568 c_elem[0] = c_i[cij];
1569 c_elem[1] = c_i[cij + 1];
1570 {
1571 double head_e1, tail_e1;
1572 double d1;
1573 double d2;
1574 /* Real part */
1575 d1 = (double) c_elem[0] * beta_i[0];
1576 d2 = (double) -c_elem[1] * beta_i[1];
1577 {
1578 /* Compute double-double = double + double. */
1579 double e, t1, t2;
1580
1581 /* Knuth trick. */
1582 t1 = d1 + d2;
1583 e = t1 - d1;
1584 t2 = ((d2 - e) + (d1 - (t1 - e)));
1585
1586 /* The result is t1 + t2, after normalization. */
1587 head_e1 = t1 + t2;
1588 tail_e1 = t2 - (head_e1 - t1);
1589 }
1590 head_tmp2[0] = head_e1;
1591 tail_tmp2[0] = tail_e1;
1592 /* imaginary part */
1593 d1 = (double) c_elem[0] * beta_i[1];
1594 d2 = (double) c_elem[1] * beta_i[0];
1595 {
1596 /* Compute double-double = double + double. */
1597 double e, t1, t2;
1598
1599 /* Knuth trick. */
1600 t1 = d1 + d2;
1601 e = t1 - d1;
1602 t2 = ((d2 - e) + (d1 - (t1 - e)));
1603
1604 /* The result is t1 + t2, after normalization. */
1605 head_e1 = t1 + t2;
1606 tail_e1 = t2 - (head_e1 - t1);
1607 }
1608 head_tmp2[1] = head_e1;
1609 tail_tmp2[1] = tail_e1;
1610 }
1611 {
1612 double head_t, tail_t;
1613 double head_a, tail_a;
1614 double head_b, tail_b;
1615 /* Real part */
1616 head_a = head_tmp1[0];
1617 tail_a = tail_tmp1[0];
1618 head_b = head_tmp2[0];
1619 tail_b = tail_tmp2[0];
1620 {
1621 /* Compute double-double = double-double + double-double. */
1622 double bv;
1623 double s1, s2, t1, t2;
1624
1625 /* Add two hi words. */
1626 s1 = head_a + head_b;
1627 bv = s1 - head_a;
1628 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1629
1630 /* Add two lo words. */
1631 t1 = tail_a + tail_b;
1632 bv = t1 - tail_a;
1633 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1634
1635 s2 += t1;
1636
1637 /* Renormalize (s1, s2) to (t1, s2) */
1638 t1 = s1 + s2;
1639 s2 = s2 - (t1 - s1);
1640
1641 t2 += s2;
1642
1643 /* Renormalize (t1, t2) */
1644 head_t = t1 + t2;
1645 tail_t = t2 - (head_t - t1);
1646 }
1647 head_tmp1[0] = head_t;
1648 tail_tmp1[0] = tail_t;
1649 /* Imaginary part */
1650 head_a = head_tmp1[1];
1651 tail_a = tail_tmp1[1];
1652 head_b = head_tmp2[1];
1653 tail_b = tail_tmp2[1];
1654 {
1655 /* Compute double-double = double-double + double-double. */
1656 double bv;
1657 double s1, s2, t1, t2;
1658
1659 /* Add two hi words. */
1660 s1 = head_a + head_b;
1661 bv = s1 - head_a;
1662 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1663
1664 /* Add two lo words. */
1665 t1 = tail_a + tail_b;
1666 bv = t1 - tail_a;
1667 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1668
1669 s2 += t1;
1670
1671 /* Renormalize (s1, s2) to (t1, s2) */
1672 t1 = s1 + s2;
1673 s2 = s2 - (t1 - s1);
1674
1675 t2 += s2;
1676
1677 /* Renormalize (t1, t2) */
1678 head_t = t1 + t2;
1679 tail_t = t2 - (head_t - t1);
1680 }
1681 head_tmp1[1] = head_t;
1682 tail_tmp1[1] = tail_t;
1683 }
1684 c_i[cij] = head_tmp1[0];
1685 c_i[cij + 1] = head_tmp1[1];
1686 }
1687 }
1688
1689 }
1690
1691 FPU_FIX_STOP;
1692
1693 break;
1694 }
1695 }
1696 }
1697