1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_zgemm_c_c_x(enum blas_order_type order,enum blas_trans_type transa,enum blas_trans_type transb,int m,int n,int k,const void * alpha,const void * a,int lda,const void * b,int ldb,const void * beta,void * c,int ldc,enum blas_prec_type prec)3 void BLAS_zgemm_c_c_x(enum blas_order_type order, enum blas_trans_type transa,
4 enum blas_trans_type transb, int m, int n, int k,
5 const void *alpha, const void *a, int lda,
6 const void *b, int ldb, const void *beta, void *c,
7 int ldc, enum blas_prec_type prec)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * This routine computes the matrix product:
14 *
15 * C <- alpha * op(A) * op(B) + beta * C .
16 *
17 * where op(M) represents either M, M transpose,
18 * or M conjugate transpose.
19 *
20 * Arguments
21 * =========
22 *
23 * order (input) enum blas_order_type
24 * Storage format of input matrices A, B, and C.
25 *
26 * transa (input) enum blas_trans_type
27 * Operation to be done on matrix A before multiplication.
28 * Can be no operation, transposition, or conjugate transposition.
29 *
30 * transb (input) enum blas_trans_type
31 * Operation to be done on matrix B before multiplication.
32 * Can be no operation, transposition, or conjugate transposition.
33 *
34 * m n k (input) int
35 * The dimensions of matrices A, B, and C.
36 * Matrix C is m-by-n matrix.
37 * Matrix A is m-by-k if A is not transposed,
38 * k-by-m otherwise.
39 * Matrix B is k-by-n if B is not transposed,
40 * n-by-k otherwise.
41 *
42 * alpha (input) const void*
43 *
44 * a (input) const void*
45 * matrix A.
46 *
47 * lda (input) int
48 * leading dimension of A.
49 *
50 * b (input) const void*
51 * matrix B
52 *
53 * ldb (input) int
54 * leading dimension of B.
55 *
56 * beta (input) const void*
57 *
58 * c (input/output) void*
59 * matrix C
60 *
61 * ldc (input) int
62 * leading dimension of C.
63 *
64 * prec (input) enum blas_prec_type
65 * Specifies the internal precision to be used.
66 * = blas_prec_single: single precision.
67 * = blas_prec_double: double precision.
68 * = blas_prec_extra : anything at least 1.5 times as accurate
69 * than double, and wider than 80-bits.
70 * We use double-double in our implementation.
71 *
72 */
73 {
74 static const char routine_name[] = "BLAS_zgemm_c_c_x";
75 switch (prec) {
76
77 case blas_prec_single:
78 case blas_prec_double:
79 case blas_prec_indigenous:{
80
81
82 /* Integer Index Variables */
83 int i, j, h;
84
85 int ai, bj, ci;
86 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
87
88 int incai, incaih; /* Index increments for matrix a */
89 int incbj, incbhj; /* Index increments for matrix b */
90 int incci, inccij; /* Index increments for matrix c */
91
92 /* Input Matrices */
93 const float *a_i = (float *) a;
94 const float *b_i = (float *) b;
95
96 /* Output Matrix */
97 double *c_i = (double *) c;
98
99 /* Input Scalars */
100 double *alpha_i = (double *) alpha;
101 double *beta_i = (double *) beta;
102
103 /* Temporary Floating-Point Variables */
104 float a_elem[2];
105 float b_elem[2];
106 double c_elem[2];
107 double prod[2];
108 double sum[2];
109 double tmp1[2];
110 double tmp2[2];
111
112
113
114 /* Test for error conditions */
115 if (m < 0)
116 BLAS_error(routine_name, -4, m, NULL);
117 if (n < 0)
118 BLAS_error(routine_name, -5, n, NULL);
119 if (k < 0)
120 BLAS_error(routine_name, -6, k, NULL);
121
122 if (order == blas_colmajor) {
123
124 if (ldc < m)
125 BLAS_error(routine_name, -14, ldc, NULL);
126
127 if (transa == blas_no_trans) {
128 if (lda < m)
129 BLAS_error(routine_name, -9, lda, NULL);
130 } else {
131 if (lda < k)
132 BLAS_error(routine_name, -9, lda, NULL);
133 }
134
135 if (transb == blas_no_trans) {
136 if (ldb < k)
137 BLAS_error(routine_name, -11, ldb, NULL);
138 } else {
139 if (ldb < n)
140 BLAS_error(routine_name, -11, ldb, NULL);
141 }
142
143 } else {
144 /* row major */
145 if (ldc < n)
146 BLAS_error(routine_name, -14, ldc, NULL);
147
148 if (transa == blas_no_trans) {
149 if (lda < k)
150 BLAS_error(routine_name, -9, lda, NULL);
151 } else {
152 if (lda < m)
153 BLAS_error(routine_name, -9, lda, NULL);
154 }
155
156 if (transb == blas_no_trans) {
157 if (ldb < n)
158 BLAS_error(routine_name, -11, ldb, NULL);
159 } else {
160 if (ldb < k)
161 BLAS_error(routine_name, -11, ldb, NULL);
162 }
163 }
164
165 /* Test for no-op */
166 if (n == 0 || m == 0 || k == 0)
167 return;
168 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
169 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
170 return;
171 }
172
173 /* Set Index Parameters */
174 if (order == blas_colmajor) {
175 incci = 1;
176 inccij = ldc;
177
178 if (transa == blas_no_trans) {
179 incai = 1;
180 incaih = lda;
181 } else {
182 incai = lda;
183 incaih = 1;
184 }
185
186 if (transb == blas_no_trans) {
187 incbj = ldb;
188 incbhj = 1;
189 } else {
190 incbj = 1;
191 incbhj = ldb;
192 }
193
194 } else {
195 /* row major */
196 incci = ldc;
197 inccij = 1;
198
199 if (transa == blas_no_trans) {
200 incai = lda;
201 incaih = 1;
202 } else {
203 incai = 1;
204 incaih = lda;
205 }
206
207 if (transb == blas_no_trans) {
208 incbj = 1;
209 incbhj = ldb;
210 } else {
211 incbj = ldb;
212 incbhj = 1;
213 }
214
215 }
216
217
218
219 /* Ajustment to increments */
220 incci *= 2;
221 inccij *= 2;
222 incai *= 2;
223 incaih *= 2;
224 incbj *= 2;
225 incbhj *= 2;
226
227 /* alpha = 0. In this case, just return beta * C */
228 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
229
230 ci = 0;
231 for (i = 0; i < m; i++, ci += incci) {
232 cij = ci;
233 for (j = 0; j < n; j++, cij += inccij) {
234 c_elem[0] = c_i[cij];
235 c_elem[1] = c_i[cij + 1];
236 {
237 tmp1[0] =
238 (double) c_elem[0] * beta_i[0] -
239 (double) c_elem[1] * beta_i[1];
240 tmp1[1] =
241 (double) c_elem[0] * beta_i[1] +
242 (double) c_elem[1] * beta_i[0];
243 }
244 c_i[cij] = tmp1[0];
245 c_i[cij + 1] = tmp1[1];
246 }
247 }
248
249 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
250
251 /* Case alpha == 1. */
252
253 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
254 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
255
256 ci = 0;
257 ai = 0;
258 for (i = 0; i < m; i++, ci += incci, ai += incai) {
259
260 cij = ci;
261 bj = 0;
262
263 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
264
265 aih = ai;
266 bhj = bj;
267
268 sum[0] = sum[1] = 0.0;
269
270 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
271 a_elem[0] = a_i[aih];
272 a_elem[1] = a_i[aih + 1];
273 b_elem[0] = b_i[bhj];
274 b_elem[1] = b_i[bhj + 1];
275 if (transa == blas_conj_trans) {
276 a_elem[1] = -a_elem[1];
277 }
278 if (transb == blas_conj_trans) {
279 b_elem[1] = -b_elem[1];
280 }
281 {
282 prod[0] =
283 (double) a_elem[0] * b_elem[0] -
284 (double) a_elem[1] * b_elem[1];
285 prod[1] =
286 (double) a_elem[0] * b_elem[1] +
287 (double) a_elem[1] * b_elem[0];
288 }
289 sum[0] = sum[0] + prod[0];
290 sum[1] = sum[1] + prod[1];
291 }
292 c_i[cij] = sum[0];
293 c_i[cij + 1] = sum[1];
294 }
295 }
296
297 } else {
298 /* Case alpha == 1, but beta != 0.
299 We compute C <--- A * B + beta * C */
300
301 ci = 0;
302 ai = 0;
303 for (i = 0; i < m; i++, ci += incci, ai += incai) {
304
305 cij = ci;
306 bj = 0;
307
308 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
309
310 aih = ai;
311 bhj = bj;
312
313 sum[0] = sum[1] = 0.0;
314
315 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
316 a_elem[0] = a_i[aih];
317 a_elem[1] = a_i[aih + 1];
318 b_elem[0] = b_i[bhj];
319 b_elem[1] = b_i[bhj + 1];
320 if (transa == blas_conj_trans) {
321 a_elem[1] = -a_elem[1];
322 }
323 if (transb == blas_conj_trans) {
324 b_elem[1] = -b_elem[1];
325 }
326 {
327 prod[0] =
328 (double) a_elem[0] * b_elem[0] -
329 (double) a_elem[1] * b_elem[1];
330 prod[1] =
331 (double) a_elem[0] * b_elem[1] +
332 (double) a_elem[1] * b_elem[0];
333 }
334 sum[0] = sum[0] + prod[0];
335 sum[1] = sum[1] + prod[1];
336 }
337
338 c_elem[0] = c_i[cij];
339 c_elem[1] = c_i[cij + 1];
340 {
341 tmp2[0] =
342 (double) c_elem[0] * beta_i[0] -
343 (double) c_elem[1] * beta_i[1];
344 tmp2[1] =
345 (double) c_elem[0] * beta_i[1] +
346 (double) c_elem[1] * beta_i[0];
347 }
348 tmp1[0] = sum[0];
349 tmp1[1] = sum[1];
350 tmp1[0] = tmp2[0] + tmp1[0];
351 tmp1[1] = tmp2[1] + tmp1[1];
352 c_i[cij] = tmp1[0];
353 c_i[cij + 1] = tmp1[1];
354 }
355 }
356 }
357
358 } else {
359
360 /* The most general form, C <-- alpha * A * B + beta * C */
361 ci = 0;
362 ai = 0;
363 for (i = 0; i < m; i++, ci += incci, ai += incai) {
364
365 cij = ci;
366 bj = 0;
367
368 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
369
370 aih = ai;
371 bhj = bj;
372
373 sum[0] = sum[1] = 0.0;
374
375 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
376 a_elem[0] = a_i[aih];
377 a_elem[1] = a_i[aih + 1];
378 b_elem[0] = b_i[bhj];
379 b_elem[1] = b_i[bhj + 1];
380 if (transa == blas_conj_trans) {
381 a_elem[1] = -a_elem[1];
382 }
383 if (transb == blas_conj_trans) {
384 b_elem[1] = -b_elem[1];
385 }
386 {
387 prod[0] =
388 (double) a_elem[0] * b_elem[0] -
389 (double) a_elem[1] * b_elem[1];
390 prod[1] =
391 (double) a_elem[0] * b_elem[1] +
392 (double) a_elem[1] * b_elem[0];
393 }
394 sum[0] = sum[0] + prod[0];
395 sum[1] = sum[1] + prod[1];
396 }
397
398 {
399 tmp1[0] =
400 (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
401 tmp1[1] =
402 (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
403 }
404 c_elem[0] = c_i[cij];
405 c_elem[1] = c_i[cij + 1];
406 {
407 tmp2[0] =
408 (double) c_elem[0] * beta_i[0] -
409 (double) c_elem[1] * beta_i[1];
410 tmp2[1] =
411 (double) c_elem[0] * beta_i[1] +
412 (double) c_elem[1] * beta_i[0];
413 }
414 tmp1[0] = tmp1[0] + tmp2[0];
415 tmp1[1] = tmp1[1] + tmp2[1];
416 c_i[cij] = tmp1[0];
417 c_i[cij + 1] = tmp1[1];
418 }
419 }
420
421 }
422
423
424
425 break;
426 }
427
428 case blas_prec_extra:{
429
430
431 /* Integer Index Variables */
432 int i, j, h;
433
434 int ai, bj, ci;
435 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
436
437 int incai, incaih; /* Index increments for matrix a */
438 int incbj, incbhj; /* Index increments for matrix b */
439 int incci, inccij; /* Index increments for matrix c */
440
441 /* Input Matrices */
442 const float *a_i = (float *) a;
443 const float *b_i = (float *) b;
444
445 /* Output Matrix */
446 double *c_i = (double *) c;
447
448 /* Input Scalars */
449 double *alpha_i = (double *) alpha;
450 double *beta_i = (double *) beta;
451
452 /* Temporary Floating-Point Variables */
453 float a_elem[2];
454 float b_elem[2];
455 double c_elem[2];
456 double head_prod[2], tail_prod[2];
457 double head_sum[2], tail_sum[2];
458 double head_tmp1[2], tail_tmp1[2];
459 double head_tmp2[2], tail_tmp2[2];
460
461 FPU_FIX_DECL;
462
463 /* Test for error conditions */
464 if (m < 0)
465 BLAS_error(routine_name, -4, m, NULL);
466 if (n < 0)
467 BLAS_error(routine_name, -5, n, NULL);
468 if (k < 0)
469 BLAS_error(routine_name, -6, k, NULL);
470
471 if (order == blas_colmajor) {
472
473 if (ldc < m)
474 BLAS_error(routine_name, -14, ldc, NULL);
475
476 if (transa == blas_no_trans) {
477 if (lda < m)
478 BLAS_error(routine_name, -9, lda, NULL);
479 } else {
480 if (lda < k)
481 BLAS_error(routine_name, -9, lda, NULL);
482 }
483
484 if (transb == blas_no_trans) {
485 if (ldb < k)
486 BLAS_error(routine_name, -11, ldb, NULL);
487 } else {
488 if (ldb < n)
489 BLAS_error(routine_name, -11, ldb, NULL);
490 }
491
492 } else {
493 /* row major */
494 if (ldc < n)
495 BLAS_error(routine_name, -14, ldc, NULL);
496
497 if (transa == blas_no_trans) {
498 if (lda < k)
499 BLAS_error(routine_name, -9, lda, NULL);
500 } else {
501 if (lda < m)
502 BLAS_error(routine_name, -9, lda, NULL);
503 }
504
505 if (transb == blas_no_trans) {
506 if (ldb < n)
507 BLAS_error(routine_name, -11, ldb, NULL);
508 } else {
509 if (ldb < k)
510 BLAS_error(routine_name, -11, ldb, NULL);
511 }
512 }
513
514 /* Test for no-op */
515 if (n == 0 || m == 0 || k == 0)
516 return;
517 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
518 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
519 return;
520 }
521
522 /* Set Index Parameters */
523 if (order == blas_colmajor) {
524 incci = 1;
525 inccij = ldc;
526
527 if (transa == blas_no_trans) {
528 incai = 1;
529 incaih = lda;
530 } else {
531 incai = lda;
532 incaih = 1;
533 }
534
535 if (transb == blas_no_trans) {
536 incbj = ldb;
537 incbhj = 1;
538 } else {
539 incbj = 1;
540 incbhj = ldb;
541 }
542
543 } else {
544 /* row major */
545 incci = ldc;
546 inccij = 1;
547
548 if (transa == blas_no_trans) {
549 incai = lda;
550 incaih = 1;
551 } else {
552 incai = 1;
553 incaih = lda;
554 }
555
556 if (transb == blas_no_trans) {
557 incbj = 1;
558 incbhj = ldb;
559 } else {
560 incbj = ldb;
561 incbhj = 1;
562 }
563
564 }
565
566 FPU_FIX_START;
567
568 /* Ajustment to increments */
569 incci *= 2;
570 inccij *= 2;
571 incai *= 2;
572 incaih *= 2;
573 incbj *= 2;
574 incbhj *= 2;
575
576 /* alpha = 0. In this case, just return beta * C */
577 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
578
579 ci = 0;
580 for (i = 0; i < m; i++, ci += incci) {
581 cij = ci;
582 for (j = 0; j < n; j++, cij += inccij) {
583 c_elem[0] = c_i[cij];
584 c_elem[1] = c_i[cij + 1];
585 {
586 /* Compute complex-extra = complex-double * complex-double. */
587 double head_t1, tail_t1;
588 double head_t2, tail_t2;
589 /* Real part */
590 {
591 /* Compute double_double = double * double. */
592 double a1, a2, b1, b2, con;
593
594 con = c_elem[0] * split;
595 a1 = con - c_elem[0];
596 a1 = con - a1;
597 a2 = c_elem[0] - a1;
598 con = beta_i[0] * split;
599 b1 = con - beta_i[0];
600 b1 = con - b1;
601 b2 = beta_i[0] - b1;
602
603 head_t1 = c_elem[0] * beta_i[0];
604 tail_t1 =
605 (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
606 }
607 {
608 /* Compute double_double = double * double. */
609 double a1, a2, b1, b2, con;
610
611 con = c_elem[1] * split;
612 a1 = con - c_elem[1];
613 a1 = con - a1;
614 a2 = c_elem[1] - a1;
615 con = beta_i[1] * split;
616 b1 = con - beta_i[1];
617 b1 = con - b1;
618 b2 = beta_i[1] - b1;
619
620 head_t2 = c_elem[1] * beta_i[1];
621 tail_t2 =
622 (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
623 }
624 head_t2 = -head_t2;
625 tail_t2 = -tail_t2;
626 {
627 /* Compute double-double = double-double + double-double. */
628 double bv;
629 double s1, s2, t1, t2;
630
631 /* Add two hi words. */
632 s1 = head_t1 + head_t2;
633 bv = s1 - head_t1;
634 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
635
636 /* Add two lo words. */
637 t1 = tail_t1 + tail_t2;
638 bv = t1 - tail_t1;
639 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
640
641 s2 += t1;
642
643 /* Renormalize (s1, s2) to (t1, s2) */
644 t1 = s1 + s2;
645 s2 = s2 - (t1 - s1);
646
647 t2 += s2;
648
649 /* Renormalize (t1, t2) */
650 head_t1 = t1 + t2;
651 tail_t1 = t2 - (head_t1 - t1);
652 }
653 head_tmp1[0] = head_t1;
654 tail_tmp1[0] = tail_t1;
655 /* Imaginary part */
656 {
657 /* Compute double_double = double * double. */
658 double a1, a2, b1, b2, con;
659
660 con = c_elem[1] * split;
661 a1 = con - c_elem[1];
662 a1 = con - a1;
663 a2 = c_elem[1] - a1;
664 con = beta_i[0] * split;
665 b1 = con - beta_i[0];
666 b1 = con - b1;
667 b2 = beta_i[0] - b1;
668
669 head_t1 = c_elem[1] * beta_i[0];
670 tail_t1 =
671 (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
672 }
673 {
674 /* Compute double_double = double * double. */
675 double a1, a2, b1, b2, con;
676
677 con = c_elem[0] * split;
678 a1 = con - c_elem[0];
679 a1 = con - a1;
680 a2 = c_elem[0] - a1;
681 con = beta_i[1] * split;
682 b1 = con - beta_i[1];
683 b1 = con - b1;
684 b2 = beta_i[1] - b1;
685
686 head_t2 = c_elem[0] * beta_i[1];
687 tail_t2 =
688 (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
689 }
690 {
691 /* Compute double-double = double-double + double-double. */
692 double bv;
693 double s1, s2, t1, t2;
694
695 /* Add two hi words. */
696 s1 = head_t1 + head_t2;
697 bv = s1 - head_t1;
698 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
699
700 /* Add two lo words. */
701 t1 = tail_t1 + tail_t2;
702 bv = t1 - tail_t1;
703 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
704
705 s2 += t1;
706
707 /* Renormalize (s1, s2) to (t1, s2) */
708 t1 = s1 + s2;
709 s2 = s2 - (t1 - s1);
710
711 t2 += s2;
712
713 /* Renormalize (t1, t2) */
714 head_t1 = t1 + t2;
715 tail_t1 = t2 - (head_t1 - t1);
716 }
717 head_tmp1[1] = head_t1;
718 tail_tmp1[1] = tail_t1;
719 }
720 c_i[cij] = head_tmp1[0];
721 c_i[cij + 1] = head_tmp1[1];
722 }
723 }
724
725 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
726
727 /* Case alpha == 1. */
728
729 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
730 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
731
732 ci = 0;
733 ai = 0;
734 for (i = 0; i < m; i++, ci += incci, ai += incai) {
735
736 cij = ci;
737 bj = 0;
738
739 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
740
741 aih = ai;
742 bhj = bj;
743
744 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
745
746 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
747 a_elem[0] = a_i[aih];
748 a_elem[1] = a_i[aih + 1];
749 b_elem[0] = b_i[bhj];
750 b_elem[1] = b_i[bhj + 1];
751 if (transa == blas_conj_trans) {
752 a_elem[1] = -a_elem[1];
753 }
754 if (transb == blas_conj_trans) {
755 b_elem[1] = -b_elem[1];
756 }
757 {
758 double head_e1, tail_e1;
759 double d1;
760 double d2;
761 /* Real part */
762 d1 = (double) a_elem[0] * b_elem[0];
763 d2 = (double) -a_elem[1] * b_elem[1];
764 {
765 /* Compute double-double = double + double. */
766 double e, t1, t2;
767
768 /* Knuth trick. */
769 t1 = d1 + d2;
770 e = t1 - d1;
771 t2 = ((d2 - e) + (d1 - (t1 - e)));
772
773 /* The result is t1 + t2, after normalization. */
774 head_e1 = t1 + t2;
775 tail_e1 = t2 - (head_e1 - t1);
776 }
777 head_prod[0] = head_e1;
778 tail_prod[0] = tail_e1;
779 /* imaginary part */
780 d1 = (double) a_elem[0] * b_elem[1];
781 d2 = (double) a_elem[1] * b_elem[0];
782 {
783 /* Compute double-double = double + double. */
784 double e, t1, t2;
785
786 /* Knuth trick. */
787 t1 = d1 + d2;
788 e = t1 - d1;
789 t2 = ((d2 - e) + (d1 - (t1 - e)));
790
791 /* The result is t1 + t2, after normalization. */
792 head_e1 = t1 + t2;
793 tail_e1 = t2 - (head_e1 - t1);
794 }
795 head_prod[1] = head_e1;
796 tail_prod[1] = tail_e1;
797 }
798 {
799 double head_t, tail_t;
800 double head_a, tail_a;
801 double head_b, tail_b;
802 /* Real part */
803 head_a = head_sum[0];
804 tail_a = tail_sum[0];
805 head_b = head_prod[0];
806 tail_b = tail_prod[0];
807 {
808 /* Compute double-double = double-double + double-double. */
809 double bv;
810 double s1, s2, t1, t2;
811
812 /* Add two hi words. */
813 s1 = head_a + head_b;
814 bv = s1 - head_a;
815 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
816
817 /* Add two lo words. */
818 t1 = tail_a + tail_b;
819 bv = t1 - tail_a;
820 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
821
822 s2 += t1;
823
824 /* Renormalize (s1, s2) to (t1, s2) */
825 t1 = s1 + s2;
826 s2 = s2 - (t1 - s1);
827
828 t2 += s2;
829
830 /* Renormalize (t1, t2) */
831 head_t = t1 + t2;
832 tail_t = t2 - (head_t - t1);
833 }
834 head_sum[0] = head_t;
835 tail_sum[0] = tail_t;
836 /* Imaginary part */
837 head_a = head_sum[1];
838 tail_a = tail_sum[1];
839 head_b = head_prod[1];
840 tail_b = tail_prod[1];
841 {
842 /* Compute double-double = double-double + double-double. */
843 double bv;
844 double s1, s2, t1, t2;
845
846 /* Add two hi words. */
847 s1 = head_a + head_b;
848 bv = s1 - head_a;
849 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
850
851 /* Add two lo words. */
852 t1 = tail_a + tail_b;
853 bv = t1 - tail_a;
854 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
855
856 s2 += t1;
857
858 /* Renormalize (s1, s2) to (t1, s2) */
859 t1 = s1 + s2;
860 s2 = s2 - (t1 - s1);
861
862 t2 += s2;
863
864 /* Renormalize (t1, t2) */
865 head_t = t1 + t2;
866 tail_t = t2 - (head_t - t1);
867 }
868 head_sum[1] = head_t;
869 tail_sum[1] = tail_t;
870 }
871 }
872 c_i[cij] = head_sum[0];
873 c_i[cij + 1] = head_sum[1];
874 }
875 }
876
877 } else {
878 /* Case alpha == 1, but beta != 0.
879 We compute C <--- A * B + beta * C */
880
881 ci = 0;
882 ai = 0;
883 for (i = 0; i < m; i++, ci += incci, ai += incai) {
884
885 cij = ci;
886 bj = 0;
887
888 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
889
890 aih = ai;
891 bhj = bj;
892
893 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
894
895 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
896 a_elem[0] = a_i[aih];
897 a_elem[1] = a_i[aih + 1];
898 b_elem[0] = b_i[bhj];
899 b_elem[1] = b_i[bhj + 1];
900 if (transa == blas_conj_trans) {
901 a_elem[1] = -a_elem[1];
902 }
903 if (transb == blas_conj_trans) {
904 b_elem[1] = -b_elem[1];
905 }
906 {
907 double head_e1, tail_e1;
908 double d1;
909 double d2;
910 /* Real part */
911 d1 = (double) a_elem[0] * b_elem[0];
912 d2 = (double) -a_elem[1] * b_elem[1];
913 {
914 /* Compute double-double = double + double. */
915 double e, t1, t2;
916
917 /* Knuth trick. */
918 t1 = d1 + d2;
919 e = t1 - d1;
920 t2 = ((d2 - e) + (d1 - (t1 - e)));
921
922 /* The result is t1 + t2, after normalization. */
923 head_e1 = t1 + t2;
924 tail_e1 = t2 - (head_e1 - t1);
925 }
926 head_prod[0] = head_e1;
927 tail_prod[0] = tail_e1;
928 /* imaginary part */
929 d1 = (double) a_elem[0] * b_elem[1];
930 d2 = (double) a_elem[1] * b_elem[0];
931 {
932 /* Compute double-double = double + double. */
933 double e, t1, t2;
934
935 /* Knuth trick. */
936 t1 = d1 + d2;
937 e = t1 - d1;
938 t2 = ((d2 - e) + (d1 - (t1 - e)));
939
940 /* The result is t1 + t2, after normalization. */
941 head_e1 = t1 + t2;
942 tail_e1 = t2 - (head_e1 - t1);
943 }
944 head_prod[1] = head_e1;
945 tail_prod[1] = tail_e1;
946 }
947 {
948 double head_t, tail_t;
949 double head_a, tail_a;
950 double head_b, tail_b;
951 /* Real part */
952 head_a = head_sum[0];
953 tail_a = tail_sum[0];
954 head_b = head_prod[0];
955 tail_b = tail_prod[0];
956 {
957 /* Compute double-double = double-double + double-double. */
958 double bv;
959 double s1, s2, t1, t2;
960
961 /* Add two hi words. */
962 s1 = head_a + head_b;
963 bv = s1 - head_a;
964 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
965
966 /* Add two lo words. */
967 t1 = tail_a + tail_b;
968 bv = t1 - tail_a;
969 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
970
971 s2 += t1;
972
973 /* Renormalize (s1, s2) to (t1, s2) */
974 t1 = s1 + s2;
975 s2 = s2 - (t1 - s1);
976
977 t2 += s2;
978
979 /* Renormalize (t1, t2) */
980 head_t = t1 + t2;
981 tail_t = t2 - (head_t - t1);
982 }
983 head_sum[0] = head_t;
984 tail_sum[0] = tail_t;
985 /* Imaginary part */
986 head_a = head_sum[1];
987 tail_a = tail_sum[1];
988 head_b = head_prod[1];
989 tail_b = tail_prod[1];
990 {
991 /* Compute double-double = double-double + double-double. */
992 double bv;
993 double s1, s2, t1, t2;
994
995 /* Add two hi words. */
996 s1 = head_a + head_b;
997 bv = s1 - head_a;
998 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
999
1000 /* Add two lo words. */
1001 t1 = tail_a + tail_b;
1002 bv = t1 - tail_a;
1003 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1004
1005 s2 += t1;
1006
1007 /* Renormalize (s1, s2) to (t1, s2) */
1008 t1 = s1 + s2;
1009 s2 = s2 - (t1 - s1);
1010
1011 t2 += s2;
1012
1013 /* Renormalize (t1, t2) */
1014 head_t = t1 + t2;
1015 tail_t = t2 - (head_t - t1);
1016 }
1017 head_sum[1] = head_t;
1018 tail_sum[1] = tail_t;
1019 }
1020 }
1021
1022 c_elem[0] = c_i[cij];
1023 c_elem[1] = c_i[cij + 1];
1024 {
1025 /* Compute complex-extra = complex-double * complex-double. */
1026 double head_t1, tail_t1;
1027 double head_t2, tail_t2;
1028 /* Real part */
1029 {
1030 /* Compute double_double = double * double. */
1031 double a1, a2, b1, b2, con;
1032
1033 con = c_elem[0] * split;
1034 a1 = con - c_elem[0];
1035 a1 = con - a1;
1036 a2 = c_elem[0] - a1;
1037 con = beta_i[0] * split;
1038 b1 = con - beta_i[0];
1039 b1 = con - b1;
1040 b2 = beta_i[0] - b1;
1041
1042 head_t1 = c_elem[0] * beta_i[0];
1043 tail_t1 =
1044 (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
1045 }
1046 {
1047 /* Compute double_double = double * double. */
1048 double a1, a2, b1, b2, con;
1049
1050 con = c_elem[1] * split;
1051 a1 = con - c_elem[1];
1052 a1 = con - a1;
1053 a2 = c_elem[1] - a1;
1054 con = beta_i[1] * split;
1055 b1 = con - beta_i[1];
1056 b1 = con - b1;
1057 b2 = beta_i[1] - b1;
1058
1059 head_t2 = c_elem[1] * beta_i[1];
1060 tail_t2 =
1061 (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
1062 }
1063 head_t2 = -head_t2;
1064 tail_t2 = -tail_t2;
1065 {
1066 /* Compute double-double = double-double + double-double. */
1067 double bv;
1068 double s1, s2, t1, t2;
1069
1070 /* Add two hi words. */
1071 s1 = head_t1 + head_t2;
1072 bv = s1 - head_t1;
1073 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1074
1075 /* Add two lo words. */
1076 t1 = tail_t1 + tail_t2;
1077 bv = t1 - tail_t1;
1078 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1079
1080 s2 += t1;
1081
1082 /* Renormalize (s1, s2) to (t1, s2) */
1083 t1 = s1 + s2;
1084 s2 = s2 - (t1 - s1);
1085
1086 t2 += s2;
1087
1088 /* Renormalize (t1, t2) */
1089 head_t1 = t1 + t2;
1090 tail_t1 = t2 - (head_t1 - t1);
1091 }
1092 head_tmp2[0] = head_t1;
1093 tail_tmp2[0] = tail_t1;
1094 /* Imaginary part */
1095 {
1096 /* Compute double_double = double * double. */
1097 double a1, a2, b1, b2, con;
1098
1099 con = c_elem[1] * split;
1100 a1 = con - c_elem[1];
1101 a1 = con - a1;
1102 a2 = c_elem[1] - a1;
1103 con = beta_i[0] * split;
1104 b1 = con - beta_i[0];
1105 b1 = con - b1;
1106 b2 = beta_i[0] - b1;
1107
1108 head_t1 = c_elem[1] * beta_i[0];
1109 tail_t1 =
1110 (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
1111 }
1112 {
1113 /* Compute double_double = double * double. */
1114 double a1, a2, b1, b2, con;
1115
1116 con = c_elem[0] * split;
1117 a1 = con - c_elem[0];
1118 a1 = con - a1;
1119 a2 = c_elem[0] - a1;
1120 con = beta_i[1] * split;
1121 b1 = con - beta_i[1];
1122 b1 = con - b1;
1123 b2 = beta_i[1] - b1;
1124
1125 head_t2 = c_elem[0] * beta_i[1];
1126 tail_t2 =
1127 (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
1128 }
1129 {
1130 /* Compute double-double = double-double + double-double. */
1131 double bv;
1132 double s1, s2, t1, t2;
1133
1134 /* Add two hi words. */
1135 s1 = head_t1 + head_t2;
1136 bv = s1 - head_t1;
1137 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1138
1139 /* Add two lo words. */
1140 t1 = tail_t1 + tail_t2;
1141 bv = t1 - tail_t1;
1142 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1143
1144 s2 += t1;
1145
1146 /* Renormalize (s1, s2) to (t1, s2) */
1147 t1 = s1 + s2;
1148 s2 = s2 - (t1 - s1);
1149
1150 t2 += s2;
1151
1152 /* Renormalize (t1, t2) */
1153 head_t1 = t1 + t2;
1154 tail_t1 = t2 - (head_t1 - t1);
1155 }
1156 head_tmp2[1] = head_t1;
1157 tail_tmp2[1] = tail_t1;
1158 }
1159 head_tmp1[0] = head_sum[0];
1160 tail_tmp1[0] = tail_sum[0];
1161 head_tmp1[1] = head_sum[1];
1162 tail_tmp1[1] = tail_sum[1];
1163 {
1164 double head_t, tail_t;
1165 double head_a, tail_a;
1166 double head_b, tail_b;
1167 /* Real part */
1168 head_a = head_tmp2[0];
1169 tail_a = tail_tmp2[0];
1170 head_b = head_tmp1[0];
1171 tail_b = tail_tmp1[0];
1172 {
1173 /* Compute double-double = double-double + double-double. */
1174 double bv;
1175 double s1, s2, t1, t2;
1176
1177 /* Add two hi words. */
1178 s1 = head_a + head_b;
1179 bv = s1 - head_a;
1180 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1181
1182 /* Add two lo words. */
1183 t1 = tail_a + tail_b;
1184 bv = t1 - tail_a;
1185 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1186
1187 s2 += t1;
1188
1189 /* Renormalize (s1, s2) to (t1, s2) */
1190 t1 = s1 + s2;
1191 s2 = s2 - (t1 - s1);
1192
1193 t2 += s2;
1194
1195 /* Renormalize (t1, t2) */
1196 head_t = t1 + t2;
1197 tail_t = t2 - (head_t - t1);
1198 }
1199 head_tmp1[0] = head_t;
1200 tail_tmp1[0] = tail_t;
1201 /* Imaginary part */
1202 head_a = head_tmp2[1];
1203 tail_a = tail_tmp2[1];
1204 head_b = head_tmp1[1];
1205 tail_b = tail_tmp1[1];
1206 {
1207 /* Compute double-double = double-double + double-double. */
1208 double bv;
1209 double s1, s2, t1, t2;
1210
1211 /* Add two hi words. */
1212 s1 = head_a + head_b;
1213 bv = s1 - head_a;
1214 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1215
1216 /* Add two lo words. */
1217 t1 = tail_a + tail_b;
1218 bv = t1 - tail_a;
1219 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1220
1221 s2 += t1;
1222
1223 /* Renormalize (s1, s2) to (t1, s2) */
1224 t1 = s1 + s2;
1225 s2 = s2 - (t1 - s1);
1226
1227 t2 += s2;
1228
1229 /* Renormalize (t1, t2) */
1230 head_t = t1 + t2;
1231 tail_t = t2 - (head_t - t1);
1232 }
1233 head_tmp1[1] = head_t;
1234 tail_tmp1[1] = tail_t;
1235 }
1236 c_i[cij] = head_tmp1[0];
1237 c_i[cij + 1] = head_tmp1[1];
1238 }
1239 }
1240 }
1241
1242 } else {
1243
1244 /* The most general form, C <-- alpha * A * B + beta * C */
1245 ci = 0;
1246 ai = 0;
1247 for (i = 0; i < m; i++, ci += incci, ai += incai) {
1248
1249 cij = ci;
1250 bj = 0;
1251
1252 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
1253
1254 aih = ai;
1255 bhj = bj;
1256
1257 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
1258
1259 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
1260 a_elem[0] = a_i[aih];
1261 a_elem[1] = a_i[aih + 1];
1262 b_elem[0] = b_i[bhj];
1263 b_elem[1] = b_i[bhj + 1];
1264 if (transa == blas_conj_trans) {
1265 a_elem[1] = -a_elem[1];
1266 }
1267 if (transb == blas_conj_trans) {
1268 b_elem[1] = -b_elem[1];
1269 }
1270 {
1271 double head_e1, tail_e1;
1272 double d1;
1273 double d2;
1274 /* Real part */
1275 d1 = (double) a_elem[0] * b_elem[0];
1276 d2 = (double) -a_elem[1] * b_elem[1];
1277 {
1278 /* Compute double-double = double + double. */
1279 double e, t1, t2;
1280
1281 /* Knuth trick. */
1282 t1 = d1 + d2;
1283 e = t1 - d1;
1284 t2 = ((d2 - e) + (d1 - (t1 - e)));
1285
1286 /* The result is t1 + t2, after normalization. */
1287 head_e1 = t1 + t2;
1288 tail_e1 = t2 - (head_e1 - t1);
1289 }
1290 head_prod[0] = head_e1;
1291 tail_prod[0] = tail_e1;
1292 /* imaginary part */
1293 d1 = (double) a_elem[0] * b_elem[1];
1294 d2 = (double) a_elem[1] * b_elem[0];
1295 {
1296 /* Compute double-double = double + double. */
1297 double e, t1, t2;
1298
1299 /* Knuth trick. */
1300 t1 = d1 + d2;
1301 e = t1 - d1;
1302 t2 = ((d2 - e) + (d1 - (t1 - e)));
1303
1304 /* The result is t1 + t2, after normalization. */
1305 head_e1 = t1 + t2;
1306 tail_e1 = t2 - (head_e1 - t1);
1307 }
1308 head_prod[1] = head_e1;
1309 tail_prod[1] = tail_e1;
1310 }
1311 {
1312 double head_t, tail_t;
1313 double head_a, tail_a;
1314 double head_b, tail_b;
1315 /* Real part */
1316 head_a = head_sum[0];
1317 tail_a = tail_sum[0];
1318 head_b = head_prod[0];
1319 tail_b = tail_prod[0];
1320 {
1321 /* Compute double-double = double-double + double-double. */
1322 double bv;
1323 double s1, s2, t1, t2;
1324
1325 /* Add two hi words. */
1326 s1 = head_a + head_b;
1327 bv = s1 - head_a;
1328 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1329
1330 /* Add two lo words. */
1331 t1 = tail_a + tail_b;
1332 bv = t1 - tail_a;
1333 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1334
1335 s2 += t1;
1336
1337 /* Renormalize (s1, s2) to (t1, s2) */
1338 t1 = s1 + s2;
1339 s2 = s2 - (t1 - s1);
1340
1341 t2 += s2;
1342
1343 /* Renormalize (t1, t2) */
1344 head_t = t1 + t2;
1345 tail_t = t2 - (head_t - t1);
1346 }
1347 head_sum[0] = head_t;
1348 tail_sum[0] = tail_t;
1349 /* Imaginary part */
1350 head_a = head_sum[1];
1351 tail_a = tail_sum[1];
1352 head_b = head_prod[1];
1353 tail_b = tail_prod[1];
1354 {
1355 /* Compute double-double = double-double + double-double. */
1356 double bv;
1357 double s1, s2, t1, t2;
1358
1359 /* Add two hi words. */
1360 s1 = head_a + head_b;
1361 bv = s1 - head_a;
1362 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1363
1364 /* Add two lo words. */
1365 t1 = tail_a + tail_b;
1366 bv = t1 - tail_a;
1367 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1368
1369 s2 += t1;
1370
1371 /* Renormalize (s1, s2) to (t1, s2) */
1372 t1 = s1 + s2;
1373 s2 = s2 - (t1 - s1);
1374
1375 t2 += s2;
1376
1377 /* Renormalize (t1, t2) */
1378 head_t = t1 + t2;
1379 tail_t = t2 - (head_t - t1);
1380 }
1381 head_sum[1] = head_t;
1382 tail_sum[1] = tail_t;
1383 }
1384 }
1385
1386 {
1387 /* Compute complex-extra = complex-extra * complex-double. */
1388 double head_a0, tail_a0;
1389 double head_a1, tail_a1;
1390 double head_t1, tail_t1;
1391 double head_t2, tail_t2;
1392 head_a0 = head_sum[0];
1393 tail_a0 = tail_sum[0];
1394 head_a1 = head_sum[1];
1395 tail_a1 = tail_sum[1];
1396 /* real part */
1397 {
1398 /* Compute double-double = double-double * double. */
1399 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1400
1401 con = head_a0 * split;
1402 a11 = con - head_a0;
1403 a11 = con - a11;
1404 a21 = head_a0 - a11;
1405 con = alpha_i[0] * split;
1406 b1 = con - alpha_i[0];
1407 b1 = con - b1;
1408 b2 = alpha_i[0] - b1;
1409
1410 c11 = head_a0 * alpha_i[0];
1411 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1412
1413 c2 = tail_a0 * alpha_i[0];
1414 t1 = c11 + c2;
1415 t2 = (c2 - (t1 - c11)) + c21;
1416
1417 head_t1 = t1 + t2;
1418 tail_t1 = t2 - (head_t1 - t1);
1419 }
1420 {
1421 /* Compute double-double = double-double * double. */
1422 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1423
1424 con = head_a1 * split;
1425 a11 = con - head_a1;
1426 a11 = con - a11;
1427 a21 = head_a1 - a11;
1428 con = alpha_i[1] * split;
1429 b1 = con - alpha_i[1];
1430 b1 = con - b1;
1431 b2 = alpha_i[1] - b1;
1432
1433 c11 = head_a1 * alpha_i[1];
1434 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1435
1436 c2 = tail_a1 * alpha_i[1];
1437 t1 = c11 + c2;
1438 t2 = (c2 - (t1 - c11)) + c21;
1439
1440 head_t2 = t1 + t2;
1441 tail_t2 = t2 - (head_t2 - t1);
1442 }
1443 head_t2 = -head_t2;
1444 tail_t2 = -tail_t2;
1445 {
1446 /* Compute double-double = double-double + double-double. */
1447 double bv;
1448 double s1, s2, t1, t2;
1449
1450 /* Add two hi words. */
1451 s1 = head_t1 + head_t2;
1452 bv = s1 - head_t1;
1453 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1454
1455 /* Add two lo words. */
1456 t1 = tail_t1 + tail_t2;
1457 bv = t1 - tail_t1;
1458 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1459
1460 s2 += t1;
1461
1462 /* Renormalize (s1, s2) to (t1, s2) */
1463 t1 = s1 + s2;
1464 s2 = s2 - (t1 - s1);
1465
1466 t2 += s2;
1467
1468 /* Renormalize (t1, t2) */
1469 head_t1 = t1 + t2;
1470 tail_t1 = t2 - (head_t1 - t1);
1471 }
1472 head_tmp1[0] = head_t1;
1473 tail_tmp1[0] = tail_t1;
1474 /* imaginary part */
1475 {
1476 /* Compute double-double = double-double * double. */
1477 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1478
1479 con = head_a1 * split;
1480 a11 = con - head_a1;
1481 a11 = con - a11;
1482 a21 = head_a1 - a11;
1483 con = alpha_i[0] * split;
1484 b1 = con - alpha_i[0];
1485 b1 = con - b1;
1486 b2 = alpha_i[0] - b1;
1487
1488 c11 = head_a1 * alpha_i[0];
1489 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1490
1491 c2 = tail_a1 * alpha_i[0];
1492 t1 = c11 + c2;
1493 t2 = (c2 - (t1 - c11)) + c21;
1494
1495 head_t1 = t1 + t2;
1496 tail_t1 = t2 - (head_t1 - t1);
1497 }
1498 {
1499 /* Compute double-double = double-double * double. */
1500 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1501
1502 con = head_a0 * split;
1503 a11 = con - head_a0;
1504 a11 = con - a11;
1505 a21 = head_a0 - a11;
1506 con = alpha_i[1] * split;
1507 b1 = con - alpha_i[1];
1508 b1 = con - b1;
1509 b2 = alpha_i[1] - b1;
1510
1511 c11 = head_a0 * alpha_i[1];
1512 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1513
1514 c2 = tail_a0 * alpha_i[1];
1515 t1 = c11 + c2;
1516 t2 = (c2 - (t1 - c11)) + c21;
1517
1518 head_t2 = t1 + t2;
1519 tail_t2 = t2 - (head_t2 - t1);
1520 }
1521 {
1522 /* Compute double-double = double-double + double-double. */
1523 double bv;
1524 double s1, s2, t1, t2;
1525
1526 /* Add two hi words. */
1527 s1 = head_t1 + head_t2;
1528 bv = s1 - head_t1;
1529 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1530
1531 /* Add two lo words. */
1532 t1 = tail_t1 + tail_t2;
1533 bv = t1 - tail_t1;
1534 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1535
1536 s2 += t1;
1537
1538 /* Renormalize (s1, s2) to (t1, s2) */
1539 t1 = s1 + s2;
1540 s2 = s2 - (t1 - s1);
1541
1542 t2 += s2;
1543
1544 /* Renormalize (t1, t2) */
1545 head_t1 = t1 + t2;
1546 tail_t1 = t2 - (head_t1 - t1);
1547 }
1548 head_tmp1[1] = head_t1;
1549 tail_tmp1[1] = tail_t1;
1550 }
1551
1552 c_elem[0] = c_i[cij];
1553 c_elem[1] = c_i[cij + 1];
1554 {
1555 /* Compute complex-extra = complex-double * complex-double. */
1556 double head_t1, tail_t1;
1557 double head_t2, tail_t2;
1558 /* Real part */
1559 {
1560 /* Compute double_double = double * double. */
1561 double a1, a2, b1, b2, con;
1562
1563 con = c_elem[0] * split;
1564 a1 = con - c_elem[0];
1565 a1 = con - a1;
1566 a2 = c_elem[0] - a1;
1567 con = beta_i[0] * split;
1568 b1 = con - beta_i[0];
1569 b1 = con - b1;
1570 b2 = beta_i[0] - b1;
1571
1572 head_t1 = c_elem[0] * beta_i[0];
1573 tail_t1 =
1574 (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
1575 }
1576 {
1577 /* Compute double_double = double * double. */
1578 double a1, a2, b1, b2, con;
1579
1580 con = c_elem[1] * split;
1581 a1 = con - c_elem[1];
1582 a1 = con - a1;
1583 a2 = c_elem[1] - a1;
1584 con = beta_i[1] * split;
1585 b1 = con - beta_i[1];
1586 b1 = con - b1;
1587 b2 = beta_i[1] - b1;
1588
1589 head_t2 = c_elem[1] * beta_i[1];
1590 tail_t2 =
1591 (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
1592 }
1593 head_t2 = -head_t2;
1594 tail_t2 = -tail_t2;
1595 {
1596 /* Compute double-double = double-double + double-double. */
1597 double bv;
1598 double s1, s2, t1, t2;
1599
1600 /* Add two hi words. */
1601 s1 = head_t1 + head_t2;
1602 bv = s1 - head_t1;
1603 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1604
1605 /* Add two lo words. */
1606 t1 = tail_t1 + tail_t2;
1607 bv = t1 - tail_t1;
1608 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1609
1610 s2 += t1;
1611
1612 /* Renormalize (s1, s2) to (t1, s2) */
1613 t1 = s1 + s2;
1614 s2 = s2 - (t1 - s1);
1615
1616 t2 += s2;
1617
1618 /* Renormalize (t1, t2) */
1619 head_t1 = t1 + t2;
1620 tail_t1 = t2 - (head_t1 - t1);
1621 }
1622 head_tmp2[0] = head_t1;
1623 tail_tmp2[0] = tail_t1;
1624 /* Imaginary part */
1625 {
1626 /* Compute double_double = double * double. */
1627 double a1, a2, b1, b2, con;
1628
1629 con = c_elem[1] * split;
1630 a1 = con - c_elem[1];
1631 a1 = con - a1;
1632 a2 = c_elem[1] - a1;
1633 con = beta_i[0] * split;
1634 b1 = con - beta_i[0];
1635 b1 = con - b1;
1636 b2 = beta_i[0] - b1;
1637
1638 head_t1 = c_elem[1] * beta_i[0];
1639 tail_t1 =
1640 (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
1641 }
1642 {
1643 /* Compute double_double = double * double. */
1644 double a1, a2, b1, b2, con;
1645
1646 con = c_elem[0] * split;
1647 a1 = con - c_elem[0];
1648 a1 = con - a1;
1649 a2 = c_elem[0] - a1;
1650 con = beta_i[1] * split;
1651 b1 = con - beta_i[1];
1652 b1 = con - b1;
1653 b2 = beta_i[1] - b1;
1654
1655 head_t2 = c_elem[0] * beta_i[1];
1656 tail_t2 =
1657 (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
1658 }
1659 {
1660 /* Compute double-double = double-double + double-double. */
1661 double bv;
1662 double s1, s2, t1, t2;
1663
1664 /* Add two hi words. */
1665 s1 = head_t1 + head_t2;
1666 bv = s1 - head_t1;
1667 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1668
1669 /* Add two lo words. */
1670 t1 = tail_t1 + tail_t2;
1671 bv = t1 - tail_t1;
1672 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1673
1674 s2 += t1;
1675
1676 /* Renormalize (s1, s2) to (t1, s2) */
1677 t1 = s1 + s2;
1678 s2 = s2 - (t1 - s1);
1679
1680 t2 += s2;
1681
1682 /* Renormalize (t1, t2) */
1683 head_t1 = t1 + t2;
1684 tail_t1 = t2 - (head_t1 - t1);
1685 }
1686 head_tmp2[1] = head_t1;
1687 tail_tmp2[1] = tail_t1;
1688 }
1689 {
1690 double head_t, tail_t;
1691 double head_a, tail_a;
1692 double head_b, tail_b;
1693 /* Real part */
1694 head_a = head_tmp1[0];
1695 tail_a = tail_tmp1[0];
1696 head_b = head_tmp2[0];
1697 tail_b = tail_tmp2[0];
1698 {
1699 /* Compute double-double = double-double + double-double. */
1700 double bv;
1701 double s1, s2, t1, t2;
1702
1703 /* Add two hi words. */
1704 s1 = head_a + head_b;
1705 bv = s1 - head_a;
1706 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1707
1708 /* Add two lo words. */
1709 t1 = tail_a + tail_b;
1710 bv = t1 - tail_a;
1711 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1712
1713 s2 += t1;
1714
1715 /* Renormalize (s1, s2) to (t1, s2) */
1716 t1 = s1 + s2;
1717 s2 = s2 - (t1 - s1);
1718
1719 t2 += s2;
1720
1721 /* Renormalize (t1, t2) */
1722 head_t = t1 + t2;
1723 tail_t = t2 - (head_t - t1);
1724 }
1725 head_tmp1[0] = head_t;
1726 tail_tmp1[0] = tail_t;
1727 /* Imaginary part */
1728 head_a = head_tmp1[1];
1729 tail_a = tail_tmp1[1];
1730 head_b = head_tmp2[1];
1731 tail_b = tail_tmp2[1];
1732 {
1733 /* Compute double-double = double-double + double-double. */
1734 double bv;
1735 double s1, s2, t1, t2;
1736
1737 /* Add two hi words. */
1738 s1 = head_a + head_b;
1739 bv = s1 - head_a;
1740 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1741
1742 /* Add two lo words. */
1743 t1 = tail_a + tail_b;
1744 bv = t1 - tail_a;
1745 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1746
1747 s2 += t1;
1748
1749 /* Renormalize (s1, s2) to (t1, s2) */
1750 t1 = s1 + s2;
1751 s2 = s2 - (t1 - s1);
1752
1753 t2 += s2;
1754
1755 /* Renormalize (t1, t2) */
1756 head_t = t1 + t2;
1757 tail_t = t2 - (head_t - t1);
1758 }
1759 head_tmp1[1] = head_t;
1760 tail_tmp1[1] = tail_t;
1761 }
1762 c_i[cij] = head_tmp1[0];
1763 c_i[cij + 1] = head_tmp1[1];
1764 }
1765 }
1766
1767 }
1768
1769 FPU_FIX_STOP;
1770
1771 break;
1772 }
1773 }
1774 }
1775