1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_cgemm_x(enum blas_order_type order,enum blas_trans_type transa,enum blas_trans_type transb,int m,int n,int k,const void * alpha,const void * a,int lda,const void * b,int ldb,const void * beta,void * c,int ldc,enum blas_prec_type prec)3 void BLAS_cgemm_x(enum blas_order_type order, enum blas_trans_type transa,
4 enum blas_trans_type transb, int m, int n, int k,
5 const void *alpha, const void *a, int lda, const void *b,
6 int ldb, const void *beta, void *c, int ldc,
7 enum blas_prec_type prec)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * This routine computes the matrix product:
14 *
15 * C <- alpha * op(A) * op(B) + beta * C .
16 *
17 * where op(M) represents either M, M transpose,
18 * or M conjugate transpose.
19 *
20 * Arguments
21 * =========
22 *
23 * order (input) enum blas_order_type
24 * Storage format of input matrices A, B, and C.
25 *
26 * transa (input) enum blas_trans_type
27 * Operation to be done on matrix A before multiplication.
28 * Can be no operation, transposition, or conjugate transposition.
29 *
30 * transb (input) enum blas_trans_type
31 * Operation to be done on matrix B before multiplication.
32 * Can be no operation, transposition, or conjugate transposition.
33 *
34 * m n k (input) int
35 * The dimensions of matrices A, B, and C.
36 * Matrix C is m-by-n matrix.
37 * Matrix A is m-by-k if A is not transposed,
38 * k-by-m otherwise.
39 * Matrix B is k-by-n if B is not transposed,
40 * n-by-k otherwise.
41 *
42 * alpha (input) const void*
43 *
44 * a (input) const void*
45 * matrix A.
46 *
47 * lda (input) int
48 * leading dimension of A.
49 *
50 * b (input) const void*
51 * matrix B
52 *
53 * ldb (input) int
54 * leading dimension of B.
55 *
56 * beta (input) const void*
57 *
58 * c (input/output) void*
59 * matrix C
60 *
61 * ldc (input) int
62 * leading dimension of C.
63 *
64 * prec (input) enum blas_prec_type
65 * Specifies the internal precision to be used.
66 * = blas_prec_single: single precision.
67 * = blas_prec_double: double precision.
68 * = blas_prec_extra : anything at least 1.5 times as accurate
69 * than double, and wider than 80-bits.
70 * We use double-double in our implementation.
71 *
72 */
73 {
74 static const char routine_name[] = "BLAS_cgemm_x";
75 switch (prec) {
76
77 case blas_prec_single:{
78
79
80 /* Integer Index Variables */
81 int i, j, h;
82
83 int ai, bj, ci;
84 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
85
86 int incai, incaih; /* Index increments for matrix a */
87 int incbj, incbhj; /* Index increments for matrix b */
88 int incci, inccij; /* Index increments for matrix c */
89
90 /* Input Matrices */
91 const float *a_i = (float *) a;
92 const float *b_i = (float *) b;
93
94 /* Output Matrix */
95 float *c_i = (float *) c;
96
97 /* Input Scalars */
98 float *alpha_i = (float *) alpha;
99 float *beta_i = (float *) beta;
100
101 /* Temporary Floating-Point Variables */
102 float a_elem[2];
103 float b_elem[2];
104 float c_elem[2];
105 float prod[2];
106 float sum[2];
107 float tmp1[2];
108 float tmp2[2];
109
110
111
112 /* Test for error conditions */
113 if (m < 0)
114 BLAS_error(routine_name, -4, m, NULL);
115 if (n < 0)
116 BLAS_error(routine_name, -5, n, NULL);
117 if (k < 0)
118 BLAS_error(routine_name, -6, k, NULL);
119
120 if (order == blas_colmajor) {
121
122 if (ldc < m)
123 BLAS_error(routine_name, -14, ldc, NULL);
124
125 if (transa == blas_no_trans) {
126 if (lda < m)
127 BLAS_error(routine_name, -9, lda, NULL);
128 } else {
129 if (lda < k)
130 BLAS_error(routine_name, -9, lda, NULL);
131 }
132
133 if (transb == blas_no_trans) {
134 if (ldb < k)
135 BLAS_error(routine_name, -11, ldb, NULL);
136 } else {
137 if (ldb < n)
138 BLAS_error(routine_name, -11, ldb, NULL);
139 }
140
141 } else {
142 /* row major */
143 if (ldc < n)
144 BLAS_error(routine_name, -14, ldc, NULL);
145
146 if (transa == blas_no_trans) {
147 if (lda < k)
148 BLAS_error(routine_name, -9, lda, NULL);
149 } else {
150 if (lda < m)
151 BLAS_error(routine_name, -9, lda, NULL);
152 }
153
154 if (transb == blas_no_trans) {
155 if (ldb < n)
156 BLAS_error(routine_name, -11, ldb, NULL);
157 } else {
158 if (ldb < k)
159 BLAS_error(routine_name, -11, ldb, NULL);
160 }
161 }
162
163 /* Test for no-op */
164 if (n == 0 || m == 0 || k == 0)
165 return;
166 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
167 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
168 return;
169 }
170
171 /* Set Index Parameters */
172 if (order == blas_colmajor) {
173 incci = 1;
174 inccij = ldc;
175
176 if (transa == blas_no_trans) {
177 incai = 1;
178 incaih = lda;
179 } else {
180 incai = lda;
181 incaih = 1;
182 }
183
184 if (transb == blas_no_trans) {
185 incbj = ldb;
186 incbhj = 1;
187 } else {
188 incbj = 1;
189 incbhj = ldb;
190 }
191
192 } else {
193 /* row major */
194 incci = ldc;
195 inccij = 1;
196
197 if (transa == blas_no_trans) {
198 incai = lda;
199 incaih = 1;
200 } else {
201 incai = 1;
202 incaih = lda;
203 }
204
205 if (transb == blas_no_trans) {
206 incbj = 1;
207 incbhj = ldb;
208 } else {
209 incbj = ldb;
210 incbhj = 1;
211 }
212
213 }
214
215
216
217 /* Ajustment to increments */
218 incci *= 2;
219 inccij *= 2;
220 incai *= 2;
221 incaih *= 2;
222 incbj *= 2;
223 incbhj *= 2;
224
225 /* alpha = 0. In this case, just return beta * C */
226 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
227
228 ci = 0;
229 for (i = 0; i < m; i++, ci += incci) {
230 cij = ci;
231 for (j = 0; j < n; j++, cij += inccij) {
232 c_elem[0] = c_i[cij];
233 c_elem[1] = c_i[cij + 1];
234 {
235 tmp1[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
236 tmp1[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
237 }
238
239 c_i[cij] = tmp1[0];
240 c_i[cij + 1] = tmp1[1];
241 }
242 }
243
244 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
245
246 /* Case alpha == 1. */
247
248 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
249 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
250
251 ci = 0;
252 ai = 0;
253 for (i = 0; i < m; i++, ci += incci, ai += incai) {
254
255 cij = ci;
256 bj = 0;
257
258 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
259
260 aih = ai;
261 bhj = bj;
262
263 sum[0] = sum[1] = 0.0;
264
265 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
266 a_elem[0] = a_i[aih];
267 a_elem[1] = a_i[aih + 1];
268 b_elem[0] = b_i[bhj];
269 b_elem[1] = b_i[bhj + 1];
270 if (transa == blas_conj_trans) {
271 a_elem[1] = -a_elem[1];
272 }
273 if (transb == blas_conj_trans) {
274 b_elem[1] = -b_elem[1];
275 }
276 {
277 prod[0] = a_elem[0] * b_elem[0] - a_elem[1] * b_elem[1];
278 prod[1] = a_elem[0] * b_elem[1] + a_elem[1] * b_elem[0];
279 }
280
281 sum[0] = sum[0] + prod[0];
282 sum[1] = sum[1] + prod[1];
283 }
284 c_i[cij] = sum[0];
285 c_i[cij + 1] = sum[1];
286 }
287 }
288
289 } else {
290 /* Case alpha == 1, but beta != 0.
291 We compute C <--- A * B + beta * C */
292
293 ci = 0;
294 ai = 0;
295 for (i = 0; i < m; i++, ci += incci, ai += incai) {
296
297 cij = ci;
298 bj = 0;
299
300 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
301
302 aih = ai;
303 bhj = bj;
304
305 sum[0] = sum[1] = 0.0;
306
307 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
308 a_elem[0] = a_i[aih];
309 a_elem[1] = a_i[aih + 1];
310 b_elem[0] = b_i[bhj];
311 b_elem[1] = b_i[bhj + 1];
312 if (transa == blas_conj_trans) {
313 a_elem[1] = -a_elem[1];
314 }
315 if (transb == blas_conj_trans) {
316 b_elem[1] = -b_elem[1];
317 }
318 {
319 prod[0] = a_elem[0] * b_elem[0] - a_elem[1] * b_elem[1];
320 prod[1] = a_elem[0] * b_elem[1] + a_elem[1] * b_elem[0];
321 }
322
323 sum[0] = sum[0] + prod[0];
324 sum[1] = sum[1] + prod[1];
325 }
326
327 c_elem[0] = c_i[cij];
328 c_elem[1] = c_i[cij + 1];
329 {
330 tmp2[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
331 tmp2[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
332 }
333
334 tmp1[0] = sum[0];
335 tmp1[1] = sum[1];
336 tmp1[0] = tmp2[0] + tmp1[0];
337 tmp1[1] = tmp2[1] + tmp1[1];
338 c_i[cij] = tmp1[0];
339 c_i[cij + 1] = tmp1[1];
340 }
341 }
342 }
343
344 } else {
345
346 /* The most general form, C <-- alpha * A * B + beta * C */
347 ci = 0;
348 ai = 0;
349 for (i = 0; i < m; i++, ci += incci, ai += incai) {
350
351 cij = ci;
352 bj = 0;
353
354 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
355
356 aih = ai;
357 bhj = bj;
358
359 sum[0] = sum[1] = 0.0;
360
361 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
362 a_elem[0] = a_i[aih];
363 a_elem[1] = a_i[aih + 1];
364 b_elem[0] = b_i[bhj];
365 b_elem[1] = b_i[bhj + 1];
366 if (transa == blas_conj_trans) {
367 a_elem[1] = -a_elem[1];
368 }
369 if (transb == blas_conj_trans) {
370 b_elem[1] = -b_elem[1];
371 }
372 {
373 prod[0] = a_elem[0] * b_elem[0] - a_elem[1] * b_elem[1];
374 prod[1] = a_elem[0] * b_elem[1] + a_elem[1] * b_elem[0];
375 }
376
377 sum[0] = sum[0] + prod[0];
378 sum[1] = sum[1] + prod[1];
379 }
380
381 {
382 tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
383 tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
384 }
385
386 c_elem[0] = c_i[cij];
387 c_elem[1] = c_i[cij + 1];
388 {
389 tmp2[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
390 tmp2[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
391 }
392
393 tmp1[0] = tmp1[0] + tmp2[0];
394 tmp1[1] = tmp1[1] + tmp2[1];
395 c_i[cij] = tmp1[0];
396 c_i[cij + 1] = tmp1[1];
397 }
398 }
399
400 }
401
402
403
404 break;
405 }
406 case blas_prec_double:
407 case blas_prec_indigenous:{
408
409
410 /* Integer Index Variables */
411 int i, j, h;
412
413 int ai, bj, ci;
414 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
415
416 int incai, incaih; /* Index increments for matrix a */
417 int incbj, incbhj; /* Index increments for matrix b */
418 int incci, inccij; /* Index increments for matrix c */
419
420 /* Input Matrices */
421 const float *a_i = (float *) a;
422 const float *b_i = (float *) b;
423
424 /* Output Matrix */
425 float *c_i = (float *) c;
426
427 /* Input Scalars */
428 float *alpha_i = (float *) alpha;
429 float *beta_i = (float *) beta;
430
431 /* Temporary Floating-Point Variables */
432 float a_elem[2];
433 float b_elem[2];
434 float c_elem[2];
435 double prod[2];
436 double sum[2];
437 double tmp1[2];
438 double tmp2[2];
439
440
441
442 /* Test for error conditions */
443 if (m < 0)
444 BLAS_error(routine_name, -4, m, NULL);
445 if (n < 0)
446 BLAS_error(routine_name, -5, n, NULL);
447 if (k < 0)
448 BLAS_error(routine_name, -6, k, NULL);
449
450 if (order == blas_colmajor) {
451
452 if (ldc < m)
453 BLAS_error(routine_name, -14, ldc, NULL);
454
455 if (transa == blas_no_trans) {
456 if (lda < m)
457 BLAS_error(routine_name, -9, lda, NULL);
458 } else {
459 if (lda < k)
460 BLAS_error(routine_name, -9, lda, NULL);
461 }
462
463 if (transb == blas_no_trans) {
464 if (ldb < k)
465 BLAS_error(routine_name, -11, ldb, NULL);
466 } else {
467 if (ldb < n)
468 BLAS_error(routine_name, -11, ldb, NULL);
469 }
470
471 } else {
472 /* row major */
473 if (ldc < n)
474 BLAS_error(routine_name, -14, ldc, NULL);
475
476 if (transa == blas_no_trans) {
477 if (lda < k)
478 BLAS_error(routine_name, -9, lda, NULL);
479 } else {
480 if (lda < m)
481 BLAS_error(routine_name, -9, lda, NULL);
482 }
483
484 if (transb == blas_no_trans) {
485 if (ldb < n)
486 BLAS_error(routine_name, -11, ldb, NULL);
487 } else {
488 if (ldb < k)
489 BLAS_error(routine_name, -11, ldb, NULL);
490 }
491 }
492
493 /* Test for no-op */
494 if (n == 0 || m == 0 || k == 0)
495 return;
496 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
497 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
498 return;
499 }
500
501 /* Set Index Parameters */
502 if (order == blas_colmajor) {
503 incci = 1;
504 inccij = ldc;
505
506 if (transa == blas_no_trans) {
507 incai = 1;
508 incaih = lda;
509 } else {
510 incai = lda;
511 incaih = 1;
512 }
513
514 if (transb == blas_no_trans) {
515 incbj = ldb;
516 incbhj = 1;
517 } else {
518 incbj = 1;
519 incbhj = ldb;
520 }
521
522 } else {
523 /* row major */
524 incci = ldc;
525 inccij = 1;
526
527 if (transa == blas_no_trans) {
528 incai = lda;
529 incaih = 1;
530 } else {
531 incai = 1;
532 incaih = lda;
533 }
534
535 if (transb == blas_no_trans) {
536 incbj = 1;
537 incbhj = ldb;
538 } else {
539 incbj = ldb;
540 incbhj = 1;
541 }
542
543 }
544
545
546
547 /* Ajustment to increments */
548 incci *= 2;
549 inccij *= 2;
550 incai *= 2;
551 incaih *= 2;
552 incbj *= 2;
553 incbhj *= 2;
554
555 /* alpha = 0. In this case, just return beta * C */
556 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
557
558 ci = 0;
559 for (i = 0; i < m; i++, ci += incci) {
560 cij = ci;
561 for (j = 0; j < n; j++, cij += inccij) {
562 c_elem[0] = c_i[cij];
563 c_elem[1] = c_i[cij + 1];
564 {
565 tmp1[0] =
566 (double) c_elem[0] * beta_i[0] -
567 (double) c_elem[1] * beta_i[1];
568 tmp1[1] =
569 (double) c_elem[0] * beta_i[1] +
570 (double) c_elem[1] * beta_i[0];
571 }
572 c_i[cij] = tmp1[0];
573 c_i[cij + 1] = tmp1[1];
574 }
575 }
576
577 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
578
579 /* Case alpha == 1. */
580
581 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
582 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
583
584 ci = 0;
585 ai = 0;
586 for (i = 0; i < m; i++, ci += incci, ai += incai) {
587
588 cij = ci;
589 bj = 0;
590
591 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
592
593 aih = ai;
594 bhj = bj;
595
596 sum[0] = sum[1] = 0.0;
597
598 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
599 a_elem[0] = a_i[aih];
600 a_elem[1] = a_i[aih + 1];
601 b_elem[0] = b_i[bhj];
602 b_elem[1] = b_i[bhj + 1];
603 if (transa == blas_conj_trans) {
604 a_elem[1] = -a_elem[1];
605 }
606 if (transb == blas_conj_trans) {
607 b_elem[1] = -b_elem[1];
608 }
609 {
610 prod[0] =
611 (double) a_elem[0] * b_elem[0] -
612 (double) a_elem[1] * b_elem[1];
613 prod[1] =
614 (double) a_elem[0] * b_elem[1] +
615 (double) a_elem[1] * b_elem[0];
616 }
617 sum[0] = sum[0] + prod[0];
618 sum[1] = sum[1] + prod[1];
619 }
620 c_i[cij] = sum[0];
621 c_i[cij + 1] = sum[1];
622 }
623 }
624
625 } else {
626 /* Case alpha == 1, but beta != 0.
627 We compute C <--- A * B + beta * C */
628
629 ci = 0;
630 ai = 0;
631 for (i = 0; i < m; i++, ci += incci, ai += incai) {
632
633 cij = ci;
634 bj = 0;
635
636 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
637
638 aih = ai;
639 bhj = bj;
640
641 sum[0] = sum[1] = 0.0;
642
643 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
644 a_elem[0] = a_i[aih];
645 a_elem[1] = a_i[aih + 1];
646 b_elem[0] = b_i[bhj];
647 b_elem[1] = b_i[bhj + 1];
648 if (transa == blas_conj_trans) {
649 a_elem[1] = -a_elem[1];
650 }
651 if (transb == blas_conj_trans) {
652 b_elem[1] = -b_elem[1];
653 }
654 {
655 prod[0] =
656 (double) a_elem[0] * b_elem[0] -
657 (double) a_elem[1] * b_elem[1];
658 prod[1] =
659 (double) a_elem[0] * b_elem[1] +
660 (double) a_elem[1] * b_elem[0];
661 }
662 sum[0] = sum[0] + prod[0];
663 sum[1] = sum[1] + prod[1];
664 }
665
666 c_elem[0] = c_i[cij];
667 c_elem[1] = c_i[cij + 1];
668 {
669 tmp2[0] =
670 (double) c_elem[0] * beta_i[0] -
671 (double) c_elem[1] * beta_i[1];
672 tmp2[1] =
673 (double) c_elem[0] * beta_i[1] +
674 (double) c_elem[1] * beta_i[0];
675 }
676 tmp1[0] = sum[0];
677 tmp1[1] = sum[1];
678 tmp1[0] = tmp2[0] + tmp1[0];
679 tmp1[1] = tmp2[1] + tmp1[1];
680 c_i[cij] = tmp1[0];
681 c_i[cij + 1] = tmp1[1];
682 }
683 }
684 }
685
686 } else {
687
688 /* The most general form, C <-- alpha * A * B + beta * C */
689 ci = 0;
690 ai = 0;
691 for (i = 0; i < m; i++, ci += incci, ai += incai) {
692
693 cij = ci;
694 bj = 0;
695
696 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
697
698 aih = ai;
699 bhj = bj;
700
701 sum[0] = sum[1] = 0.0;
702
703 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
704 a_elem[0] = a_i[aih];
705 a_elem[1] = a_i[aih + 1];
706 b_elem[0] = b_i[bhj];
707 b_elem[1] = b_i[bhj + 1];
708 if (transa == blas_conj_trans) {
709 a_elem[1] = -a_elem[1];
710 }
711 if (transb == blas_conj_trans) {
712 b_elem[1] = -b_elem[1];
713 }
714 {
715 prod[0] =
716 (double) a_elem[0] * b_elem[0] -
717 (double) a_elem[1] * b_elem[1];
718 prod[1] =
719 (double) a_elem[0] * b_elem[1] +
720 (double) a_elem[1] * b_elem[0];
721 }
722 sum[0] = sum[0] + prod[0];
723 sum[1] = sum[1] + prod[1];
724 }
725
726 {
727 tmp1[0] =
728 (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
729 tmp1[1] =
730 (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
731 }
732 c_elem[0] = c_i[cij];
733 c_elem[1] = c_i[cij + 1];
734 {
735 tmp2[0] =
736 (double) c_elem[0] * beta_i[0] -
737 (double) c_elem[1] * beta_i[1];
738 tmp2[1] =
739 (double) c_elem[0] * beta_i[1] +
740 (double) c_elem[1] * beta_i[0];
741 }
742 tmp1[0] = tmp1[0] + tmp2[0];
743 tmp1[1] = tmp1[1] + tmp2[1];
744 c_i[cij] = tmp1[0];
745 c_i[cij + 1] = tmp1[1];
746 }
747 }
748
749 }
750
751
752
753 break;
754 }
755
756 case blas_prec_extra:{
757
758
759 /* Integer Index Variables */
760 int i, j, h;
761
762 int ai, bj, ci;
763 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
764
765 int incai, incaih; /* Index increments for matrix a */
766 int incbj, incbhj; /* Index increments for matrix b */
767 int incci, inccij; /* Index increments for matrix c */
768
769 /* Input Matrices */
770 const float *a_i = (float *) a;
771 const float *b_i = (float *) b;
772
773 /* Output Matrix */
774 float *c_i = (float *) c;
775
776 /* Input Scalars */
777 float *alpha_i = (float *) alpha;
778 float *beta_i = (float *) beta;
779
780 /* Temporary Floating-Point Variables */
781 float a_elem[2];
782 float b_elem[2];
783 float c_elem[2];
784 double head_prod[2], tail_prod[2];
785 double head_sum[2], tail_sum[2];
786 double head_tmp1[2], tail_tmp1[2];
787 double head_tmp2[2], tail_tmp2[2];
788
789 FPU_FIX_DECL;
790
791 /* Test for error conditions */
792 if (m < 0)
793 BLAS_error(routine_name, -4, m, NULL);
794 if (n < 0)
795 BLAS_error(routine_name, -5, n, NULL);
796 if (k < 0)
797 BLAS_error(routine_name, -6, k, NULL);
798
799 if (order == blas_colmajor) {
800
801 if (ldc < m)
802 BLAS_error(routine_name, -14, ldc, NULL);
803
804 if (transa == blas_no_trans) {
805 if (lda < m)
806 BLAS_error(routine_name, -9, lda, NULL);
807 } else {
808 if (lda < k)
809 BLAS_error(routine_name, -9, lda, NULL);
810 }
811
812 if (transb == blas_no_trans) {
813 if (ldb < k)
814 BLAS_error(routine_name, -11, ldb, NULL);
815 } else {
816 if (ldb < n)
817 BLAS_error(routine_name, -11, ldb, NULL);
818 }
819
820 } else {
821 /* row major */
822 if (ldc < n)
823 BLAS_error(routine_name, -14, ldc, NULL);
824
825 if (transa == blas_no_trans) {
826 if (lda < k)
827 BLAS_error(routine_name, -9, lda, NULL);
828 } else {
829 if (lda < m)
830 BLAS_error(routine_name, -9, lda, NULL);
831 }
832
833 if (transb == blas_no_trans) {
834 if (ldb < n)
835 BLAS_error(routine_name, -11, ldb, NULL);
836 } else {
837 if (ldb < k)
838 BLAS_error(routine_name, -11, ldb, NULL);
839 }
840 }
841
842 /* Test for no-op */
843 if (n == 0 || m == 0 || k == 0)
844 return;
845 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
846 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
847 return;
848 }
849
850 /* Set Index Parameters */
851 if (order == blas_colmajor) {
852 incci = 1;
853 inccij = ldc;
854
855 if (transa == blas_no_trans) {
856 incai = 1;
857 incaih = lda;
858 } else {
859 incai = lda;
860 incaih = 1;
861 }
862
863 if (transb == blas_no_trans) {
864 incbj = ldb;
865 incbhj = 1;
866 } else {
867 incbj = 1;
868 incbhj = ldb;
869 }
870
871 } else {
872 /* row major */
873 incci = ldc;
874 inccij = 1;
875
876 if (transa == blas_no_trans) {
877 incai = lda;
878 incaih = 1;
879 } else {
880 incai = 1;
881 incaih = lda;
882 }
883
884 if (transb == blas_no_trans) {
885 incbj = 1;
886 incbhj = ldb;
887 } else {
888 incbj = ldb;
889 incbhj = 1;
890 }
891
892 }
893
894 FPU_FIX_START;
895
896 /* Ajustment to increments */
897 incci *= 2;
898 inccij *= 2;
899 incai *= 2;
900 incaih *= 2;
901 incbj *= 2;
902 incbhj *= 2;
903
904 /* alpha = 0. In this case, just return beta * C */
905 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
906
907 ci = 0;
908 for (i = 0; i < m; i++, ci += incci) {
909 cij = ci;
910 for (j = 0; j < n; j++, cij += inccij) {
911 c_elem[0] = c_i[cij];
912 c_elem[1] = c_i[cij + 1];
913 {
914 double head_e1, tail_e1;
915 double d1;
916 double d2;
917 /* Real part */
918 d1 = (double) c_elem[0] * beta_i[0];
919 d2 = (double) -c_elem[1] * beta_i[1];
920 {
921 /* Compute double-double = double + double. */
922 double e, t1, t2;
923
924 /* Knuth trick. */
925 t1 = d1 + d2;
926 e = t1 - d1;
927 t2 = ((d2 - e) + (d1 - (t1 - e)));
928
929 /* The result is t1 + t2, after normalization. */
930 head_e1 = t1 + t2;
931 tail_e1 = t2 - (head_e1 - t1);
932 }
933 head_tmp1[0] = head_e1;
934 tail_tmp1[0] = tail_e1;
935 /* imaginary part */
936 d1 = (double) c_elem[0] * beta_i[1];
937 d2 = (double) c_elem[1] * beta_i[0];
938 {
939 /* Compute double-double = double + double. */
940 double e, t1, t2;
941
942 /* Knuth trick. */
943 t1 = d1 + d2;
944 e = t1 - d1;
945 t2 = ((d2 - e) + (d1 - (t1 - e)));
946
947 /* The result is t1 + t2, after normalization. */
948 head_e1 = t1 + t2;
949 tail_e1 = t2 - (head_e1 - t1);
950 }
951 head_tmp1[1] = head_e1;
952 tail_tmp1[1] = tail_e1;
953 }
954 c_i[cij] = head_tmp1[0];
955 c_i[cij + 1] = head_tmp1[1];
956 }
957 }
958
959 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
960
961 /* Case alpha == 1. */
962
963 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
964 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
965
966 ci = 0;
967 ai = 0;
968 for (i = 0; i < m; i++, ci += incci, ai += incai) {
969
970 cij = ci;
971 bj = 0;
972
973 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
974
975 aih = ai;
976 bhj = bj;
977
978 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
979
980 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
981 a_elem[0] = a_i[aih];
982 a_elem[1] = a_i[aih + 1];
983 b_elem[0] = b_i[bhj];
984 b_elem[1] = b_i[bhj + 1];
985 if (transa == blas_conj_trans) {
986 a_elem[1] = -a_elem[1];
987 }
988 if (transb == blas_conj_trans) {
989 b_elem[1] = -b_elem[1];
990 }
991 {
992 double head_e1, tail_e1;
993 double d1;
994 double d2;
995 /* Real part */
996 d1 = (double) a_elem[0] * b_elem[0];
997 d2 = (double) -a_elem[1] * b_elem[1];
998 {
999 /* Compute double-double = double + double. */
1000 double e, t1, t2;
1001
1002 /* Knuth trick. */
1003 t1 = d1 + d2;
1004 e = t1 - d1;
1005 t2 = ((d2 - e) + (d1 - (t1 - e)));
1006
1007 /* The result is t1 + t2, after normalization. */
1008 head_e1 = t1 + t2;
1009 tail_e1 = t2 - (head_e1 - t1);
1010 }
1011 head_prod[0] = head_e1;
1012 tail_prod[0] = tail_e1;
1013 /* imaginary part */
1014 d1 = (double) a_elem[0] * b_elem[1];
1015 d2 = (double) a_elem[1] * b_elem[0];
1016 {
1017 /* Compute double-double = double + double. */
1018 double e, t1, t2;
1019
1020 /* Knuth trick. */
1021 t1 = d1 + d2;
1022 e = t1 - d1;
1023 t2 = ((d2 - e) + (d1 - (t1 - e)));
1024
1025 /* The result is t1 + t2, after normalization. */
1026 head_e1 = t1 + t2;
1027 tail_e1 = t2 - (head_e1 - t1);
1028 }
1029 head_prod[1] = head_e1;
1030 tail_prod[1] = tail_e1;
1031 }
1032 {
1033 double head_t, tail_t;
1034 double head_a, tail_a;
1035 double head_b, tail_b;
1036 /* Real part */
1037 head_a = head_sum[0];
1038 tail_a = tail_sum[0];
1039 head_b = head_prod[0];
1040 tail_b = tail_prod[0];
1041 {
1042 /* Compute double-double = double-double + double-double. */
1043 double bv;
1044 double s1, s2, t1, t2;
1045
1046 /* Add two hi words. */
1047 s1 = head_a + head_b;
1048 bv = s1 - head_a;
1049 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1050
1051 /* Add two lo words. */
1052 t1 = tail_a + tail_b;
1053 bv = t1 - tail_a;
1054 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1055
1056 s2 += t1;
1057
1058 /* Renormalize (s1, s2) to (t1, s2) */
1059 t1 = s1 + s2;
1060 s2 = s2 - (t1 - s1);
1061
1062 t2 += s2;
1063
1064 /* Renormalize (t1, t2) */
1065 head_t = t1 + t2;
1066 tail_t = t2 - (head_t - t1);
1067 }
1068 head_sum[0] = head_t;
1069 tail_sum[0] = tail_t;
1070 /* Imaginary part */
1071 head_a = head_sum[1];
1072 tail_a = tail_sum[1];
1073 head_b = head_prod[1];
1074 tail_b = tail_prod[1];
1075 {
1076 /* Compute double-double = double-double + double-double. */
1077 double bv;
1078 double s1, s2, t1, t2;
1079
1080 /* Add two hi words. */
1081 s1 = head_a + head_b;
1082 bv = s1 - head_a;
1083 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1084
1085 /* Add two lo words. */
1086 t1 = tail_a + tail_b;
1087 bv = t1 - tail_a;
1088 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1089
1090 s2 += t1;
1091
1092 /* Renormalize (s1, s2) to (t1, s2) */
1093 t1 = s1 + s2;
1094 s2 = s2 - (t1 - s1);
1095
1096 t2 += s2;
1097
1098 /* Renormalize (t1, t2) */
1099 head_t = t1 + t2;
1100 tail_t = t2 - (head_t - t1);
1101 }
1102 head_sum[1] = head_t;
1103 tail_sum[1] = tail_t;
1104 }
1105 }
1106 c_i[cij] = head_sum[0];
1107 c_i[cij + 1] = head_sum[1];
1108 }
1109 }
1110
1111 } else {
1112 /* Case alpha == 1, but beta != 0.
1113 We compute C <--- A * B + beta * C */
1114
1115 ci = 0;
1116 ai = 0;
1117 for (i = 0; i < m; i++, ci += incci, ai += incai) {
1118
1119 cij = ci;
1120 bj = 0;
1121
1122 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
1123
1124 aih = ai;
1125 bhj = bj;
1126
1127 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
1128
1129 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
1130 a_elem[0] = a_i[aih];
1131 a_elem[1] = a_i[aih + 1];
1132 b_elem[0] = b_i[bhj];
1133 b_elem[1] = b_i[bhj + 1];
1134 if (transa == blas_conj_trans) {
1135 a_elem[1] = -a_elem[1];
1136 }
1137 if (transb == blas_conj_trans) {
1138 b_elem[1] = -b_elem[1];
1139 }
1140 {
1141 double head_e1, tail_e1;
1142 double d1;
1143 double d2;
1144 /* Real part */
1145 d1 = (double) a_elem[0] * b_elem[0];
1146 d2 = (double) -a_elem[1] * b_elem[1];
1147 {
1148 /* Compute double-double = double + double. */
1149 double e, t1, t2;
1150
1151 /* Knuth trick. */
1152 t1 = d1 + d2;
1153 e = t1 - d1;
1154 t2 = ((d2 - e) + (d1 - (t1 - e)));
1155
1156 /* The result is t1 + t2, after normalization. */
1157 head_e1 = t1 + t2;
1158 tail_e1 = t2 - (head_e1 - t1);
1159 }
1160 head_prod[0] = head_e1;
1161 tail_prod[0] = tail_e1;
1162 /* imaginary part */
1163 d1 = (double) a_elem[0] * b_elem[1];
1164 d2 = (double) a_elem[1] * b_elem[0];
1165 {
1166 /* Compute double-double = double + double. */
1167 double e, t1, t2;
1168
1169 /* Knuth trick. */
1170 t1 = d1 + d2;
1171 e = t1 - d1;
1172 t2 = ((d2 - e) + (d1 - (t1 - e)));
1173
1174 /* The result is t1 + t2, after normalization. */
1175 head_e1 = t1 + t2;
1176 tail_e1 = t2 - (head_e1 - t1);
1177 }
1178 head_prod[1] = head_e1;
1179 tail_prod[1] = tail_e1;
1180 }
1181 {
1182 double head_t, tail_t;
1183 double head_a, tail_a;
1184 double head_b, tail_b;
1185 /* Real part */
1186 head_a = head_sum[0];
1187 tail_a = tail_sum[0];
1188 head_b = head_prod[0];
1189 tail_b = tail_prod[0];
1190 {
1191 /* Compute double-double = double-double + double-double. */
1192 double bv;
1193 double s1, s2, t1, t2;
1194
1195 /* Add two hi words. */
1196 s1 = head_a + head_b;
1197 bv = s1 - head_a;
1198 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1199
1200 /* Add two lo words. */
1201 t1 = tail_a + tail_b;
1202 bv = t1 - tail_a;
1203 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1204
1205 s2 += t1;
1206
1207 /* Renormalize (s1, s2) to (t1, s2) */
1208 t1 = s1 + s2;
1209 s2 = s2 - (t1 - s1);
1210
1211 t2 += s2;
1212
1213 /* Renormalize (t1, t2) */
1214 head_t = t1 + t2;
1215 tail_t = t2 - (head_t - t1);
1216 }
1217 head_sum[0] = head_t;
1218 tail_sum[0] = tail_t;
1219 /* Imaginary part */
1220 head_a = head_sum[1];
1221 tail_a = tail_sum[1];
1222 head_b = head_prod[1];
1223 tail_b = tail_prod[1];
1224 {
1225 /* Compute double-double = double-double + double-double. */
1226 double bv;
1227 double s1, s2, t1, t2;
1228
1229 /* Add two hi words. */
1230 s1 = head_a + head_b;
1231 bv = s1 - head_a;
1232 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1233
1234 /* Add two lo words. */
1235 t1 = tail_a + tail_b;
1236 bv = t1 - tail_a;
1237 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1238
1239 s2 += t1;
1240
1241 /* Renormalize (s1, s2) to (t1, s2) */
1242 t1 = s1 + s2;
1243 s2 = s2 - (t1 - s1);
1244
1245 t2 += s2;
1246
1247 /* Renormalize (t1, t2) */
1248 head_t = t1 + t2;
1249 tail_t = t2 - (head_t - t1);
1250 }
1251 head_sum[1] = head_t;
1252 tail_sum[1] = tail_t;
1253 }
1254 }
1255
1256 c_elem[0] = c_i[cij];
1257 c_elem[1] = c_i[cij + 1];
1258 {
1259 double head_e1, tail_e1;
1260 double d1;
1261 double d2;
1262 /* Real part */
1263 d1 = (double) c_elem[0] * beta_i[0];
1264 d2 = (double) -c_elem[1] * beta_i[1];
1265 {
1266 /* Compute double-double = double + double. */
1267 double e, t1, t2;
1268
1269 /* Knuth trick. */
1270 t1 = d1 + d2;
1271 e = t1 - d1;
1272 t2 = ((d2 - e) + (d1 - (t1 - e)));
1273
1274 /* The result is t1 + t2, after normalization. */
1275 head_e1 = t1 + t2;
1276 tail_e1 = t2 - (head_e1 - t1);
1277 }
1278 head_tmp2[0] = head_e1;
1279 tail_tmp2[0] = tail_e1;
1280 /* imaginary part */
1281 d1 = (double) c_elem[0] * beta_i[1];
1282 d2 = (double) c_elem[1] * beta_i[0];
1283 {
1284 /* Compute double-double = double + double. */
1285 double e, t1, t2;
1286
1287 /* Knuth trick. */
1288 t1 = d1 + d2;
1289 e = t1 - d1;
1290 t2 = ((d2 - e) + (d1 - (t1 - e)));
1291
1292 /* The result is t1 + t2, after normalization. */
1293 head_e1 = t1 + t2;
1294 tail_e1 = t2 - (head_e1 - t1);
1295 }
1296 head_tmp2[1] = head_e1;
1297 tail_tmp2[1] = tail_e1;
1298 }
1299 head_tmp1[0] = head_sum[0];
1300 tail_tmp1[0] = tail_sum[0];
1301 head_tmp1[1] = head_sum[1];
1302 tail_tmp1[1] = tail_sum[1];
1303 {
1304 double head_t, tail_t;
1305 double head_a, tail_a;
1306 double head_b, tail_b;
1307 /* Real part */
1308 head_a = head_tmp2[0];
1309 tail_a = tail_tmp2[0];
1310 head_b = head_tmp1[0];
1311 tail_b = tail_tmp1[0];
1312 {
1313 /* Compute double-double = double-double + double-double. */
1314 double bv;
1315 double s1, s2, t1, t2;
1316
1317 /* Add two hi words. */
1318 s1 = head_a + head_b;
1319 bv = s1 - head_a;
1320 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1321
1322 /* Add two lo words. */
1323 t1 = tail_a + tail_b;
1324 bv = t1 - tail_a;
1325 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1326
1327 s2 += t1;
1328
1329 /* Renormalize (s1, s2) to (t1, s2) */
1330 t1 = s1 + s2;
1331 s2 = s2 - (t1 - s1);
1332
1333 t2 += s2;
1334
1335 /* Renormalize (t1, t2) */
1336 head_t = t1 + t2;
1337 tail_t = t2 - (head_t - t1);
1338 }
1339 head_tmp1[0] = head_t;
1340 tail_tmp1[0] = tail_t;
1341 /* Imaginary part */
1342 head_a = head_tmp2[1];
1343 tail_a = tail_tmp2[1];
1344 head_b = head_tmp1[1];
1345 tail_b = tail_tmp1[1];
1346 {
1347 /* Compute double-double = double-double + double-double. */
1348 double bv;
1349 double s1, s2, t1, t2;
1350
1351 /* Add two hi words. */
1352 s1 = head_a + head_b;
1353 bv = s1 - head_a;
1354 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1355
1356 /* Add two lo words. */
1357 t1 = tail_a + tail_b;
1358 bv = t1 - tail_a;
1359 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1360
1361 s2 += t1;
1362
1363 /* Renormalize (s1, s2) to (t1, s2) */
1364 t1 = s1 + s2;
1365 s2 = s2 - (t1 - s1);
1366
1367 t2 += s2;
1368
1369 /* Renormalize (t1, t2) */
1370 head_t = t1 + t2;
1371 tail_t = t2 - (head_t - t1);
1372 }
1373 head_tmp1[1] = head_t;
1374 tail_tmp1[1] = tail_t;
1375 }
1376 c_i[cij] = head_tmp1[0];
1377 c_i[cij + 1] = head_tmp1[1];
1378 }
1379 }
1380 }
1381
1382 } else {
1383
1384 /* The most general form, C <-- alpha * A * B + beta * C */
1385 ci = 0;
1386 ai = 0;
1387 for (i = 0; i < m; i++, ci += incci, ai += incai) {
1388
1389 cij = ci;
1390 bj = 0;
1391
1392 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
1393
1394 aih = ai;
1395 bhj = bj;
1396
1397 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
1398
1399 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
1400 a_elem[0] = a_i[aih];
1401 a_elem[1] = a_i[aih + 1];
1402 b_elem[0] = b_i[bhj];
1403 b_elem[1] = b_i[bhj + 1];
1404 if (transa == blas_conj_trans) {
1405 a_elem[1] = -a_elem[1];
1406 }
1407 if (transb == blas_conj_trans) {
1408 b_elem[1] = -b_elem[1];
1409 }
1410 {
1411 double head_e1, tail_e1;
1412 double d1;
1413 double d2;
1414 /* Real part */
1415 d1 = (double) a_elem[0] * b_elem[0];
1416 d2 = (double) -a_elem[1] * b_elem[1];
1417 {
1418 /* Compute double-double = double + double. */
1419 double e, t1, t2;
1420
1421 /* Knuth trick. */
1422 t1 = d1 + d2;
1423 e = t1 - d1;
1424 t2 = ((d2 - e) + (d1 - (t1 - e)));
1425
1426 /* The result is t1 + t2, after normalization. */
1427 head_e1 = t1 + t2;
1428 tail_e1 = t2 - (head_e1 - t1);
1429 }
1430 head_prod[0] = head_e1;
1431 tail_prod[0] = tail_e1;
1432 /* imaginary part */
1433 d1 = (double) a_elem[0] * b_elem[1];
1434 d2 = (double) a_elem[1] * b_elem[0];
1435 {
1436 /* Compute double-double = double + double. */
1437 double e, t1, t2;
1438
1439 /* Knuth trick. */
1440 t1 = d1 + d2;
1441 e = t1 - d1;
1442 t2 = ((d2 - e) + (d1 - (t1 - e)));
1443
1444 /* The result is t1 + t2, after normalization. */
1445 head_e1 = t1 + t2;
1446 tail_e1 = t2 - (head_e1 - t1);
1447 }
1448 head_prod[1] = head_e1;
1449 tail_prod[1] = tail_e1;
1450 }
1451 {
1452 double head_t, tail_t;
1453 double head_a, tail_a;
1454 double head_b, tail_b;
1455 /* Real part */
1456 head_a = head_sum[0];
1457 tail_a = tail_sum[0];
1458 head_b = head_prod[0];
1459 tail_b = tail_prod[0];
1460 {
1461 /* Compute double-double = double-double + double-double. */
1462 double bv;
1463 double s1, s2, t1, t2;
1464
1465 /* Add two hi words. */
1466 s1 = head_a + head_b;
1467 bv = s1 - head_a;
1468 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1469
1470 /* Add two lo words. */
1471 t1 = tail_a + tail_b;
1472 bv = t1 - tail_a;
1473 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1474
1475 s2 += t1;
1476
1477 /* Renormalize (s1, s2) to (t1, s2) */
1478 t1 = s1 + s2;
1479 s2 = s2 - (t1 - s1);
1480
1481 t2 += s2;
1482
1483 /* Renormalize (t1, t2) */
1484 head_t = t1 + t2;
1485 tail_t = t2 - (head_t - t1);
1486 }
1487 head_sum[0] = head_t;
1488 tail_sum[0] = tail_t;
1489 /* Imaginary part */
1490 head_a = head_sum[1];
1491 tail_a = tail_sum[1];
1492 head_b = head_prod[1];
1493 tail_b = tail_prod[1];
1494 {
1495 /* Compute double-double = double-double + double-double. */
1496 double bv;
1497 double s1, s2, t1, t2;
1498
1499 /* Add two hi words. */
1500 s1 = head_a + head_b;
1501 bv = s1 - head_a;
1502 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1503
1504 /* Add two lo words. */
1505 t1 = tail_a + tail_b;
1506 bv = t1 - tail_a;
1507 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1508
1509 s2 += t1;
1510
1511 /* Renormalize (s1, s2) to (t1, s2) */
1512 t1 = s1 + s2;
1513 s2 = s2 - (t1 - s1);
1514
1515 t2 += s2;
1516
1517 /* Renormalize (t1, t2) */
1518 head_t = t1 + t2;
1519 tail_t = t2 - (head_t - t1);
1520 }
1521 head_sum[1] = head_t;
1522 tail_sum[1] = tail_t;
1523 }
1524 }
1525
1526 {
1527 double cd[2];
1528 cd[0] = (double) alpha_i[0];
1529 cd[1] = (double) alpha_i[1];
1530 {
1531 /* Compute complex-extra = complex-extra * complex-double. */
1532 double head_a0, tail_a0;
1533 double head_a1, tail_a1;
1534 double head_t1, tail_t1;
1535 double head_t2, tail_t2;
1536 head_a0 = head_sum[0];
1537 tail_a0 = tail_sum[0];
1538 head_a1 = head_sum[1];
1539 tail_a1 = tail_sum[1];
1540 /* real part */
1541 {
1542 /* Compute double-double = double-double * double. */
1543 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1544
1545 con = head_a0 * split;
1546 a11 = con - head_a0;
1547 a11 = con - a11;
1548 a21 = head_a0 - a11;
1549 con = cd[0] * split;
1550 b1 = con - cd[0];
1551 b1 = con - b1;
1552 b2 = cd[0] - b1;
1553
1554 c11 = head_a0 * cd[0];
1555 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1556
1557 c2 = tail_a0 * cd[0];
1558 t1 = c11 + c2;
1559 t2 = (c2 - (t1 - c11)) + c21;
1560
1561 head_t1 = t1 + t2;
1562 tail_t1 = t2 - (head_t1 - t1);
1563 }
1564 {
1565 /* Compute double-double = double-double * double. */
1566 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1567
1568 con = head_a1 * split;
1569 a11 = con - head_a1;
1570 a11 = con - a11;
1571 a21 = head_a1 - a11;
1572 con = cd[1] * split;
1573 b1 = con - cd[1];
1574 b1 = con - b1;
1575 b2 = cd[1] - b1;
1576
1577 c11 = head_a1 * cd[1];
1578 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1579
1580 c2 = tail_a1 * cd[1];
1581 t1 = c11 + c2;
1582 t2 = (c2 - (t1 - c11)) + c21;
1583
1584 head_t2 = t1 + t2;
1585 tail_t2 = t2 - (head_t2 - t1);
1586 }
1587 head_t2 = -head_t2;
1588 tail_t2 = -tail_t2;
1589 {
1590 /* Compute double-double = double-double + double-double. */
1591 double bv;
1592 double s1, s2, t1, t2;
1593
1594 /* Add two hi words. */
1595 s1 = head_t1 + head_t2;
1596 bv = s1 - head_t1;
1597 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1598
1599 /* Add two lo words. */
1600 t1 = tail_t1 + tail_t2;
1601 bv = t1 - tail_t1;
1602 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1603
1604 s2 += t1;
1605
1606 /* Renormalize (s1, s2) to (t1, s2) */
1607 t1 = s1 + s2;
1608 s2 = s2 - (t1 - s1);
1609
1610 t2 += s2;
1611
1612 /* Renormalize (t1, t2) */
1613 head_t1 = t1 + t2;
1614 tail_t1 = t2 - (head_t1 - t1);
1615 }
1616 head_tmp1[0] = head_t1;
1617 tail_tmp1[0] = tail_t1;
1618 /* imaginary part */
1619 {
1620 /* Compute double-double = double-double * double. */
1621 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1622
1623 con = head_a1 * split;
1624 a11 = con - head_a1;
1625 a11 = con - a11;
1626 a21 = head_a1 - a11;
1627 con = cd[0] * split;
1628 b1 = con - cd[0];
1629 b1 = con - b1;
1630 b2 = cd[0] - b1;
1631
1632 c11 = head_a1 * cd[0];
1633 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1634
1635 c2 = tail_a1 * cd[0];
1636 t1 = c11 + c2;
1637 t2 = (c2 - (t1 - c11)) + c21;
1638
1639 head_t1 = t1 + t2;
1640 tail_t1 = t2 - (head_t1 - t1);
1641 }
1642 {
1643 /* Compute double-double = double-double * double. */
1644 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1645
1646 con = head_a0 * split;
1647 a11 = con - head_a0;
1648 a11 = con - a11;
1649 a21 = head_a0 - a11;
1650 con = cd[1] * split;
1651 b1 = con - cd[1];
1652 b1 = con - b1;
1653 b2 = cd[1] - b1;
1654
1655 c11 = head_a0 * cd[1];
1656 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1657
1658 c2 = tail_a0 * cd[1];
1659 t1 = c11 + c2;
1660 t2 = (c2 - (t1 - c11)) + c21;
1661
1662 head_t2 = t1 + t2;
1663 tail_t2 = t2 - (head_t2 - t1);
1664 }
1665 {
1666 /* Compute double-double = double-double + double-double. */
1667 double bv;
1668 double s1, s2, t1, t2;
1669
1670 /* Add two hi words. */
1671 s1 = head_t1 + head_t2;
1672 bv = s1 - head_t1;
1673 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1674
1675 /* Add two lo words. */
1676 t1 = tail_t1 + tail_t2;
1677 bv = t1 - tail_t1;
1678 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1679
1680 s2 += t1;
1681
1682 /* Renormalize (s1, s2) to (t1, s2) */
1683 t1 = s1 + s2;
1684 s2 = s2 - (t1 - s1);
1685
1686 t2 += s2;
1687
1688 /* Renormalize (t1, t2) */
1689 head_t1 = t1 + t2;
1690 tail_t1 = t2 - (head_t1 - t1);
1691 }
1692 head_tmp1[1] = head_t1;
1693 tail_tmp1[1] = tail_t1;
1694 }
1695
1696 }
1697 c_elem[0] = c_i[cij];
1698 c_elem[1] = c_i[cij + 1];
1699 {
1700 double head_e1, tail_e1;
1701 double d1;
1702 double d2;
1703 /* Real part */
1704 d1 = (double) c_elem[0] * beta_i[0];
1705 d2 = (double) -c_elem[1] * beta_i[1];
1706 {
1707 /* Compute double-double = double + double. */
1708 double e, t1, t2;
1709
1710 /* Knuth trick. */
1711 t1 = d1 + d2;
1712 e = t1 - d1;
1713 t2 = ((d2 - e) + (d1 - (t1 - e)));
1714
1715 /* The result is t1 + t2, after normalization. */
1716 head_e1 = t1 + t2;
1717 tail_e1 = t2 - (head_e1 - t1);
1718 }
1719 head_tmp2[0] = head_e1;
1720 tail_tmp2[0] = tail_e1;
1721 /* imaginary part */
1722 d1 = (double) c_elem[0] * beta_i[1];
1723 d2 = (double) c_elem[1] * beta_i[0];
1724 {
1725 /* Compute double-double = double + double. */
1726 double e, t1, t2;
1727
1728 /* Knuth trick. */
1729 t1 = d1 + d2;
1730 e = t1 - d1;
1731 t2 = ((d2 - e) + (d1 - (t1 - e)));
1732
1733 /* The result is t1 + t2, after normalization. */
1734 head_e1 = t1 + t2;
1735 tail_e1 = t2 - (head_e1 - t1);
1736 }
1737 head_tmp2[1] = head_e1;
1738 tail_tmp2[1] = tail_e1;
1739 }
1740 {
1741 double head_t, tail_t;
1742 double head_a, tail_a;
1743 double head_b, tail_b;
1744 /* Real part */
1745 head_a = head_tmp1[0];
1746 tail_a = tail_tmp1[0];
1747 head_b = head_tmp2[0];
1748 tail_b = tail_tmp2[0];
1749 {
1750 /* Compute double-double = double-double + double-double. */
1751 double bv;
1752 double s1, s2, t1, t2;
1753
1754 /* Add two hi words. */
1755 s1 = head_a + head_b;
1756 bv = s1 - head_a;
1757 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1758
1759 /* Add two lo words. */
1760 t1 = tail_a + tail_b;
1761 bv = t1 - tail_a;
1762 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1763
1764 s2 += t1;
1765
1766 /* Renormalize (s1, s2) to (t1, s2) */
1767 t1 = s1 + s2;
1768 s2 = s2 - (t1 - s1);
1769
1770 t2 += s2;
1771
1772 /* Renormalize (t1, t2) */
1773 head_t = t1 + t2;
1774 tail_t = t2 - (head_t - t1);
1775 }
1776 head_tmp1[0] = head_t;
1777 tail_tmp1[0] = tail_t;
1778 /* Imaginary part */
1779 head_a = head_tmp1[1];
1780 tail_a = tail_tmp1[1];
1781 head_b = head_tmp2[1];
1782 tail_b = tail_tmp2[1];
1783 {
1784 /* Compute double-double = double-double + double-double. */
1785 double bv;
1786 double s1, s2, t1, t2;
1787
1788 /* Add two hi words. */
1789 s1 = head_a + head_b;
1790 bv = s1 - head_a;
1791 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1792
1793 /* Add two lo words. */
1794 t1 = tail_a + tail_b;
1795 bv = t1 - tail_a;
1796 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1797
1798 s2 += t1;
1799
1800 /* Renormalize (s1, s2) to (t1, s2) */
1801 t1 = s1 + s2;
1802 s2 = s2 - (t1 - s1);
1803
1804 t2 += s2;
1805
1806 /* Renormalize (t1, t2) */
1807 head_t = t1 + t2;
1808 tail_t = t2 - (head_t - t1);
1809 }
1810 head_tmp1[1] = head_t;
1811 tail_tmp1[1] = tail_t;
1812 }
1813 c_i[cij] = head_tmp1[0];
1814 c_i[cij + 1] = head_tmp1[1];
1815 }
1816 }
1817
1818 }
1819
1820 FPU_FIX_STOP;
1821
1822 break;
1823 }
1824 }
1825 }
1826