1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_cgemm_x(enum blas_order_type order,enum blas_trans_type transa,enum blas_trans_type transb,int m,int n,int k,const void * alpha,const void * a,int lda,const void * b,int ldb,const void * beta,void * c,int ldc,enum blas_prec_type prec)3 void BLAS_cgemm_x(enum blas_order_type order, enum blas_trans_type transa,
4 		  enum blas_trans_type transb, int m, int n, int k,
5 		  const void *alpha, const void *a, int lda, const void *b,
6 		  int ldb, const void *beta, void *c, int ldc,
7 		  enum blas_prec_type prec)
8 
9 /*
10  * Purpose
11  * =======
12  *
13  * This routine computes the matrix product:
14  *
15  *      C   <-  alpha * op(A) * op(B)  +  beta * C .
16  *
17  * where op(M) represents either M, M transpose,
18  * or M conjugate transpose.
19  *
20  * Arguments
21  * =========
22  *
23  * order   (input) enum blas_order_type
24  *         Storage format of input matrices A, B, and C.
25  *
26  * transa  (input) enum blas_trans_type
27  *         Operation to be done on matrix A before multiplication.
28  *         Can be no operation, transposition, or conjugate transposition.
29  *
30  * transb  (input) enum blas_trans_type
31  *         Operation to be done on matrix B before multiplication.
32  *         Can be no operation, transposition, or conjugate transposition.
33  *
34  * m n k   (input) int
35  *         The dimensions of matrices A, B, and C.
36  *         Matrix C is m-by-n matrix.
37  *         Matrix A is m-by-k if A is not transposed,
38  *                     k-by-m otherwise.
39  *         Matrix B is k-by-n if B is not transposed,
40  *                     n-by-k otherwise.
41  *
42  * alpha   (input) const void*
43  *
44  * a       (input) const void*
45  *         matrix A.
46  *
47  * lda     (input) int
48  *         leading dimension of A.
49  *
50  * b       (input) const void*
51  *         matrix B
52  *
53  * ldb     (input) int
54  *         leading dimension of B.
55  *
56  * beta    (input) const void*
57  *
58  * c       (input/output) void*
59  *         matrix C
60  *
61  * ldc     (input) int
62  *         leading dimension of C.
63  *
64  * prec   (input) enum blas_prec_type
65  *        Specifies the internal precision to be used.
66  *        = blas_prec_single: single precision.
67  *        = blas_prec_double: double precision.
68  *        = blas_prec_extra : anything at least 1.5 times as accurate
69  *                            than double, and wider than 80-bits.
70  *                            We use double-double in our implementation.
71  *
72  */
73 {
74   static const char routine_name[] = "BLAS_cgemm_x";
75   switch (prec) {
76 
77   case blas_prec_single:{
78 
79 
80       /* Integer Index Variables */
81       int i, j, h;
82 
83       int ai, bj, ci;
84       int aih, bhj, cij;	/* Index into matrices a, b, c during multiply */
85 
86       int incai, incaih;	/* Index increments for matrix a */
87       int incbj, incbhj;	/* Index increments for matrix b */
88       int incci, inccij;	/* Index increments for matrix c */
89 
90       /* Input Matrices */
91       const float *a_i = (float *) a;
92       const float *b_i = (float *) b;
93 
94       /* Output Matrix */
95       float *c_i = (float *) c;
96 
97       /* Input Scalars */
98       float *alpha_i = (float *) alpha;
99       float *beta_i = (float *) beta;
100 
101       /* Temporary Floating-Point Variables */
102       float a_elem[2];
103       float b_elem[2];
104       float c_elem[2];
105       float prod[2];
106       float sum[2];
107       float tmp1[2];
108       float tmp2[2];
109 
110 
111 
112       /* Test for error conditions */
113       if (m < 0)
114 	BLAS_error(routine_name, -4, m, NULL);
115       if (n < 0)
116 	BLAS_error(routine_name, -5, n, NULL);
117       if (k < 0)
118 	BLAS_error(routine_name, -6, k, NULL);
119 
120       if (order == blas_colmajor) {
121 
122 	if (ldc < m)
123 	  BLAS_error(routine_name, -14, ldc, NULL);
124 
125 	if (transa == blas_no_trans) {
126 	  if (lda < m)
127 	    BLAS_error(routine_name, -9, lda, NULL);
128 	} else {
129 	  if (lda < k)
130 	    BLAS_error(routine_name, -9, lda, NULL);
131 	}
132 
133 	if (transb == blas_no_trans) {
134 	  if (ldb < k)
135 	    BLAS_error(routine_name, -11, ldb, NULL);
136 	} else {
137 	  if (ldb < n)
138 	    BLAS_error(routine_name, -11, ldb, NULL);
139 	}
140 
141       } else {
142 	/* row major */
143 	if (ldc < n)
144 	  BLAS_error(routine_name, -14, ldc, NULL);
145 
146 	if (transa == blas_no_trans) {
147 	  if (lda < k)
148 	    BLAS_error(routine_name, -9, lda, NULL);
149 	} else {
150 	  if (lda < m)
151 	    BLAS_error(routine_name, -9, lda, NULL);
152 	}
153 
154 	if (transb == blas_no_trans) {
155 	  if (ldb < n)
156 	    BLAS_error(routine_name, -11, ldb, NULL);
157 	} else {
158 	  if (ldb < k)
159 	    BLAS_error(routine_name, -11, ldb, NULL);
160 	}
161       }
162 
163       /* Test for no-op */
164       if (n == 0 || m == 0 || k == 0)
165 	return;
166       if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
167 	  && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
168 	return;
169       }
170 
171       /* Set Index Parameters */
172       if (order == blas_colmajor) {
173 	incci = 1;
174 	inccij = ldc;
175 
176 	if (transa == blas_no_trans) {
177 	  incai = 1;
178 	  incaih = lda;
179 	} else {
180 	  incai = lda;
181 	  incaih = 1;
182 	}
183 
184 	if (transb == blas_no_trans) {
185 	  incbj = ldb;
186 	  incbhj = 1;
187 	} else {
188 	  incbj = 1;
189 	  incbhj = ldb;
190 	}
191 
192       } else {
193 	/* row major */
194 	incci = ldc;
195 	inccij = 1;
196 
197 	if (transa == blas_no_trans) {
198 	  incai = lda;
199 	  incaih = 1;
200 	} else {
201 	  incai = 1;
202 	  incaih = lda;
203 	}
204 
205 	if (transb == blas_no_trans) {
206 	  incbj = 1;
207 	  incbhj = ldb;
208 	} else {
209 	  incbj = ldb;
210 	  incbhj = 1;
211 	}
212 
213       }
214 
215 
216 
217       /* Ajustment to increments */
218       incci *= 2;
219       inccij *= 2;
220       incai *= 2;
221       incaih *= 2;
222       incbj *= 2;
223       incbhj *= 2;
224 
225       /* alpha = 0.  In this case, just return beta * C */
226       if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
227 
228 	ci = 0;
229 	for (i = 0; i < m; i++, ci += incci) {
230 	  cij = ci;
231 	  for (j = 0; j < n; j++, cij += inccij) {
232 	    c_elem[0] = c_i[cij];
233 	    c_elem[1] = c_i[cij + 1];
234 	    {
235 	      tmp1[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
236 	      tmp1[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
237 	    }
238 
239 	    c_i[cij] = tmp1[0];
240 	    c_i[cij + 1] = tmp1[1];
241 	  }
242 	}
243 
244       } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
245 
246 	/* Case alpha == 1. */
247 
248 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
249 	  /* Case alpha == 1, beta == 0.   We compute  C <--- A * B */
250 
251 	  ci = 0;
252 	  ai = 0;
253 	  for (i = 0; i < m; i++, ci += incci, ai += incai) {
254 
255 	    cij = ci;
256 	    bj = 0;
257 
258 	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
259 
260 	      aih = ai;
261 	      bhj = bj;
262 
263 	      sum[0] = sum[1] = 0.0;
264 
265 	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
266 		a_elem[0] = a_i[aih];
267 		a_elem[1] = a_i[aih + 1];
268 		b_elem[0] = b_i[bhj];
269 		b_elem[1] = b_i[bhj + 1];
270 		if (transa == blas_conj_trans) {
271 		  a_elem[1] = -a_elem[1];
272 		}
273 		if (transb == blas_conj_trans) {
274 		  b_elem[1] = -b_elem[1];
275 		}
276 		{
277 		  prod[0] = a_elem[0] * b_elem[0] - a_elem[1] * b_elem[1];
278 		  prod[1] = a_elem[0] * b_elem[1] + a_elem[1] * b_elem[0];
279 		}
280 
281 		sum[0] = sum[0] + prod[0];
282 		sum[1] = sum[1] + prod[1];
283 	      }
284 	      c_i[cij] = sum[0];
285 	      c_i[cij + 1] = sum[1];
286 	    }
287 	  }
288 
289 	} else {
290 	  /* Case alpha == 1, but beta != 0.
291 	     We compute   C <--- A * B + beta * C   */
292 
293 	  ci = 0;
294 	  ai = 0;
295 	  for (i = 0; i < m; i++, ci += incci, ai += incai) {
296 
297 	    cij = ci;
298 	    bj = 0;
299 
300 	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
301 
302 	      aih = ai;
303 	      bhj = bj;
304 
305 	      sum[0] = sum[1] = 0.0;
306 
307 	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
308 		a_elem[0] = a_i[aih];
309 		a_elem[1] = a_i[aih + 1];
310 		b_elem[0] = b_i[bhj];
311 		b_elem[1] = b_i[bhj + 1];
312 		if (transa == blas_conj_trans) {
313 		  a_elem[1] = -a_elem[1];
314 		}
315 		if (transb == blas_conj_trans) {
316 		  b_elem[1] = -b_elem[1];
317 		}
318 		{
319 		  prod[0] = a_elem[0] * b_elem[0] - a_elem[1] * b_elem[1];
320 		  prod[1] = a_elem[0] * b_elem[1] + a_elem[1] * b_elem[0];
321 		}
322 
323 		sum[0] = sum[0] + prod[0];
324 		sum[1] = sum[1] + prod[1];
325 	      }
326 
327 	      c_elem[0] = c_i[cij];
328 	      c_elem[1] = c_i[cij + 1];
329 	      {
330 		tmp2[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
331 		tmp2[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
332 	      }
333 
334 	      tmp1[0] = sum[0];
335 	      tmp1[1] = sum[1];
336 	      tmp1[0] = tmp2[0] + tmp1[0];
337 	      tmp1[1] = tmp2[1] + tmp1[1];
338 	      c_i[cij] = tmp1[0];
339 	      c_i[cij + 1] = tmp1[1];
340 	    }
341 	  }
342 	}
343 
344       } else {
345 
346 	/* The most general form,   C <-- alpha * A * B + beta * C  */
347 	ci = 0;
348 	ai = 0;
349 	for (i = 0; i < m; i++, ci += incci, ai += incai) {
350 
351 	  cij = ci;
352 	  bj = 0;
353 
354 	  for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
355 
356 	    aih = ai;
357 	    bhj = bj;
358 
359 	    sum[0] = sum[1] = 0.0;
360 
361 	    for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
362 	      a_elem[0] = a_i[aih];
363 	      a_elem[1] = a_i[aih + 1];
364 	      b_elem[0] = b_i[bhj];
365 	      b_elem[1] = b_i[bhj + 1];
366 	      if (transa == blas_conj_trans) {
367 		a_elem[1] = -a_elem[1];
368 	      }
369 	      if (transb == blas_conj_trans) {
370 		b_elem[1] = -b_elem[1];
371 	      }
372 	      {
373 		prod[0] = a_elem[0] * b_elem[0] - a_elem[1] * b_elem[1];
374 		prod[1] = a_elem[0] * b_elem[1] + a_elem[1] * b_elem[0];
375 	      }
376 
377 	      sum[0] = sum[0] + prod[0];
378 	      sum[1] = sum[1] + prod[1];
379 	    }
380 
381 	    {
382 	      tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
383 	      tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
384 	    }
385 
386 	    c_elem[0] = c_i[cij];
387 	    c_elem[1] = c_i[cij + 1];
388 	    {
389 	      tmp2[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
390 	      tmp2[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
391 	    }
392 
393 	    tmp1[0] = tmp1[0] + tmp2[0];
394 	    tmp1[1] = tmp1[1] + tmp2[1];
395 	    c_i[cij] = tmp1[0];
396 	    c_i[cij + 1] = tmp1[1];
397 	  }
398 	}
399 
400       }
401 
402 
403 
404       break;
405     }
406   case blas_prec_double:
407   case blas_prec_indigenous:{
408 
409 
410       /* Integer Index Variables */
411       int i, j, h;
412 
413       int ai, bj, ci;
414       int aih, bhj, cij;	/* Index into matrices a, b, c during multiply */
415 
416       int incai, incaih;	/* Index increments for matrix a */
417       int incbj, incbhj;	/* Index increments for matrix b */
418       int incci, inccij;	/* Index increments for matrix c */
419 
420       /* Input Matrices */
421       const float *a_i = (float *) a;
422       const float *b_i = (float *) b;
423 
424       /* Output Matrix */
425       float *c_i = (float *) c;
426 
427       /* Input Scalars */
428       float *alpha_i = (float *) alpha;
429       float *beta_i = (float *) beta;
430 
431       /* Temporary Floating-Point Variables */
432       float a_elem[2];
433       float b_elem[2];
434       float c_elem[2];
435       double prod[2];
436       double sum[2];
437       double tmp1[2];
438       double tmp2[2];
439 
440 
441 
442       /* Test for error conditions */
443       if (m < 0)
444 	BLAS_error(routine_name, -4, m, NULL);
445       if (n < 0)
446 	BLAS_error(routine_name, -5, n, NULL);
447       if (k < 0)
448 	BLAS_error(routine_name, -6, k, NULL);
449 
450       if (order == blas_colmajor) {
451 
452 	if (ldc < m)
453 	  BLAS_error(routine_name, -14, ldc, NULL);
454 
455 	if (transa == blas_no_trans) {
456 	  if (lda < m)
457 	    BLAS_error(routine_name, -9, lda, NULL);
458 	} else {
459 	  if (lda < k)
460 	    BLAS_error(routine_name, -9, lda, NULL);
461 	}
462 
463 	if (transb == blas_no_trans) {
464 	  if (ldb < k)
465 	    BLAS_error(routine_name, -11, ldb, NULL);
466 	} else {
467 	  if (ldb < n)
468 	    BLAS_error(routine_name, -11, ldb, NULL);
469 	}
470 
471       } else {
472 	/* row major */
473 	if (ldc < n)
474 	  BLAS_error(routine_name, -14, ldc, NULL);
475 
476 	if (transa == blas_no_trans) {
477 	  if (lda < k)
478 	    BLAS_error(routine_name, -9, lda, NULL);
479 	} else {
480 	  if (lda < m)
481 	    BLAS_error(routine_name, -9, lda, NULL);
482 	}
483 
484 	if (transb == blas_no_trans) {
485 	  if (ldb < n)
486 	    BLAS_error(routine_name, -11, ldb, NULL);
487 	} else {
488 	  if (ldb < k)
489 	    BLAS_error(routine_name, -11, ldb, NULL);
490 	}
491       }
492 
493       /* Test for no-op */
494       if (n == 0 || m == 0 || k == 0)
495 	return;
496       if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
497 	  && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
498 	return;
499       }
500 
501       /* Set Index Parameters */
502       if (order == blas_colmajor) {
503 	incci = 1;
504 	inccij = ldc;
505 
506 	if (transa == blas_no_trans) {
507 	  incai = 1;
508 	  incaih = lda;
509 	} else {
510 	  incai = lda;
511 	  incaih = 1;
512 	}
513 
514 	if (transb == blas_no_trans) {
515 	  incbj = ldb;
516 	  incbhj = 1;
517 	} else {
518 	  incbj = 1;
519 	  incbhj = ldb;
520 	}
521 
522       } else {
523 	/* row major */
524 	incci = ldc;
525 	inccij = 1;
526 
527 	if (transa == blas_no_trans) {
528 	  incai = lda;
529 	  incaih = 1;
530 	} else {
531 	  incai = 1;
532 	  incaih = lda;
533 	}
534 
535 	if (transb == blas_no_trans) {
536 	  incbj = 1;
537 	  incbhj = ldb;
538 	} else {
539 	  incbj = ldb;
540 	  incbhj = 1;
541 	}
542 
543       }
544 
545 
546 
547       /* Ajustment to increments */
548       incci *= 2;
549       inccij *= 2;
550       incai *= 2;
551       incaih *= 2;
552       incbj *= 2;
553       incbhj *= 2;
554 
555       /* alpha = 0.  In this case, just return beta * C */
556       if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
557 
558 	ci = 0;
559 	for (i = 0; i < m; i++, ci += incci) {
560 	  cij = ci;
561 	  for (j = 0; j < n; j++, cij += inccij) {
562 	    c_elem[0] = c_i[cij];
563 	    c_elem[1] = c_i[cij + 1];
564 	    {
565 	      tmp1[0] =
566 		(double) c_elem[0] * beta_i[0] -
567 		(double) c_elem[1] * beta_i[1];
568 	      tmp1[1] =
569 		(double) c_elem[0] * beta_i[1] +
570 		(double) c_elem[1] * beta_i[0];
571 	    }
572 	    c_i[cij] = tmp1[0];
573 	    c_i[cij + 1] = tmp1[1];
574 	  }
575 	}
576 
577       } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
578 
579 	/* Case alpha == 1. */
580 
581 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
582 	  /* Case alpha == 1, beta == 0.   We compute  C <--- A * B */
583 
584 	  ci = 0;
585 	  ai = 0;
586 	  for (i = 0; i < m; i++, ci += incci, ai += incai) {
587 
588 	    cij = ci;
589 	    bj = 0;
590 
591 	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
592 
593 	      aih = ai;
594 	      bhj = bj;
595 
596 	      sum[0] = sum[1] = 0.0;
597 
598 	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
599 		a_elem[0] = a_i[aih];
600 		a_elem[1] = a_i[aih + 1];
601 		b_elem[0] = b_i[bhj];
602 		b_elem[1] = b_i[bhj + 1];
603 		if (transa == blas_conj_trans) {
604 		  a_elem[1] = -a_elem[1];
605 		}
606 		if (transb == blas_conj_trans) {
607 		  b_elem[1] = -b_elem[1];
608 		}
609 		{
610 		  prod[0] =
611 		    (double) a_elem[0] * b_elem[0] -
612 		    (double) a_elem[1] * b_elem[1];
613 		  prod[1] =
614 		    (double) a_elem[0] * b_elem[1] +
615 		    (double) a_elem[1] * b_elem[0];
616 		}
617 		sum[0] = sum[0] + prod[0];
618 		sum[1] = sum[1] + prod[1];
619 	      }
620 	      c_i[cij] = sum[0];
621 	      c_i[cij + 1] = sum[1];
622 	    }
623 	  }
624 
625 	} else {
626 	  /* Case alpha == 1, but beta != 0.
627 	     We compute   C <--- A * B + beta * C   */
628 
629 	  ci = 0;
630 	  ai = 0;
631 	  for (i = 0; i < m; i++, ci += incci, ai += incai) {
632 
633 	    cij = ci;
634 	    bj = 0;
635 
636 	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
637 
638 	      aih = ai;
639 	      bhj = bj;
640 
641 	      sum[0] = sum[1] = 0.0;
642 
643 	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
644 		a_elem[0] = a_i[aih];
645 		a_elem[1] = a_i[aih + 1];
646 		b_elem[0] = b_i[bhj];
647 		b_elem[1] = b_i[bhj + 1];
648 		if (transa == blas_conj_trans) {
649 		  a_elem[1] = -a_elem[1];
650 		}
651 		if (transb == blas_conj_trans) {
652 		  b_elem[1] = -b_elem[1];
653 		}
654 		{
655 		  prod[0] =
656 		    (double) a_elem[0] * b_elem[0] -
657 		    (double) a_elem[1] * b_elem[1];
658 		  prod[1] =
659 		    (double) a_elem[0] * b_elem[1] +
660 		    (double) a_elem[1] * b_elem[0];
661 		}
662 		sum[0] = sum[0] + prod[0];
663 		sum[1] = sum[1] + prod[1];
664 	      }
665 
666 	      c_elem[0] = c_i[cij];
667 	      c_elem[1] = c_i[cij + 1];
668 	      {
669 		tmp2[0] =
670 		  (double) c_elem[0] * beta_i[0] -
671 		  (double) c_elem[1] * beta_i[1];
672 		tmp2[1] =
673 		  (double) c_elem[0] * beta_i[1] +
674 		  (double) c_elem[1] * beta_i[0];
675 	      }
676 	      tmp1[0] = sum[0];
677 	      tmp1[1] = sum[1];
678 	      tmp1[0] = tmp2[0] + tmp1[0];
679 	      tmp1[1] = tmp2[1] + tmp1[1];
680 	      c_i[cij] = tmp1[0];
681 	      c_i[cij + 1] = tmp1[1];
682 	    }
683 	  }
684 	}
685 
686       } else {
687 
688 	/* The most general form,   C <-- alpha * A * B + beta * C  */
689 	ci = 0;
690 	ai = 0;
691 	for (i = 0; i < m; i++, ci += incci, ai += incai) {
692 
693 	  cij = ci;
694 	  bj = 0;
695 
696 	  for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
697 
698 	    aih = ai;
699 	    bhj = bj;
700 
701 	    sum[0] = sum[1] = 0.0;
702 
703 	    for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
704 	      a_elem[0] = a_i[aih];
705 	      a_elem[1] = a_i[aih + 1];
706 	      b_elem[0] = b_i[bhj];
707 	      b_elem[1] = b_i[bhj + 1];
708 	      if (transa == blas_conj_trans) {
709 		a_elem[1] = -a_elem[1];
710 	      }
711 	      if (transb == blas_conj_trans) {
712 		b_elem[1] = -b_elem[1];
713 	      }
714 	      {
715 		prod[0] =
716 		  (double) a_elem[0] * b_elem[0] -
717 		  (double) a_elem[1] * b_elem[1];
718 		prod[1] =
719 		  (double) a_elem[0] * b_elem[1] +
720 		  (double) a_elem[1] * b_elem[0];
721 	      }
722 	      sum[0] = sum[0] + prod[0];
723 	      sum[1] = sum[1] + prod[1];
724 	    }
725 
726 	    {
727 	      tmp1[0] =
728 		(double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
729 	      tmp1[1] =
730 		(double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
731 	    }
732 	    c_elem[0] = c_i[cij];
733 	    c_elem[1] = c_i[cij + 1];
734 	    {
735 	      tmp2[0] =
736 		(double) c_elem[0] * beta_i[0] -
737 		(double) c_elem[1] * beta_i[1];
738 	      tmp2[1] =
739 		(double) c_elem[0] * beta_i[1] +
740 		(double) c_elem[1] * beta_i[0];
741 	    }
742 	    tmp1[0] = tmp1[0] + tmp2[0];
743 	    tmp1[1] = tmp1[1] + tmp2[1];
744 	    c_i[cij] = tmp1[0];
745 	    c_i[cij + 1] = tmp1[1];
746 	  }
747 	}
748 
749       }
750 
751 
752 
753       break;
754     }
755 
756   case blas_prec_extra:{
757 
758 
759       /* Integer Index Variables */
760       int i, j, h;
761 
762       int ai, bj, ci;
763       int aih, bhj, cij;	/* Index into matrices a, b, c during multiply */
764 
765       int incai, incaih;	/* Index increments for matrix a */
766       int incbj, incbhj;	/* Index increments for matrix b */
767       int incci, inccij;	/* Index increments for matrix c */
768 
769       /* Input Matrices */
770       const float *a_i = (float *) a;
771       const float *b_i = (float *) b;
772 
773       /* Output Matrix */
774       float *c_i = (float *) c;
775 
776       /* Input Scalars */
777       float *alpha_i = (float *) alpha;
778       float *beta_i = (float *) beta;
779 
780       /* Temporary Floating-Point Variables */
781       float a_elem[2];
782       float b_elem[2];
783       float c_elem[2];
784       double head_prod[2], tail_prod[2];
785       double head_sum[2], tail_sum[2];
786       double head_tmp1[2], tail_tmp1[2];
787       double head_tmp2[2], tail_tmp2[2];
788 
789       FPU_FIX_DECL;
790 
791       /* Test for error conditions */
792       if (m < 0)
793 	BLAS_error(routine_name, -4, m, NULL);
794       if (n < 0)
795 	BLAS_error(routine_name, -5, n, NULL);
796       if (k < 0)
797 	BLAS_error(routine_name, -6, k, NULL);
798 
799       if (order == blas_colmajor) {
800 
801 	if (ldc < m)
802 	  BLAS_error(routine_name, -14, ldc, NULL);
803 
804 	if (transa == blas_no_trans) {
805 	  if (lda < m)
806 	    BLAS_error(routine_name, -9, lda, NULL);
807 	} else {
808 	  if (lda < k)
809 	    BLAS_error(routine_name, -9, lda, NULL);
810 	}
811 
812 	if (transb == blas_no_trans) {
813 	  if (ldb < k)
814 	    BLAS_error(routine_name, -11, ldb, NULL);
815 	} else {
816 	  if (ldb < n)
817 	    BLAS_error(routine_name, -11, ldb, NULL);
818 	}
819 
820       } else {
821 	/* row major */
822 	if (ldc < n)
823 	  BLAS_error(routine_name, -14, ldc, NULL);
824 
825 	if (transa == blas_no_trans) {
826 	  if (lda < k)
827 	    BLAS_error(routine_name, -9, lda, NULL);
828 	} else {
829 	  if (lda < m)
830 	    BLAS_error(routine_name, -9, lda, NULL);
831 	}
832 
833 	if (transb == blas_no_trans) {
834 	  if (ldb < n)
835 	    BLAS_error(routine_name, -11, ldb, NULL);
836 	} else {
837 	  if (ldb < k)
838 	    BLAS_error(routine_name, -11, ldb, NULL);
839 	}
840       }
841 
842       /* Test for no-op */
843       if (n == 0 || m == 0 || k == 0)
844 	return;
845       if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
846 	  && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
847 	return;
848       }
849 
850       /* Set Index Parameters */
851       if (order == blas_colmajor) {
852 	incci = 1;
853 	inccij = ldc;
854 
855 	if (transa == blas_no_trans) {
856 	  incai = 1;
857 	  incaih = lda;
858 	} else {
859 	  incai = lda;
860 	  incaih = 1;
861 	}
862 
863 	if (transb == blas_no_trans) {
864 	  incbj = ldb;
865 	  incbhj = 1;
866 	} else {
867 	  incbj = 1;
868 	  incbhj = ldb;
869 	}
870 
871       } else {
872 	/* row major */
873 	incci = ldc;
874 	inccij = 1;
875 
876 	if (transa == blas_no_trans) {
877 	  incai = lda;
878 	  incaih = 1;
879 	} else {
880 	  incai = 1;
881 	  incaih = lda;
882 	}
883 
884 	if (transb == blas_no_trans) {
885 	  incbj = 1;
886 	  incbhj = ldb;
887 	} else {
888 	  incbj = ldb;
889 	  incbhj = 1;
890 	}
891 
892       }
893 
894       FPU_FIX_START;
895 
896       /* Ajustment to increments */
897       incci *= 2;
898       inccij *= 2;
899       incai *= 2;
900       incaih *= 2;
901       incbj *= 2;
902       incbhj *= 2;
903 
904       /* alpha = 0.  In this case, just return beta * C */
905       if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
906 
907 	ci = 0;
908 	for (i = 0; i < m; i++, ci += incci) {
909 	  cij = ci;
910 	  for (j = 0; j < n; j++, cij += inccij) {
911 	    c_elem[0] = c_i[cij];
912 	    c_elem[1] = c_i[cij + 1];
913 	    {
914 	      double head_e1, tail_e1;
915 	      double d1;
916 	      double d2;
917 	      /* Real part */
918 	      d1 = (double) c_elem[0] * beta_i[0];
919 	      d2 = (double) -c_elem[1] * beta_i[1];
920 	      {
921 		/* Compute double-double = double + double. */
922 		double e, t1, t2;
923 
924 		/* Knuth trick. */
925 		t1 = d1 + d2;
926 		e = t1 - d1;
927 		t2 = ((d2 - e) + (d1 - (t1 - e)));
928 
929 		/* The result is t1 + t2, after normalization. */
930 		head_e1 = t1 + t2;
931 		tail_e1 = t2 - (head_e1 - t1);
932 	      }
933 	      head_tmp1[0] = head_e1;
934 	      tail_tmp1[0] = tail_e1;
935 	      /* imaginary part */
936 	      d1 = (double) c_elem[0] * beta_i[1];
937 	      d2 = (double) c_elem[1] * beta_i[0];
938 	      {
939 		/* Compute double-double = double + double. */
940 		double e, t1, t2;
941 
942 		/* Knuth trick. */
943 		t1 = d1 + d2;
944 		e = t1 - d1;
945 		t2 = ((d2 - e) + (d1 - (t1 - e)));
946 
947 		/* The result is t1 + t2, after normalization. */
948 		head_e1 = t1 + t2;
949 		tail_e1 = t2 - (head_e1 - t1);
950 	      }
951 	      head_tmp1[1] = head_e1;
952 	      tail_tmp1[1] = tail_e1;
953 	    }
954 	    c_i[cij] = head_tmp1[0];
955 	    c_i[cij + 1] = head_tmp1[1];
956 	  }
957 	}
958 
959       } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
960 
961 	/* Case alpha == 1. */
962 
963 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
964 	  /* Case alpha == 1, beta == 0.   We compute  C <--- A * B */
965 
966 	  ci = 0;
967 	  ai = 0;
968 	  for (i = 0; i < m; i++, ci += incci, ai += incai) {
969 
970 	    cij = ci;
971 	    bj = 0;
972 
973 	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
974 
975 	      aih = ai;
976 	      bhj = bj;
977 
978 	      head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
979 
980 	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
981 		a_elem[0] = a_i[aih];
982 		a_elem[1] = a_i[aih + 1];
983 		b_elem[0] = b_i[bhj];
984 		b_elem[1] = b_i[bhj + 1];
985 		if (transa == blas_conj_trans) {
986 		  a_elem[1] = -a_elem[1];
987 		}
988 		if (transb == blas_conj_trans) {
989 		  b_elem[1] = -b_elem[1];
990 		}
991 		{
992 		  double head_e1, tail_e1;
993 		  double d1;
994 		  double d2;
995 		  /* Real part */
996 		  d1 = (double) a_elem[0] * b_elem[0];
997 		  d2 = (double) -a_elem[1] * b_elem[1];
998 		  {
999 		    /* Compute double-double = double + double. */
1000 		    double e, t1, t2;
1001 
1002 		    /* Knuth trick. */
1003 		    t1 = d1 + d2;
1004 		    e = t1 - d1;
1005 		    t2 = ((d2 - e) + (d1 - (t1 - e)));
1006 
1007 		    /* The result is t1 + t2, after normalization. */
1008 		    head_e1 = t1 + t2;
1009 		    tail_e1 = t2 - (head_e1 - t1);
1010 		  }
1011 		  head_prod[0] = head_e1;
1012 		  tail_prod[0] = tail_e1;
1013 		  /* imaginary part */
1014 		  d1 = (double) a_elem[0] * b_elem[1];
1015 		  d2 = (double) a_elem[1] * b_elem[0];
1016 		  {
1017 		    /* Compute double-double = double + double. */
1018 		    double e, t1, t2;
1019 
1020 		    /* Knuth trick. */
1021 		    t1 = d1 + d2;
1022 		    e = t1 - d1;
1023 		    t2 = ((d2 - e) + (d1 - (t1 - e)));
1024 
1025 		    /* The result is t1 + t2, after normalization. */
1026 		    head_e1 = t1 + t2;
1027 		    tail_e1 = t2 - (head_e1 - t1);
1028 		  }
1029 		  head_prod[1] = head_e1;
1030 		  tail_prod[1] = tail_e1;
1031 		}
1032 		{
1033 		  double head_t, tail_t;
1034 		  double head_a, tail_a;
1035 		  double head_b, tail_b;
1036 		  /* Real part */
1037 		  head_a = head_sum[0];
1038 		  tail_a = tail_sum[0];
1039 		  head_b = head_prod[0];
1040 		  tail_b = tail_prod[0];
1041 		  {
1042 		    /* Compute double-double = double-double + double-double. */
1043 		    double bv;
1044 		    double s1, s2, t1, t2;
1045 
1046 		    /* Add two hi words. */
1047 		    s1 = head_a + head_b;
1048 		    bv = s1 - head_a;
1049 		    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1050 
1051 		    /* Add two lo words. */
1052 		    t1 = tail_a + tail_b;
1053 		    bv = t1 - tail_a;
1054 		    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1055 
1056 		    s2 += t1;
1057 
1058 		    /* Renormalize (s1, s2)  to  (t1, s2) */
1059 		    t1 = s1 + s2;
1060 		    s2 = s2 - (t1 - s1);
1061 
1062 		    t2 += s2;
1063 
1064 		    /* Renormalize (t1, t2)  */
1065 		    head_t = t1 + t2;
1066 		    tail_t = t2 - (head_t - t1);
1067 		  }
1068 		  head_sum[0] = head_t;
1069 		  tail_sum[0] = tail_t;
1070 		  /* Imaginary part */
1071 		  head_a = head_sum[1];
1072 		  tail_a = tail_sum[1];
1073 		  head_b = head_prod[1];
1074 		  tail_b = tail_prod[1];
1075 		  {
1076 		    /* Compute double-double = double-double + double-double. */
1077 		    double bv;
1078 		    double s1, s2, t1, t2;
1079 
1080 		    /* Add two hi words. */
1081 		    s1 = head_a + head_b;
1082 		    bv = s1 - head_a;
1083 		    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1084 
1085 		    /* Add two lo words. */
1086 		    t1 = tail_a + tail_b;
1087 		    bv = t1 - tail_a;
1088 		    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1089 
1090 		    s2 += t1;
1091 
1092 		    /* Renormalize (s1, s2)  to  (t1, s2) */
1093 		    t1 = s1 + s2;
1094 		    s2 = s2 - (t1 - s1);
1095 
1096 		    t2 += s2;
1097 
1098 		    /* Renormalize (t1, t2)  */
1099 		    head_t = t1 + t2;
1100 		    tail_t = t2 - (head_t - t1);
1101 		  }
1102 		  head_sum[1] = head_t;
1103 		  tail_sum[1] = tail_t;
1104 		}
1105 	      }
1106 	      c_i[cij] = head_sum[0];
1107 	      c_i[cij + 1] = head_sum[1];
1108 	    }
1109 	  }
1110 
1111 	} else {
1112 	  /* Case alpha == 1, but beta != 0.
1113 	     We compute   C <--- A * B + beta * C   */
1114 
1115 	  ci = 0;
1116 	  ai = 0;
1117 	  for (i = 0; i < m; i++, ci += incci, ai += incai) {
1118 
1119 	    cij = ci;
1120 	    bj = 0;
1121 
1122 	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
1123 
1124 	      aih = ai;
1125 	      bhj = bj;
1126 
1127 	      head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
1128 
1129 	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
1130 		a_elem[0] = a_i[aih];
1131 		a_elem[1] = a_i[aih + 1];
1132 		b_elem[0] = b_i[bhj];
1133 		b_elem[1] = b_i[bhj + 1];
1134 		if (transa == blas_conj_trans) {
1135 		  a_elem[1] = -a_elem[1];
1136 		}
1137 		if (transb == blas_conj_trans) {
1138 		  b_elem[1] = -b_elem[1];
1139 		}
1140 		{
1141 		  double head_e1, tail_e1;
1142 		  double d1;
1143 		  double d2;
1144 		  /* Real part */
1145 		  d1 = (double) a_elem[0] * b_elem[0];
1146 		  d2 = (double) -a_elem[1] * b_elem[1];
1147 		  {
1148 		    /* Compute double-double = double + double. */
1149 		    double e, t1, t2;
1150 
1151 		    /* Knuth trick. */
1152 		    t1 = d1 + d2;
1153 		    e = t1 - d1;
1154 		    t2 = ((d2 - e) + (d1 - (t1 - e)));
1155 
1156 		    /* The result is t1 + t2, after normalization. */
1157 		    head_e1 = t1 + t2;
1158 		    tail_e1 = t2 - (head_e1 - t1);
1159 		  }
1160 		  head_prod[0] = head_e1;
1161 		  tail_prod[0] = tail_e1;
1162 		  /* imaginary part */
1163 		  d1 = (double) a_elem[0] * b_elem[1];
1164 		  d2 = (double) a_elem[1] * b_elem[0];
1165 		  {
1166 		    /* Compute double-double = double + double. */
1167 		    double e, t1, t2;
1168 
1169 		    /* Knuth trick. */
1170 		    t1 = d1 + d2;
1171 		    e = t1 - d1;
1172 		    t2 = ((d2 - e) + (d1 - (t1 - e)));
1173 
1174 		    /* The result is t1 + t2, after normalization. */
1175 		    head_e1 = t1 + t2;
1176 		    tail_e1 = t2 - (head_e1 - t1);
1177 		  }
1178 		  head_prod[1] = head_e1;
1179 		  tail_prod[1] = tail_e1;
1180 		}
1181 		{
1182 		  double head_t, tail_t;
1183 		  double head_a, tail_a;
1184 		  double head_b, tail_b;
1185 		  /* Real part */
1186 		  head_a = head_sum[0];
1187 		  tail_a = tail_sum[0];
1188 		  head_b = head_prod[0];
1189 		  tail_b = tail_prod[0];
1190 		  {
1191 		    /* Compute double-double = double-double + double-double. */
1192 		    double bv;
1193 		    double s1, s2, t1, t2;
1194 
1195 		    /* Add two hi words. */
1196 		    s1 = head_a + head_b;
1197 		    bv = s1 - head_a;
1198 		    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1199 
1200 		    /* Add two lo words. */
1201 		    t1 = tail_a + tail_b;
1202 		    bv = t1 - tail_a;
1203 		    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1204 
1205 		    s2 += t1;
1206 
1207 		    /* Renormalize (s1, s2)  to  (t1, s2) */
1208 		    t1 = s1 + s2;
1209 		    s2 = s2 - (t1 - s1);
1210 
1211 		    t2 += s2;
1212 
1213 		    /* Renormalize (t1, t2)  */
1214 		    head_t = t1 + t2;
1215 		    tail_t = t2 - (head_t - t1);
1216 		  }
1217 		  head_sum[0] = head_t;
1218 		  tail_sum[0] = tail_t;
1219 		  /* Imaginary part */
1220 		  head_a = head_sum[1];
1221 		  tail_a = tail_sum[1];
1222 		  head_b = head_prod[1];
1223 		  tail_b = tail_prod[1];
1224 		  {
1225 		    /* Compute double-double = double-double + double-double. */
1226 		    double bv;
1227 		    double s1, s2, t1, t2;
1228 
1229 		    /* Add two hi words. */
1230 		    s1 = head_a + head_b;
1231 		    bv = s1 - head_a;
1232 		    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1233 
1234 		    /* Add two lo words. */
1235 		    t1 = tail_a + tail_b;
1236 		    bv = t1 - tail_a;
1237 		    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1238 
1239 		    s2 += t1;
1240 
1241 		    /* Renormalize (s1, s2)  to  (t1, s2) */
1242 		    t1 = s1 + s2;
1243 		    s2 = s2 - (t1 - s1);
1244 
1245 		    t2 += s2;
1246 
1247 		    /* Renormalize (t1, t2)  */
1248 		    head_t = t1 + t2;
1249 		    tail_t = t2 - (head_t - t1);
1250 		  }
1251 		  head_sum[1] = head_t;
1252 		  tail_sum[1] = tail_t;
1253 		}
1254 	      }
1255 
1256 	      c_elem[0] = c_i[cij];
1257 	      c_elem[1] = c_i[cij + 1];
1258 	      {
1259 		double head_e1, tail_e1;
1260 		double d1;
1261 		double d2;
1262 		/* Real part */
1263 		d1 = (double) c_elem[0] * beta_i[0];
1264 		d2 = (double) -c_elem[1] * beta_i[1];
1265 		{
1266 		  /* Compute double-double = double + double. */
1267 		  double e, t1, t2;
1268 
1269 		  /* Knuth trick. */
1270 		  t1 = d1 + d2;
1271 		  e = t1 - d1;
1272 		  t2 = ((d2 - e) + (d1 - (t1 - e)));
1273 
1274 		  /* The result is t1 + t2, after normalization. */
1275 		  head_e1 = t1 + t2;
1276 		  tail_e1 = t2 - (head_e1 - t1);
1277 		}
1278 		head_tmp2[0] = head_e1;
1279 		tail_tmp2[0] = tail_e1;
1280 		/* imaginary part */
1281 		d1 = (double) c_elem[0] * beta_i[1];
1282 		d2 = (double) c_elem[1] * beta_i[0];
1283 		{
1284 		  /* Compute double-double = double + double. */
1285 		  double e, t1, t2;
1286 
1287 		  /* Knuth trick. */
1288 		  t1 = d1 + d2;
1289 		  e = t1 - d1;
1290 		  t2 = ((d2 - e) + (d1 - (t1 - e)));
1291 
1292 		  /* The result is t1 + t2, after normalization. */
1293 		  head_e1 = t1 + t2;
1294 		  tail_e1 = t2 - (head_e1 - t1);
1295 		}
1296 		head_tmp2[1] = head_e1;
1297 		tail_tmp2[1] = tail_e1;
1298 	      }
1299 	      head_tmp1[0] = head_sum[0];
1300 	      tail_tmp1[0] = tail_sum[0];
1301 	      head_tmp1[1] = head_sum[1];
1302 	      tail_tmp1[1] = tail_sum[1];
1303 	      {
1304 		double head_t, tail_t;
1305 		double head_a, tail_a;
1306 		double head_b, tail_b;
1307 		/* Real part */
1308 		head_a = head_tmp2[0];
1309 		tail_a = tail_tmp2[0];
1310 		head_b = head_tmp1[0];
1311 		tail_b = tail_tmp1[0];
1312 		{
1313 		  /* Compute double-double = double-double + double-double. */
1314 		  double bv;
1315 		  double s1, s2, t1, t2;
1316 
1317 		  /* Add two hi words. */
1318 		  s1 = head_a + head_b;
1319 		  bv = s1 - head_a;
1320 		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1321 
1322 		  /* Add two lo words. */
1323 		  t1 = tail_a + tail_b;
1324 		  bv = t1 - tail_a;
1325 		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1326 
1327 		  s2 += t1;
1328 
1329 		  /* Renormalize (s1, s2)  to  (t1, s2) */
1330 		  t1 = s1 + s2;
1331 		  s2 = s2 - (t1 - s1);
1332 
1333 		  t2 += s2;
1334 
1335 		  /* Renormalize (t1, t2)  */
1336 		  head_t = t1 + t2;
1337 		  tail_t = t2 - (head_t - t1);
1338 		}
1339 		head_tmp1[0] = head_t;
1340 		tail_tmp1[0] = tail_t;
1341 		/* Imaginary part */
1342 		head_a = head_tmp2[1];
1343 		tail_a = tail_tmp2[1];
1344 		head_b = head_tmp1[1];
1345 		tail_b = tail_tmp1[1];
1346 		{
1347 		  /* Compute double-double = double-double + double-double. */
1348 		  double bv;
1349 		  double s1, s2, t1, t2;
1350 
1351 		  /* Add two hi words. */
1352 		  s1 = head_a + head_b;
1353 		  bv = s1 - head_a;
1354 		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1355 
1356 		  /* Add two lo words. */
1357 		  t1 = tail_a + tail_b;
1358 		  bv = t1 - tail_a;
1359 		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1360 
1361 		  s2 += t1;
1362 
1363 		  /* Renormalize (s1, s2)  to  (t1, s2) */
1364 		  t1 = s1 + s2;
1365 		  s2 = s2 - (t1 - s1);
1366 
1367 		  t2 += s2;
1368 
1369 		  /* Renormalize (t1, t2)  */
1370 		  head_t = t1 + t2;
1371 		  tail_t = t2 - (head_t - t1);
1372 		}
1373 		head_tmp1[1] = head_t;
1374 		tail_tmp1[1] = tail_t;
1375 	      }
1376 	      c_i[cij] = head_tmp1[0];
1377 	      c_i[cij + 1] = head_tmp1[1];
1378 	    }
1379 	  }
1380 	}
1381 
1382       } else {
1383 
1384 	/* The most general form,   C <-- alpha * A * B + beta * C  */
1385 	ci = 0;
1386 	ai = 0;
1387 	for (i = 0; i < m; i++, ci += incci, ai += incai) {
1388 
1389 	  cij = ci;
1390 	  bj = 0;
1391 
1392 	  for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
1393 
1394 	    aih = ai;
1395 	    bhj = bj;
1396 
1397 	    head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
1398 
1399 	    for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
1400 	      a_elem[0] = a_i[aih];
1401 	      a_elem[1] = a_i[aih + 1];
1402 	      b_elem[0] = b_i[bhj];
1403 	      b_elem[1] = b_i[bhj + 1];
1404 	      if (transa == blas_conj_trans) {
1405 		a_elem[1] = -a_elem[1];
1406 	      }
1407 	      if (transb == blas_conj_trans) {
1408 		b_elem[1] = -b_elem[1];
1409 	      }
1410 	      {
1411 		double head_e1, tail_e1;
1412 		double d1;
1413 		double d2;
1414 		/* Real part */
1415 		d1 = (double) a_elem[0] * b_elem[0];
1416 		d2 = (double) -a_elem[1] * b_elem[1];
1417 		{
1418 		  /* Compute double-double = double + double. */
1419 		  double e, t1, t2;
1420 
1421 		  /* Knuth trick. */
1422 		  t1 = d1 + d2;
1423 		  e = t1 - d1;
1424 		  t2 = ((d2 - e) + (d1 - (t1 - e)));
1425 
1426 		  /* The result is t1 + t2, after normalization. */
1427 		  head_e1 = t1 + t2;
1428 		  tail_e1 = t2 - (head_e1 - t1);
1429 		}
1430 		head_prod[0] = head_e1;
1431 		tail_prod[0] = tail_e1;
1432 		/* imaginary part */
1433 		d1 = (double) a_elem[0] * b_elem[1];
1434 		d2 = (double) a_elem[1] * b_elem[0];
1435 		{
1436 		  /* Compute double-double = double + double. */
1437 		  double e, t1, t2;
1438 
1439 		  /* Knuth trick. */
1440 		  t1 = d1 + d2;
1441 		  e = t1 - d1;
1442 		  t2 = ((d2 - e) + (d1 - (t1 - e)));
1443 
1444 		  /* The result is t1 + t2, after normalization. */
1445 		  head_e1 = t1 + t2;
1446 		  tail_e1 = t2 - (head_e1 - t1);
1447 		}
1448 		head_prod[1] = head_e1;
1449 		tail_prod[1] = tail_e1;
1450 	      }
1451 	      {
1452 		double head_t, tail_t;
1453 		double head_a, tail_a;
1454 		double head_b, tail_b;
1455 		/* Real part */
1456 		head_a = head_sum[0];
1457 		tail_a = tail_sum[0];
1458 		head_b = head_prod[0];
1459 		tail_b = tail_prod[0];
1460 		{
1461 		  /* Compute double-double = double-double + double-double. */
1462 		  double bv;
1463 		  double s1, s2, t1, t2;
1464 
1465 		  /* Add two hi words. */
1466 		  s1 = head_a + head_b;
1467 		  bv = s1 - head_a;
1468 		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1469 
1470 		  /* Add two lo words. */
1471 		  t1 = tail_a + tail_b;
1472 		  bv = t1 - tail_a;
1473 		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1474 
1475 		  s2 += t1;
1476 
1477 		  /* Renormalize (s1, s2)  to  (t1, s2) */
1478 		  t1 = s1 + s2;
1479 		  s2 = s2 - (t1 - s1);
1480 
1481 		  t2 += s2;
1482 
1483 		  /* Renormalize (t1, t2)  */
1484 		  head_t = t1 + t2;
1485 		  tail_t = t2 - (head_t - t1);
1486 		}
1487 		head_sum[0] = head_t;
1488 		tail_sum[0] = tail_t;
1489 		/* Imaginary part */
1490 		head_a = head_sum[1];
1491 		tail_a = tail_sum[1];
1492 		head_b = head_prod[1];
1493 		tail_b = tail_prod[1];
1494 		{
1495 		  /* Compute double-double = double-double + double-double. */
1496 		  double bv;
1497 		  double s1, s2, t1, t2;
1498 
1499 		  /* Add two hi words. */
1500 		  s1 = head_a + head_b;
1501 		  bv = s1 - head_a;
1502 		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1503 
1504 		  /* Add two lo words. */
1505 		  t1 = tail_a + tail_b;
1506 		  bv = t1 - tail_a;
1507 		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1508 
1509 		  s2 += t1;
1510 
1511 		  /* Renormalize (s1, s2)  to  (t1, s2) */
1512 		  t1 = s1 + s2;
1513 		  s2 = s2 - (t1 - s1);
1514 
1515 		  t2 += s2;
1516 
1517 		  /* Renormalize (t1, t2)  */
1518 		  head_t = t1 + t2;
1519 		  tail_t = t2 - (head_t - t1);
1520 		}
1521 		head_sum[1] = head_t;
1522 		tail_sum[1] = tail_t;
1523 	      }
1524 	    }
1525 
1526 	    {
1527 	      double cd[2];
1528 	      cd[0] = (double) alpha_i[0];
1529 	      cd[1] = (double) alpha_i[1];
1530 	      {
1531 		/* Compute complex-extra = complex-extra * complex-double. */
1532 		double head_a0, tail_a0;
1533 		double head_a1, tail_a1;
1534 		double head_t1, tail_t1;
1535 		double head_t2, tail_t2;
1536 		head_a0 = head_sum[0];
1537 		tail_a0 = tail_sum[0];
1538 		head_a1 = head_sum[1];
1539 		tail_a1 = tail_sum[1];
1540 		/* real part */
1541 		{
1542 		  /* Compute double-double = double-double * double. */
1543 		  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1544 
1545 		  con = head_a0 * split;
1546 		  a11 = con - head_a0;
1547 		  a11 = con - a11;
1548 		  a21 = head_a0 - a11;
1549 		  con = cd[0] * split;
1550 		  b1 = con - cd[0];
1551 		  b1 = con - b1;
1552 		  b2 = cd[0] - b1;
1553 
1554 		  c11 = head_a0 * cd[0];
1555 		  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1556 
1557 		  c2 = tail_a0 * cd[0];
1558 		  t1 = c11 + c2;
1559 		  t2 = (c2 - (t1 - c11)) + c21;
1560 
1561 		  head_t1 = t1 + t2;
1562 		  tail_t1 = t2 - (head_t1 - t1);
1563 		}
1564 		{
1565 		  /* Compute double-double = double-double * double. */
1566 		  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1567 
1568 		  con = head_a1 * split;
1569 		  a11 = con - head_a1;
1570 		  a11 = con - a11;
1571 		  a21 = head_a1 - a11;
1572 		  con = cd[1] * split;
1573 		  b1 = con - cd[1];
1574 		  b1 = con - b1;
1575 		  b2 = cd[1] - b1;
1576 
1577 		  c11 = head_a1 * cd[1];
1578 		  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1579 
1580 		  c2 = tail_a1 * cd[1];
1581 		  t1 = c11 + c2;
1582 		  t2 = (c2 - (t1 - c11)) + c21;
1583 
1584 		  head_t2 = t1 + t2;
1585 		  tail_t2 = t2 - (head_t2 - t1);
1586 		}
1587 		head_t2 = -head_t2;
1588 		tail_t2 = -tail_t2;
1589 		{
1590 		  /* Compute double-double = double-double + double-double. */
1591 		  double bv;
1592 		  double s1, s2, t1, t2;
1593 
1594 		  /* Add two hi words. */
1595 		  s1 = head_t1 + head_t2;
1596 		  bv = s1 - head_t1;
1597 		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1598 
1599 		  /* Add two lo words. */
1600 		  t1 = tail_t1 + tail_t2;
1601 		  bv = t1 - tail_t1;
1602 		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1603 
1604 		  s2 += t1;
1605 
1606 		  /* Renormalize (s1, s2)  to  (t1, s2) */
1607 		  t1 = s1 + s2;
1608 		  s2 = s2 - (t1 - s1);
1609 
1610 		  t2 += s2;
1611 
1612 		  /* Renormalize (t1, t2)  */
1613 		  head_t1 = t1 + t2;
1614 		  tail_t1 = t2 - (head_t1 - t1);
1615 		}
1616 		head_tmp1[0] = head_t1;
1617 		tail_tmp1[0] = tail_t1;
1618 		/* imaginary part */
1619 		{
1620 		  /* Compute double-double = double-double * double. */
1621 		  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1622 
1623 		  con = head_a1 * split;
1624 		  a11 = con - head_a1;
1625 		  a11 = con - a11;
1626 		  a21 = head_a1 - a11;
1627 		  con = cd[0] * split;
1628 		  b1 = con - cd[0];
1629 		  b1 = con - b1;
1630 		  b2 = cd[0] - b1;
1631 
1632 		  c11 = head_a1 * cd[0];
1633 		  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1634 
1635 		  c2 = tail_a1 * cd[0];
1636 		  t1 = c11 + c2;
1637 		  t2 = (c2 - (t1 - c11)) + c21;
1638 
1639 		  head_t1 = t1 + t2;
1640 		  tail_t1 = t2 - (head_t1 - t1);
1641 		}
1642 		{
1643 		  /* Compute double-double = double-double * double. */
1644 		  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1645 
1646 		  con = head_a0 * split;
1647 		  a11 = con - head_a0;
1648 		  a11 = con - a11;
1649 		  a21 = head_a0 - a11;
1650 		  con = cd[1] * split;
1651 		  b1 = con - cd[1];
1652 		  b1 = con - b1;
1653 		  b2 = cd[1] - b1;
1654 
1655 		  c11 = head_a0 * cd[1];
1656 		  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1657 
1658 		  c2 = tail_a0 * cd[1];
1659 		  t1 = c11 + c2;
1660 		  t2 = (c2 - (t1 - c11)) + c21;
1661 
1662 		  head_t2 = t1 + t2;
1663 		  tail_t2 = t2 - (head_t2 - t1);
1664 		}
1665 		{
1666 		  /* Compute double-double = double-double + double-double. */
1667 		  double bv;
1668 		  double s1, s2, t1, t2;
1669 
1670 		  /* Add two hi words. */
1671 		  s1 = head_t1 + head_t2;
1672 		  bv = s1 - head_t1;
1673 		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1674 
1675 		  /* Add two lo words. */
1676 		  t1 = tail_t1 + tail_t2;
1677 		  bv = t1 - tail_t1;
1678 		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1679 
1680 		  s2 += t1;
1681 
1682 		  /* Renormalize (s1, s2)  to  (t1, s2) */
1683 		  t1 = s1 + s2;
1684 		  s2 = s2 - (t1 - s1);
1685 
1686 		  t2 += s2;
1687 
1688 		  /* Renormalize (t1, t2)  */
1689 		  head_t1 = t1 + t2;
1690 		  tail_t1 = t2 - (head_t1 - t1);
1691 		}
1692 		head_tmp1[1] = head_t1;
1693 		tail_tmp1[1] = tail_t1;
1694 	      }
1695 
1696 	    }
1697 	    c_elem[0] = c_i[cij];
1698 	    c_elem[1] = c_i[cij + 1];
1699 	    {
1700 	      double head_e1, tail_e1;
1701 	      double d1;
1702 	      double d2;
1703 	      /* Real part */
1704 	      d1 = (double) c_elem[0] * beta_i[0];
1705 	      d2 = (double) -c_elem[1] * beta_i[1];
1706 	      {
1707 		/* Compute double-double = double + double. */
1708 		double e, t1, t2;
1709 
1710 		/* Knuth trick. */
1711 		t1 = d1 + d2;
1712 		e = t1 - d1;
1713 		t2 = ((d2 - e) + (d1 - (t1 - e)));
1714 
1715 		/* The result is t1 + t2, after normalization. */
1716 		head_e1 = t1 + t2;
1717 		tail_e1 = t2 - (head_e1 - t1);
1718 	      }
1719 	      head_tmp2[0] = head_e1;
1720 	      tail_tmp2[0] = tail_e1;
1721 	      /* imaginary part */
1722 	      d1 = (double) c_elem[0] * beta_i[1];
1723 	      d2 = (double) c_elem[1] * beta_i[0];
1724 	      {
1725 		/* Compute double-double = double + double. */
1726 		double e, t1, t2;
1727 
1728 		/* Knuth trick. */
1729 		t1 = d1 + d2;
1730 		e = t1 - d1;
1731 		t2 = ((d2 - e) + (d1 - (t1 - e)));
1732 
1733 		/* The result is t1 + t2, after normalization. */
1734 		head_e1 = t1 + t2;
1735 		tail_e1 = t2 - (head_e1 - t1);
1736 	      }
1737 	      head_tmp2[1] = head_e1;
1738 	      tail_tmp2[1] = tail_e1;
1739 	    }
1740 	    {
1741 	      double head_t, tail_t;
1742 	      double head_a, tail_a;
1743 	      double head_b, tail_b;
1744 	      /* Real part */
1745 	      head_a = head_tmp1[0];
1746 	      tail_a = tail_tmp1[0];
1747 	      head_b = head_tmp2[0];
1748 	      tail_b = tail_tmp2[0];
1749 	      {
1750 		/* Compute double-double = double-double + double-double. */
1751 		double bv;
1752 		double s1, s2, t1, t2;
1753 
1754 		/* Add two hi words. */
1755 		s1 = head_a + head_b;
1756 		bv = s1 - head_a;
1757 		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1758 
1759 		/* Add two lo words. */
1760 		t1 = tail_a + tail_b;
1761 		bv = t1 - tail_a;
1762 		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1763 
1764 		s2 += t1;
1765 
1766 		/* Renormalize (s1, s2)  to  (t1, s2) */
1767 		t1 = s1 + s2;
1768 		s2 = s2 - (t1 - s1);
1769 
1770 		t2 += s2;
1771 
1772 		/* Renormalize (t1, t2)  */
1773 		head_t = t1 + t2;
1774 		tail_t = t2 - (head_t - t1);
1775 	      }
1776 	      head_tmp1[0] = head_t;
1777 	      tail_tmp1[0] = tail_t;
1778 	      /* Imaginary part */
1779 	      head_a = head_tmp1[1];
1780 	      tail_a = tail_tmp1[1];
1781 	      head_b = head_tmp2[1];
1782 	      tail_b = tail_tmp2[1];
1783 	      {
1784 		/* Compute double-double = double-double + double-double. */
1785 		double bv;
1786 		double s1, s2, t1, t2;
1787 
1788 		/* Add two hi words. */
1789 		s1 = head_a + head_b;
1790 		bv = s1 - head_a;
1791 		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1792 
1793 		/* Add two lo words. */
1794 		t1 = tail_a + tail_b;
1795 		bv = t1 - tail_a;
1796 		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1797 
1798 		s2 += t1;
1799 
1800 		/* Renormalize (s1, s2)  to  (t1, s2) */
1801 		t1 = s1 + s2;
1802 		s2 = s2 - (t1 - s1);
1803 
1804 		t2 += s2;
1805 
1806 		/* Renormalize (t1, t2)  */
1807 		head_t = t1 + t2;
1808 		tail_t = t2 - (head_t - t1);
1809 	      }
1810 	      head_tmp1[1] = head_t;
1811 	      tail_tmp1[1] = tail_t;
1812 	    }
1813 	    c_i[cij] = head_tmp1[0];
1814 	    c_i[cij + 1] = head_tmp1[1];
1815 	  }
1816 	}
1817 
1818       }
1819 
1820       FPU_FIX_STOP;
1821 
1822       break;
1823     }
1824   }
1825 }
1826