1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_dgemm_x(enum blas_order_type order,enum blas_trans_type transa,enum blas_trans_type transb,int m,int n,int k,double alpha,const double * a,int lda,const double * b,int ldb,double beta,double * c,int ldc,enum blas_prec_type prec)3 void BLAS_dgemm_x(enum blas_order_type order, enum blas_trans_type transa,
4 		  enum blas_trans_type transb, int m, int n, int k,
5 		  double alpha, const double *a, int lda, const double *b,
6 		  int ldb, double beta, double *c, int ldc,
7 		  enum blas_prec_type prec)
8 
9 /*
10  * Purpose
11  * =======
12  *
13  * This routine computes the matrix product:
14  *
15  *      C   <-  alpha * op(A) * op(B)  +  beta * C .
16  *
17  * where op(M) represents either M, M transpose,
18  * or M conjugate transpose.
19  *
20  * Arguments
21  * =========
22  *
23  * order   (input) enum blas_order_type
24  *         Storage format of input matrices A, B, and C.
25  *
26  * transa  (input) enum blas_trans_type
27  *         Operation to be done on matrix A before multiplication.
28  *         Can be no operation, transposition, or conjugate transposition.
29  *
30  * transb  (input) enum blas_trans_type
31  *         Operation to be done on matrix B before multiplication.
32  *         Can be no operation, transposition, or conjugate transposition.
33  *
34  * m n k   (input) int
35  *         The dimensions of matrices A, B, and C.
36  *         Matrix C is m-by-n matrix.
37  *         Matrix A is m-by-k if A is not transposed,
38  *                     k-by-m otherwise.
39  *         Matrix B is k-by-n if B is not transposed,
40  *                     n-by-k otherwise.
41  *
42  * alpha   (input) double
43  *
44  * a       (input) const double*
45  *         matrix A.
46  *
47  * lda     (input) int
48  *         leading dimension of A.
49  *
50  * b       (input) const double*
51  *         matrix B
52  *
53  * ldb     (input) int
54  *         leading dimension of B.
55  *
56  * beta    (input) double
57  *
58  * c       (input/output) double*
59  *         matrix C
60  *
61  * ldc     (input) int
62  *         leading dimension of C.
63  *
64  * prec   (input) enum blas_prec_type
65  *        Specifies the internal precision to be used.
66  *        = blas_prec_single: single precision.
67  *        = blas_prec_double: double precision.
68  *        = blas_prec_extra : anything at least 1.5 times as accurate
69  *                            than double, and wider than 80-bits.
70  *                            We use double-double in our implementation.
71  *
72  */
73 {
74   static const char routine_name[] = "BLAS_dgemm_x";
75   switch (prec) {
76 
77   case blas_prec_single:
78   case blas_prec_double:
79   case blas_prec_indigenous:{
80 
81 
82       /* Integer Index Variables */
83       int i, j, h;
84 
85       int ai, bj, ci;
86       int aih, bhj, cij;	/* Index into matrices a, b, c during multiply */
87 
88       int incai, incaih;	/* Index increments for matrix a */
89       int incbj, incbhj;	/* Index increments for matrix b */
90       int incci, inccij;	/* Index increments for matrix c */
91 
92       /* Input Matrices */
93       const double *a_i = a;
94       const double *b_i = b;
95 
96       /* Output Matrix */
97       double *c_i = c;
98 
99       /* Input Scalars */
100       double alpha_i = alpha;
101       double beta_i = beta;
102 
103       /* Temporary Floating-Point Variables */
104       double a_elem;
105       double b_elem;
106       double c_elem;
107       double prod;
108       double sum;
109       double tmp1;
110       double tmp2;
111 
112 
113 
114       /* Test for error conditions */
115       if (m < 0)
116 	BLAS_error(routine_name, -4, m, NULL);
117       if (n < 0)
118 	BLAS_error(routine_name, -5, n, NULL);
119       if (k < 0)
120 	BLAS_error(routine_name, -6, k, NULL);
121 
122       if (order == blas_colmajor) {
123 
124 	if (ldc < m)
125 	  BLAS_error(routine_name, -14, ldc, NULL);
126 
127 	if (transa == blas_no_trans) {
128 	  if (lda < m)
129 	    BLAS_error(routine_name, -9, lda, NULL);
130 	} else {
131 	  if (lda < k)
132 	    BLAS_error(routine_name, -9, lda, NULL);
133 	}
134 
135 	if (transb == blas_no_trans) {
136 	  if (ldb < k)
137 	    BLAS_error(routine_name, -11, ldb, NULL);
138 	} else {
139 	  if (ldb < n)
140 	    BLAS_error(routine_name, -11, ldb, NULL);
141 	}
142 
143       } else {
144 	/* row major */
145 	if (ldc < n)
146 	  BLAS_error(routine_name, -14, ldc, NULL);
147 
148 	if (transa == blas_no_trans) {
149 	  if (lda < k)
150 	    BLAS_error(routine_name, -9, lda, NULL);
151 	} else {
152 	  if (lda < m)
153 	    BLAS_error(routine_name, -9, lda, NULL);
154 	}
155 
156 	if (transb == blas_no_trans) {
157 	  if (ldb < n)
158 	    BLAS_error(routine_name, -11, ldb, NULL);
159 	} else {
160 	  if (ldb < k)
161 	    BLAS_error(routine_name, -11, ldb, NULL);
162 	}
163       }
164 
165       /* Test for no-op */
166       if (n == 0 || m == 0 || k == 0)
167 	return;
168       if (alpha_i == 0.0 && beta_i == 1.0) {
169 	return;
170       }
171 
172       /* Set Index Parameters */
173       if (order == blas_colmajor) {
174 	incci = 1;
175 	inccij = ldc;
176 
177 	if (transa == blas_no_trans) {
178 	  incai = 1;
179 	  incaih = lda;
180 	} else {
181 	  incai = lda;
182 	  incaih = 1;
183 	}
184 
185 	if (transb == blas_no_trans) {
186 	  incbj = ldb;
187 	  incbhj = 1;
188 	} else {
189 	  incbj = 1;
190 	  incbhj = ldb;
191 	}
192 
193       } else {
194 	/* row major */
195 	incci = ldc;
196 	inccij = 1;
197 
198 	if (transa == blas_no_trans) {
199 	  incai = lda;
200 	  incaih = 1;
201 	} else {
202 	  incai = 1;
203 	  incaih = lda;
204 	}
205 
206 	if (transb == blas_no_trans) {
207 	  incbj = 1;
208 	  incbhj = ldb;
209 	} else {
210 	  incbj = ldb;
211 	  incbhj = 1;
212 	}
213 
214       }
215 
216 
217 
218       /* Ajustment to increments */
219 
220 
221 
222 
223 
224 
225 
226       /* alpha = 0.  In this case, just return beta * C */
227       if (alpha_i == 0.0) {
228 
229 	ci = 0;
230 	for (i = 0; i < m; i++, ci += incci) {
231 	  cij = ci;
232 	  for (j = 0; j < n; j++, cij += inccij) {
233 	    c_elem = c_i[cij];
234 	    tmp1 = c_elem * beta_i;
235 	    c_i[cij] = tmp1;
236 	  }
237 	}
238 
239       } else if (alpha_i == 1.0) {
240 
241 	/* Case alpha == 1. */
242 
243 	if (beta_i == 0.0) {
244 	  /* Case alpha == 1, beta == 0.   We compute  C <--- A * B */
245 
246 	  ci = 0;
247 	  ai = 0;
248 	  for (i = 0; i < m; i++, ci += incci, ai += incai) {
249 
250 	    cij = ci;
251 	    bj = 0;
252 
253 	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
254 
255 	      aih = ai;
256 	      bhj = bj;
257 
258 	      sum = 0.0;
259 
260 	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
261 		a_elem = a_i[aih];
262 		b_elem = b_i[bhj];
263 		if (transa == blas_conj_trans) {
264 
265 		}
266 		if (transb == blas_conj_trans) {
267 
268 		}
269 		prod = a_elem * b_elem;
270 		sum = sum + prod;
271 	      }
272 	      c_i[cij] = sum;
273 	    }
274 	  }
275 
276 	} else {
277 	  /* Case alpha == 1, but beta != 0.
278 	     We compute   C <--- A * B + beta * C   */
279 
280 	  ci = 0;
281 	  ai = 0;
282 	  for (i = 0; i < m; i++, ci += incci, ai += incai) {
283 
284 	    cij = ci;
285 	    bj = 0;
286 
287 	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
288 
289 	      aih = ai;
290 	      bhj = bj;
291 
292 	      sum = 0.0;
293 
294 	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
295 		a_elem = a_i[aih];
296 		b_elem = b_i[bhj];
297 		if (transa == blas_conj_trans) {
298 
299 		}
300 		if (transb == blas_conj_trans) {
301 
302 		}
303 		prod = a_elem * b_elem;
304 		sum = sum + prod;
305 	      }
306 
307 	      c_elem = c_i[cij];
308 	      tmp2 = c_elem * beta_i;
309 	      tmp1 = sum;
310 	      tmp1 = tmp2 + tmp1;
311 	      c_i[cij] = tmp1;
312 	    }
313 	  }
314 	}
315 
316       } else {
317 
318 	/* The most general form,   C <-- alpha * A * B + beta * C  */
319 	ci = 0;
320 	ai = 0;
321 	for (i = 0; i < m; i++, ci += incci, ai += incai) {
322 
323 	  cij = ci;
324 	  bj = 0;
325 
326 	  for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
327 
328 	    aih = ai;
329 	    bhj = bj;
330 
331 	    sum = 0.0;
332 
333 	    for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
334 	      a_elem = a_i[aih];
335 	      b_elem = b_i[bhj];
336 	      if (transa == blas_conj_trans) {
337 
338 	      }
339 	      if (transb == blas_conj_trans) {
340 
341 	      }
342 	      prod = a_elem * b_elem;
343 	      sum = sum + prod;
344 	    }
345 
346 	    tmp1 = sum * alpha_i;
347 	    c_elem = c_i[cij];
348 	    tmp2 = c_elem * beta_i;
349 	    tmp1 = tmp1 + tmp2;
350 	    c_i[cij] = tmp1;
351 	  }
352 	}
353 
354       }
355 
356 
357 
358       break;
359     }
360 
361   case blas_prec_extra:{
362 
363 
364       /* Integer Index Variables */
365       int i, j, h;
366 
367       int ai, bj, ci;
368       int aih, bhj, cij;	/* Index into matrices a, b, c during multiply */
369 
370       int incai, incaih;	/* Index increments for matrix a */
371       int incbj, incbhj;	/* Index increments for matrix b */
372       int incci, inccij;	/* Index increments for matrix c */
373 
374       /* Input Matrices */
375       const double *a_i = a;
376       const double *b_i = b;
377 
378       /* Output Matrix */
379       double *c_i = c;
380 
381       /* Input Scalars */
382       double alpha_i = alpha;
383       double beta_i = beta;
384 
385       /* Temporary Floating-Point Variables */
386       double a_elem;
387       double b_elem;
388       double c_elem;
389       double head_prod, tail_prod;
390       double head_sum, tail_sum;
391       double head_tmp1, tail_tmp1;
392       double head_tmp2, tail_tmp2;
393 
394       FPU_FIX_DECL;
395 
396       /* Test for error conditions */
397       if (m < 0)
398 	BLAS_error(routine_name, -4, m, NULL);
399       if (n < 0)
400 	BLAS_error(routine_name, -5, n, NULL);
401       if (k < 0)
402 	BLAS_error(routine_name, -6, k, NULL);
403 
404       if (order == blas_colmajor) {
405 
406 	if (ldc < m)
407 	  BLAS_error(routine_name, -14, ldc, NULL);
408 
409 	if (transa == blas_no_trans) {
410 	  if (lda < m)
411 	    BLAS_error(routine_name, -9, lda, NULL);
412 	} else {
413 	  if (lda < k)
414 	    BLAS_error(routine_name, -9, lda, NULL);
415 	}
416 
417 	if (transb == blas_no_trans) {
418 	  if (ldb < k)
419 	    BLAS_error(routine_name, -11, ldb, NULL);
420 	} else {
421 	  if (ldb < n)
422 	    BLAS_error(routine_name, -11, ldb, NULL);
423 	}
424 
425       } else {
426 	/* row major */
427 	if (ldc < n)
428 	  BLAS_error(routine_name, -14, ldc, NULL);
429 
430 	if (transa == blas_no_trans) {
431 	  if (lda < k)
432 	    BLAS_error(routine_name, -9, lda, NULL);
433 	} else {
434 	  if (lda < m)
435 	    BLAS_error(routine_name, -9, lda, NULL);
436 	}
437 
438 	if (transb == blas_no_trans) {
439 	  if (ldb < n)
440 	    BLAS_error(routine_name, -11, ldb, NULL);
441 	} else {
442 	  if (ldb < k)
443 	    BLAS_error(routine_name, -11, ldb, NULL);
444 	}
445       }
446 
447       /* Test for no-op */
448       if (n == 0 || m == 0 || k == 0)
449 	return;
450       if (alpha_i == 0.0 && beta_i == 1.0) {
451 	return;
452       }
453 
454       /* Set Index Parameters */
455       if (order == blas_colmajor) {
456 	incci = 1;
457 	inccij = ldc;
458 
459 	if (transa == blas_no_trans) {
460 	  incai = 1;
461 	  incaih = lda;
462 	} else {
463 	  incai = lda;
464 	  incaih = 1;
465 	}
466 
467 	if (transb == blas_no_trans) {
468 	  incbj = ldb;
469 	  incbhj = 1;
470 	} else {
471 	  incbj = 1;
472 	  incbhj = ldb;
473 	}
474 
475       } else {
476 	/* row major */
477 	incci = ldc;
478 	inccij = 1;
479 
480 	if (transa == blas_no_trans) {
481 	  incai = lda;
482 	  incaih = 1;
483 	} else {
484 	  incai = 1;
485 	  incaih = lda;
486 	}
487 
488 	if (transb == blas_no_trans) {
489 	  incbj = 1;
490 	  incbhj = ldb;
491 	} else {
492 	  incbj = ldb;
493 	  incbhj = 1;
494 	}
495 
496       }
497 
498       FPU_FIX_START;
499 
500       /* Ajustment to increments */
501 
502 
503 
504 
505 
506 
507 
508       /* alpha = 0.  In this case, just return beta * C */
509       if (alpha_i == 0.0) {
510 
511 	ci = 0;
512 	for (i = 0; i < m; i++, ci += incci) {
513 	  cij = ci;
514 	  for (j = 0; j < n; j++, cij += inccij) {
515 	    c_elem = c_i[cij];
516 	    {
517 	      /* Compute double_double = double * double. */
518 	      double a1, a2, b1, b2, con;
519 
520 	      con = c_elem * split;
521 	      a1 = con - c_elem;
522 	      a1 = con - a1;
523 	      a2 = c_elem - a1;
524 	      con = beta_i * split;
525 	      b1 = con - beta_i;
526 	      b1 = con - b1;
527 	      b2 = beta_i - b1;
528 
529 	      head_tmp1 = c_elem * beta_i;
530 	      tail_tmp1 =
531 		(((a1 * b1 - head_tmp1) + a1 * b2) + a2 * b1) + a2 * b2;
532 	    }
533 	    c_i[cij] = head_tmp1;
534 	  }
535 	}
536 
537       } else if (alpha_i == 1.0) {
538 
539 	/* Case alpha == 1. */
540 
541 	if (beta_i == 0.0) {
542 	  /* Case alpha == 1, beta == 0.   We compute  C <--- A * B */
543 
544 	  ci = 0;
545 	  ai = 0;
546 	  for (i = 0; i < m; i++, ci += incci, ai += incai) {
547 
548 	    cij = ci;
549 	    bj = 0;
550 
551 	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
552 
553 	      aih = ai;
554 	      bhj = bj;
555 
556 	      head_sum = tail_sum = 0.0;
557 
558 	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
559 		a_elem = a_i[aih];
560 		b_elem = b_i[bhj];
561 		if (transa == blas_conj_trans) {
562 
563 		}
564 		if (transb == blas_conj_trans) {
565 
566 		}
567 		{
568 		  /* Compute double_double = double * double. */
569 		  double a1, a2, b1, b2, con;
570 
571 		  con = a_elem * split;
572 		  a1 = con - a_elem;
573 		  a1 = con - a1;
574 		  a2 = a_elem - a1;
575 		  con = b_elem * split;
576 		  b1 = con - b_elem;
577 		  b1 = con - b1;
578 		  b2 = b_elem - b1;
579 
580 		  head_prod = a_elem * b_elem;
581 		  tail_prod =
582 		    (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
583 		}
584 		{
585 		  /* Compute double-double = double-double + double-double. */
586 		  double bv;
587 		  double s1, s2, t1, t2;
588 
589 		  /* Add two hi words. */
590 		  s1 = head_sum + head_prod;
591 		  bv = s1 - head_sum;
592 		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
593 
594 		  /* Add two lo words. */
595 		  t1 = tail_sum + tail_prod;
596 		  bv = t1 - tail_sum;
597 		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
598 
599 		  s2 += t1;
600 
601 		  /* Renormalize (s1, s2)  to  (t1, s2) */
602 		  t1 = s1 + s2;
603 		  s2 = s2 - (t1 - s1);
604 
605 		  t2 += s2;
606 
607 		  /* Renormalize (t1, t2)  */
608 		  head_sum = t1 + t2;
609 		  tail_sum = t2 - (head_sum - t1);
610 		}
611 	      }
612 	      c_i[cij] = head_sum;
613 	    }
614 	  }
615 
616 	} else {
617 	  /* Case alpha == 1, but beta != 0.
618 	     We compute   C <--- A * B + beta * C   */
619 
620 	  ci = 0;
621 	  ai = 0;
622 	  for (i = 0; i < m; i++, ci += incci, ai += incai) {
623 
624 	    cij = ci;
625 	    bj = 0;
626 
627 	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
628 
629 	      aih = ai;
630 	      bhj = bj;
631 
632 	      head_sum = tail_sum = 0.0;
633 
634 	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
635 		a_elem = a_i[aih];
636 		b_elem = b_i[bhj];
637 		if (transa == blas_conj_trans) {
638 
639 		}
640 		if (transb == blas_conj_trans) {
641 
642 		}
643 		{
644 		  /* Compute double_double = double * double. */
645 		  double a1, a2, b1, b2, con;
646 
647 		  con = a_elem * split;
648 		  a1 = con - a_elem;
649 		  a1 = con - a1;
650 		  a2 = a_elem - a1;
651 		  con = b_elem * split;
652 		  b1 = con - b_elem;
653 		  b1 = con - b1;
654 		  b2 = b_elem - b1;
655 
656 		  head_prod = a_elem * b_elem;
657 		  tail_prod =
658 		    (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
659 		}
660 		{
661 		  /* Compute double-double = double-double + double-double. */
662 		  double bv;
663 		  double s1, s2, t1, t2;
664 
665 		  /* Add two hi words. */
666 		  s1 = head_sum + head_prod;
667 		  bv = s1 - head_sum;
668 		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
669 
670 		  /* Add two lo words. */
671 		  t1 = tail_sum + tail_prod;
672 		  bv = t1 - tail_sum;
673 		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
674 
675 		  s2 += t1;
676 
677 		  /* Renormalize (s1, s2)  to  (t1, s2) */
678 		  t1 = s1 + s2;
679 		  s2 = s2 - (t1 - s1);
680 
681 		  t2 += s2;
682 
683 		  /* Renormalize (t1, t2)  */
684 		  head_sum = t1 + t2;
685 		  tail_sum = t2 - (head_sum - t1);
686 		}
687 	      }
688 
689 	      c_elem = c_i[cij];
690 	      {
691 		/* Compute double_double = double * double. */
692 		double a1, a2, b1, b2, con;
693 
694 		con = c_elem * split;
695 		a1 = con - c_elem;
696 		a1 = con - a1;
697 		a2 = c_elem - a1;
698 		con = beta_i * split;
699 		b1 = con - beta_i;
700 		b1 = con - b1;
701 		b2 = beta_i - b1;
702 
703 		head_tmp2 = c_elem * beta_i;
704 		tail_tmp2 =
705 		  (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
706 	      }
707 	      head_tmp1 = head_sum;
708 	      tail_tmp1 = tail_sum;
709 	      {
710 		/* Compute double-double = double-double + double-double. */
711 		double bv;
712 		double s1, s2, t1, t2;
713 
714 		/* Add two hi words. */
715 		s1 = head_tmp2 + head_tmp1;
716 		bv = s1 - head_tmp2;
717 		s2 = ((head_tmp1 - bv) + (head_tmp2 - (s1 - bv)));
718 
719 		/* Add two lo words. */
720 		t1 = tail_tmp2 + tail_tmp1;
721 		bv = t1 - tail_tmp2;
722 		t2 = ((tail_tmp1 - bv) + (tail_tmp2 - (t1 - bv)));
723 
724 		s2 += t1;
725 
726 		/* Renormalize (s1, s2)  to  (t1, s2) */
727 		t1 = s1 + s2;
728 		s2 = s2 - (t1 - s1);
729 
730 		t2 += s2;
731 
732 		/* Renormalize (t1, t2)  */
733 		head_tmp1 = t1 + t2;
734 		tail_tmp1 = t2 - (head_tmp1 - t1);
735 	      }
736 	      c_i[cij] = head_tmp1;
737 	    }
738 	  }
739 	}
740 
741       } else {
742 
743 	/* The most general form,   C <-- alpha * A * B + beta * C  */
744 	ci = 0;
745 	ai = 0;
746 	for (i = 0; i < m; i++, ci += incci, ai += incai) {
747 
748 	  cij = ci;
749 	  bj = 0;
750 
751 	  for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
752 
753 	    aih = ai;
754 	    bhj = bj;
755 
756 	    head_sum = tail_sum = 0.0;
757 
758 	    for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
759 	      a_elem = a_i[aih];
760 	      b_elem = b_i[bhj];
761 	      if (transa == blas_conj_trans) {
762 
763 	      }
764 	      if (transb == blas_conj_trans) {
765 
766 	      }
767 	      {
768 		/* Compute double_double = double * double. */
769 		double a1, a2, b1, b2, con;
770 
771 		con = a_elem * split;
772 		a1 = con - a_elem;
773 		a1 = con - a1;
774 		a2 = a_elem - a1;
775 		con = b_elem * split;
776 		b1 = con - b_elem;
777 		b1 = con - b1;
778 		b2 = b_elem - b1;
779 
780 		head_prod = a_elem * b_elem;
781 		tail_prod =
782 		  (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
783 	      }
784 	      {
785 		/* Compute double-double = double-double + double-double. */
786 		double bv;
787 		double s1, s2, t1, t2;
788 
789 		/* Add two hi words. */
790 		s1 = head_sum + head_prod;
791 		bv = s1 - head_sum;
792 		s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
793 
794 		/* Add two lo words. */
795 		t1 = tail_sum + tail_prod;
796 		bv = t1 - tail_sum;
797 		t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
798 
799 		s2 += t1;
800 
801 		/* Renormalize (s1, s2)  to  (t1, s2) */
802 		t1 = s1 + s2;
803 		s2 = s2 - (t1 - s1);
804 
805 		t2 += s2;
806 
807 		/* Renormalize (t1, t2)  */
808 		head_sum = t1 + t2;
809 		tail_sum = t2 - (head_sum - t1);
810 	      }
811 	    }
812 
813 	    {
814 	      /* Compute double-double = double-double * double. */
815 	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
816 
817 	      con = head_sum * split;
818 	      a11 = con - head_sum;
819 	      a11 = con - a11;
820 	      a21 = head_sum - a11;
821 	      con = alpha_i * split;
822 	      b1 = con - alpha_i;
823 	      b1 = con - b1;
824 	      b2 = alpha_i - b1;
825 
826 	      c11 = head_sum * alpha_i;
827 	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
828 
829 	      c2 = tail_sum * alpha_i;
830 	      t1 = c11 + c2;
831 	      t2 = (c2 - (t1 - c11)) + c21;
832 
833 	      head_tmp1 = t1 + t2;
834 	      tail_tmp1 = t2 - (head_tmp1 - t1);
835 	    }
836 	    c_elem = c_i[cij];
837 	    {
838 	      /* Compute double_double = double * double. */
839 	      double a1, a2, b1, b2, con;
840 
841 	      con = c_elem * split;
842 	      a1 = con - c_elem;
843 	      a1 = con - a1;
844 	      a2 = c_elem - a1;
845 	      con = beta_i * split;
846 	      b1 = con - beta_i;
847 	      b1 = con - b1;
848 	      b2 = beta_i - b1;
849 
850 	      head_tmp2 = c_elem * beta_i;
851 	      tail_tmp2 =
852 		(((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
853 	    }
854 	    {
855 	      /* Compute double-double = double-double + double-double. */
856 	      double bv;
857 	      double s1, s2, t1, t2;
858 
859 	      /* Add two hi words. */
860 	      s1 = head_tmp1 + head_tmp2;
861 	      bv = s1 - head_tmp1;
862 	      s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
863 
864 	      /* Add two lo words. */
865 	      t1 = tail_tmp1 + tail_tmp2;
866 	      bv = t1 - tail_tmp1;
867 	      t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
868 
869 	      s2 += t1;
870 
871 	      /* Renormalize (s1, s2)  to  (t1, s2) */
872 	      t1 = s1 + s2;
873 	      s2 = s2 - (t1 - s1);
874 
875 	      t2 += s2;
876 
877 	      /* Renormalize (t1, t2)  */
878 	      head_tmp1 = t1 + t2;
879 	      tail_tmp1 = t2 - (head_tmp1 - t1);
880 	    }
881 	    c_i[cij] = head_tmp1;
882 	  }
883 	}
884 
885       }
886 
887       FPU_FIX_STOP;
888 
889       break;
890     }
891   }
892 }
893