1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_dgemv2_d_s_x(enum blas_order_type order,enum blas_trans_type trans,int m,int n,double alpha,const double * a,int lda,const float * head_x,const float * tail_x,int incx,double beta,double * y,int incy,enum blas_prec_type prec)3 void BLAS_dgemv2_d_s_x(enum blas_order_type order, enum blas_trans_type trans,
4 		       int m, int n, double alpha, const double *a, int lda,
5 		       const float *head_x, const float *tail_x, int incx,
6 		       double beta, double *y, int incy,
7 		       enum blas_prec_type prec)
8 
9 /*
10  * Purpose
11  * =======
12  *
13  * Computes y = alpha * op(A) * head_x + alpha * op(A) * tail_x + beta * y,
14  * where A is a general matrix.
15  *
16  * Arguments
17  * =========
18  *
19  * order        (input) blas_order_type
20  *              Order of A; row or column major
21  *
22  * trans        (input) blas_trans_type
23  *              Transpose of A: no trans, trans, or conjugate trans
24  *
25  * m            (input) int
26  *              Dimension of A
27  *
28  * n            (input) int
29  *              Dimension of A and the length of vector x and z
30  *
31  * alpha        (input) double
32  *
33  * A            (input) const double*
34  *
35  * lda          (input) int
36  *              Leading dimension of A
37  *
38  * head_x
39  * tail_x       (input) const float*
40  *
41  * incx         (input) int
42  *              The stride for vector x.
43  *
44  * beta         (input) double
45  *
46  * y            (input) const double*
47  *
48  * incy         (input) int
49  *              The stride for vector y.
50  *
51  * prec   (input) enum blas_prec_type
52  *        Specifies the internal precision to be used.
53  *        = blas_prec_single: single precision.
54  *        = blas_prec_double: double precision.
55  *        = blas_prec_extra : anything at least 1.5 times as accurate
56  *                            than double, and wider than 80-bits.
57  *                            We use double-double in our implementation.
58  *
59  */
60 {
61   static const char routine_name[] = "BLAS_dgemv2_d_s_x";
62   switch (prec) {
63   case blas_prec_single:
64   case blas_prec_double:
65   case blas_prec_indigenous:{
66 
67       int i, j;
68       int iy, jx, kx, ky;
69       int lenx, leny;
70       int ai, aij;
71       int incai, incaij;
72 
73       const double *a_i = a;
74       const float *head_x_i = head_x;
75       const float *tail_x_i = tail_x;
76       double *y_i = y;
77       double alpha_i = alpha;
78       double beta_i = beta;
79       double a_elem;
80       float x_elem;
81       double y_elem;
82       double prod;
83       double sum;
84       double sum2;
85       double tmp1;
86       double tmp2;
87 
88 
89       /* all error calls */
90       if (m < 0)
91 	BLAS_error(routine_name, -3, m, 0);
92       else if (n <= 0)
93 	BLAS_error(routine_name, -4, n, 0);
94       else if (incx == 0)
95 	BLAS_error(routine_name, -10, incx, 0);
96       else if (incy == 0)
97 	BLAS_error(routine_name, -13, incy, 0);
98 
99       if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
100 	lenx = n;
101 	leny = m;
102 	incai = lda;
103 	incaij = 1;
104       } else if ((order == blas_rowmajor) && (trans != blas_no_trans)) {
105 	lenx = m;
106 	leny = n;
107 	incai = 1;
108 	incaij = lda;
109       } else if ((order == blas_colmajor) && (trans == blas_no_trans)) {
110 	lenx = n;
111 	leny = m;
112 	incai = 1;
113 	incaij = lda;
114       } else {			/* colmajor and blas_trans */
115 	lenx = m;
116 	leny = n;
117 	incai = lda;
118 	incaij = 1;
119       }
120 
121       if (lda < leny)
122 	BLAS_error(routine_name, -7, lda, NULL);
123 
124 
125 
126 
127 
128 
129 
130 
131       if (incx > 0)
132 	kx = 0;
133       else
134 	kx = (1 - lenx) * incx;
135       if (incy > 0)
136 	ky = 0;
137       else
138 	ky = (1 - leny) * incy;
139 
140       /* No extra-precision needed for alpha = 0 */
141       if (alpha_i == 0.0) {
142 	if (beta_i == 0.0) {
143 	  iy = ky;
144 	  for (i = 0; i < leny; i++) {
145 	    y_i[iy] = 0.0;
146 	    iy += incy;
147 	  }
148 	} else if (!(beta_i == 0.0)) {
149 	  iy = ky;
150 	  for (i = 0; i < leny; i++) {
151 	    y_elem = y_i[iy];
152 	    tmp1 = y_elem * beta_i;
153 	    y_i[iy] = tmp1;
154 	    iy += incy;
155 	  }
156 	}
157       } else {			/* alpha != 0 */
158 
159 	/* if beta = 0, we can save m multiplies:
160 	   y = alpha*A*head_x + alpha*A*tail_x  */
161 	if (beta_i == 0.0) {
162 	  if (alpha_i == 1.0) {
163 	    /* save m more multiplies if alpha = 1 */
164 	    ai = 0;
165 	    iy = ky;
166 	    for (i = 0; i < leny; i++) {
167 	      sum = 0.0;
168 	      sum2 = 0.0;
169 	      aij = ai;
170 	      jx = kx;
171 	      for (j = 0; j < lenx; j++) {
172 		a_elem = a_i[aij];
173 
174 		x_elem = head_x_i[jx];
175 		prod = a_elem * x_elem;
176 		sum = sum + prod;
177 		x_elem = tail_x_i[jx];
178 		prod = a_elem * x_elem;
179 		sum2 = sum2 + prod;
180 		aij += incaij;
181 		jx += incx;
182 	      }
183 	      sum = sum + sum2;
184 	      y_i[iy] = sum;
185 	      ai += incai;
186 	      iy += incy;
187 	    }			/* end for */
188 	  } else {		/* alpha != 1 */
189 	    ai = 0;
190 	    iy = ky;
191 	    for (i = 0; i < leny; i++) {
192 	      sum = 0.0;
193 	      sum2 = 0.0;
194 	      aij = ai;
195 	      jx = kx;
196 	      for (j = 0; j < lenx; j++) {
197 		a_elem = a_i[aij];
198 
199 		x_elem = head_x_i[jx];
200 		prod = a_elem * x_elem;
201 		sum = sum + prod;
202 		x_elem = tail_x_i[jx];
203 		prod = a_elem * x_elem;
204 		sum2 = sum2 + prod;
205 		aij += incaij;
206 		jx += incx;
207 	      }
208 	      tmp1 = sum * alpha_i;
209 	      tmp2 = sum2 * alpha_i;
210 	      tmp1 = tmp1 + tmp2;
211 	      y_i[iy] = tmp1;
212 	      ai += incai;
213 	      iy += incy;
214 	    }
215 	  }
216 	} else {		/* beta != 0 */
217 	  if (alpha_i == 1.0) {
218 	    /* save m multiplies if alpha = 1 */
219 	    ai = 0;
220 	    iy = ky;
221 	    for (i = 0; i < leny; i++) {
222 	      sum = 0.0;;
223 	      sum2 = 0.0;;
224 	      aij = ai;
225 	      jx = kx;
226 	      for (j = 0; j < lenx; j++) {
227 		a_elem = a_i[aij];
228 
229 		x_elem = head_x_i[jx];
230 		prod = a_elem * x_elem;
231 		sum = sum + prod;
232 		x_elem = tail_x_i[jx];
233 		prod = a_elem * x_elem;
234 		sum2 = sum2 + prod;
235 		aij += incaij;
236 		jx += incx;
237 	      }
238 	      sum = sum + sum2;
239 	      y_elem = y_i[iy];
240 	      tmp1 = y_elem * beta_i;
241 	      tmp2 = sum + tmp1;
242 	      y_i[iy] = tmp2;
243 	      ai += incai;
244 	      iy += incy;
245 	    }
246 	  } else {		/* alpha != 1, the most general form:
247 				   y = alpha*A*head_x + alpha*A*tail_x + beta*y */
248 	    ai = 0;
249 	    iy = ky;
250 	    for (i = 0; i < leny; i++) {
251 	      sum = 0.0;;
252 	      sum2 = 0.0;;
253 	      aij = ai;
254 	      jx = kx;
255 	      for (j = 0; j < lenx; j++) {
256 		a_elem = a_i[aij];
257 
258 		x_elem = head_x_i[jx];
259 		prod = a_elem * x_elem;
260 		sum = sum + prod;
261 		x_elem = tail_x_i[jx];
262 		prod = a_elem * x_elem;
263 		sum2 = sum2 + prod;
264 		aij += incaij;
265 		jx += incx;
266 	      }
267 	      tmp1 = sum * alpha_i;
268 	      tmp2 = sum2 * alpha_i;
269 	      tmp1 = tmp1 + tmp2;
270 	      y_elem = y_i[iy];
271 	      tmp2 = y_elem * beta_i;
272 	      tmp1 = tmp1 + tmp2;
273 	      y_i[iy] = tmp1;
274 	      ai += incai;
275 	      iy += incy;
276 	    }
277 	  }
278 	}
279 
280       }
281 
282 
283 
284       break;
285     }
286   case blas_prec_extra:{
287 
288       int i, j;
289       int iy, jx, kx, ky;
290       int lenx, leny;
291       int ai, aij;
292       int incai, incaij;
293 
294       const double *a_i = a;
295       const float *head_x_i = head_x;
296       const float *tail_x_i = tail_x;
297       double *y_i = y;
298       double alpha_i = alpha;
299       double beta_i = beta;
300       double a_elem;
301       float x_elem;
302       double y_elem;
303       double head_prod, tail_prod;
304       double head_sum, tail_sum;
305       double head_sum2, tail_sum2;
306       double head_tmp1, tail_tmp1;
307       double head_tmp2, tail_tmp2;
308       FPU_FIX_DECL;
309 
310       /* all error calls */
311       if (m < 0)
312 	BLAS_error(routine_name, -3, m, 0);
313       else if (n <= 0)
314 	BLAS_error(routine_name, -4, n, 0);
315       else if (incx == 0)
316 	BLAS_error(routine_name, -10, incx, 0);
317       else if (incy == 0)
318 	BLAS_error(routine_name, -13, incy, 0);
319 
320       if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
321 	lenx = n;
322 	leny = m;
323 	incai = lda;
324 	incaij = 1;
325       } else if ((order == blas_rowmajor) && (trans != blas_no_trans)) {
326 	lenx = m;
327 	leny = n;
328 	incai = 1;
329 	incaij = lda;
330       } else if ((order == blas_colmajor) && (trans == blas_no_trans)) {
331 	lenx = n;
332 	leny = m;
333 	incai = 1;
334 	incaij = lda;
335       } else {			/* colmajor and blas_trans */
336 	lenx = m;
337 	leny = n;
338 	incai = lda;
339 	incaij = 1;
340       }
341 
342       if (lda < leny)
343 	BLAS_error(routine_name, -7, lda, NULL);
344 
345       FPU_FIX_START;
346 
347 
348 
349 
350 
351 
352       if (incx > 0)
353 	kx = 0;
354       else
355 	kx = (1 - lenx) * incx;
356       if (incy > 0)
357 	ky = 0;
358       else
359 	ky = (1 - leny) * incy;
360 
361       /* No extra-precision needed for alpha = 0 */
362       if (alpha_i == 0.0) {
363 	if (beta_i == 0.0) {
364 	  iy = ky;
365 	  for (i = 0; i < leny; i++) {
366 	    y_i[iy] = 0.0;
367 	    iy += incy;
368 	  }
369 	} else if (!(beta_i == 0.0)) {
370 	  iy = ky;
371 	  for (i = 0; i < leny; i++) {
372 	    y_elem = y_i[iy];
373 	    {
374 	      /* Compute double_double = double * double. */
375 	      double a1, a2, b1, b2, con;
376 
377 	      con = y_elem * split;
378 	      a1 = con - y_elem;
379 	      a1 = con - a1;
380 	      a2 = y_elem - a1;
381 	      con = beta_i * split;
382 	      b1 = con - beta_i;
383 	      b1 = con - b1;
384 	      b2 = beta_i - b1;
385 
386 	      head_tmp1 = y_elem * beta_i;
387 	      tail_tmp1 =
388 		(((a1 * b1 - head_tmp1) + a1 * b2) + a2 * b1) + a2 * b2;
389 	    }
390 	    y_i[iy] = head_tmp1;
391 	    iy += incy;
392 	  }
393 	}
394       } else {			/* alpha != 0 */
395 
396 	/* if beta = 0, we can save m multiplies:
397 	   y = alpha*A*head_x + alpha*A*tail_x  */
398 	if (beta_i == 0.0) {
399 	  if (alpha_i == 1.0) {
400 	    /* save m more multiplies if alpha = 1 */
401 	    ai = 0;
402 	    iy = ky;
403 	    for (i = 0; i < leny; i++) {
404 	      head_sum = tail_sum = 0.0;
405 	      head_sum2 = tail_sum2 = 0.0;
406 	      aij = ai;
407 	      jx = kx;
408 	      for (j = 0; j < lenx; j++) {
409 		a_elem = a_i[aij];
410 
411 		x_elem = head_x_i[jx];
412 		{
413 		  double dt = (double) x_elem;
414 		  {
415 		    /* Compute double_double = double * double. */
416 		    double a1, a2, b1, b2, con;
417 
418 		    con = a_elem * split;
419 		    a1 = con - a_elem;
420 		    a1 = con - a1;
421 		    a2 = a_elem - a1;
422 		    con = dt * split;
423 		    b1 = con - dt;
424 		    b1 = con - b1;
425 		    b2 = dt - b1;
426 
427 		    head_prod = a_elem * dt;
428 		    tail_prod =
429 		      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
430 		  }
431 		}
432 		{
433 		  /* Compute double-double = double-double + double-double. */
434 		  double bv;
435 		  double s1, s2, t1, t2;
436 
437 		  /* Add two hi words. */
438 		  s1 = head_sum + head_prod;
439 		  bv = s1 - head_sum;
440 		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
441 
442 		  /* Add two lo words. */
443 		  t1 = tail_sum + tail_prod;
444 		  bv = t1 - tail_sum;
445 		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
446 
447 		  s2 += t1;
448 
449 		  /* Renormalize (s1, s2)  to  (t1, s2) */
450 		  t1 = s1 + s2;
451 		  s2 = s2 - (t1 - s1);
452 
453 		  t2 += s2;
454 
455 		  /* Renormalize (t1, t2)  */
456 		  head_sum = t1 + t2;
457 		  tail_sum = t2 - (head_sum - t1);
458 		}
459 		x_elem = tail_x_i[jx];
460 		{
461 		  double dt = (double) x_elem;
462 		  {
463 		    /* Compute double_double = double * double. */
464 		    double a1, a2, b1, b2, con;
465 
466 		    con = a_elem * split;
467 		    a1 = con - a_elem;
468 		    a1 = con - a1;
469 		    a2 = a_elem - a1;
470 		    con = dt * split;
471 		    b1 = con - dt;
472 		    b1 = con - b1;
473 		    b2 = dt - b1;
474 
475 		    head_prod = a_elem * dt;
476 		    tail_prod =
477 		      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
478 		  }
479 		}
480 		{
481 		  /* Compute double-double = double-double + double-double. */
482 		  double bv;
483 		  double s1, s2, t1, t2;
484 
485 		  /* Add two hi words. */
486 		  s1 = head_sum2 + head_prod;
487 		  bv = s1 - head_sum2;
488 		  s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));
489 
490 		  /* Add two lo words. */
491 		  t1 = tail_sum2 + tail_prod;
492 		  bv = t1 - tail_sum2;
493 		  t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));
494 
495 		  s2 += t1;
496 
497 		  /* Renormalize (s1, s2)  to  (t1, s2) */
498 		  t1 = s1 + s2;
499 		  s2 = s2 - (t1 - s1);
500 
501 		  t2 += s2;
502 
503 		  /* Renormalize (t1, t2)  */
504 		  head_sum2 = t1 + t2;
505 		  tail_sum2 = t2 - (head_sum2 - t1);
506 		}
507 		aij += incaij;
508 		jx += incx;
509 	      }
510 	      {
511 		/* Compute double-double = double-double + double-double. */
512 		double bv;
513 		double s1, s2, t1, t2;
514 
515 		/* Add two hi words. */
516 		s1 = head_sum + head_sum2;
517 		bv = s1 - head_sum;
518 		s2 = ((head_sum2 - bv) + (head_sum - (s1 - bv)));
519 
520 		/* Add two lo words. */
521 		t1 = tail_sum + tail_sum2;
522 		bv = t1 - tail_sum;
523 		t2 = ((tail_sum2 - bv) + (tail_sum - (t1 - bv)));
524 
525 		s2 += t1;
526 
527 		/* Renormalize (s1, s2)  to  (t1, s2) */
528 		t1 = s1 + s2;
529 		s2 = s2 - (t1 - s1);
530 
531 		t2 += s2;
532 
533 		/* Renormalize (t1, t2)  */
534 		head_sum = t1 + t2;
535 		tail_sum = t2 - (head_sum - t1);
536 	      }
537 	      y_i[iy] = head_sum;
538 	      ai += incai;
539 	      iy += incy;
540 	    }			/* end for */
541 	  } else {		/* alpha != 1 */
542 	    ai = 0;
543 	    iy = ky;
544 	    for (i = 0; i < leny; i++) {
545 	      head_sum = tail_sum = 0.0;
546 	      head_sum2 = tail_sum2 = 0.0;
547 	      aij = ai;
548 	      jx = kx;
549 	      for (j = 0; j < lenx; j++) {
550 		a_elem = a_i[aij];
551 
552 		x_elem = head_x_i[jx];
553 		{
554 		  double dt = (double) x_elem;
555 		  {
556 		    /* Compute double_double = double * double. */
557 		    double a1, a2, b1, b2, con;
558 
559 		    con = a_elem * split;
560 		    a1 = con - a_elem;
561 		    a1 = con - a1;
562 		    a2 = a_elem - a1;
563 		    con = dt * split;
564 		    b1 = con - dt;
565 		    b1 = con - b1;
566 		    b2 = dt - b1;
567 
568 		    head_prod = a_elem * dt;
569 		    tail_prod =
570 		      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
571 		  }
572 		}
573 		{
574 		  /* Compute double-double = double-double + double-double. */
575 		  double bv;
576 		  double s1, s2, t1, t2;
577 
578 		  /* Add two hi words. */
579 		  s1 = head_sum + head_prod;
580 		  bv = s1 - head_sum;
581 		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
582 
583 		  /* Add two lo words. */
584 		  t1 = tail_sum + tail_prod;
585 		  bv = t1 - tail_sum;
586 		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
587 
588 		  s2 += t1;
589 
590 		  /* Renormalize (s1, s2)  to  (t1, s2) */
591 		  t1 = s1 + s2;
592 		  s2 = s2 - (t1 - s1);
593 
594 		  t2 += s2;
595 
596 		  /* Renormalize (t1, t2)  */
597 		  head_sum = t1 + t2;
598 		  tail_sum = t2 - (head_sum - t1);
599 		}
600 		x_elem = tail_x_i[jx];
601 		{
602 		  double dt = (double) x_elem;
603 		  {
604 		    /* Compute double_double = double * double. */
605 		    double a1, a2, b1, b2, con;
606 
607 		    con = a_elem * split;
608 		    a1 = con - a_elem;
609 		    a1 = con - a1;
610 		    a2 = a_elem - a1;
611 		    con = dt * split;
612 		    b1 = con - dt;
613 		    b1 = con - b1;
614 		    b2 = dt - b1;
615 
616 		    head_prod = a_elem * dt;
617 		    tail_prod =
618 		      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
619 		  }
620 		}
621 		{
622 		  /* Compute double-double = double-double + double-double. */
623 		  double bv;
624 		  double s1, s2, t1, t2;
625 
626 		  /* Add two hi words. */
627 		  s1 = head_sum2 + head_prod;
628 		  bv = s1 - head_sum2;
629 		  s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));
630 
631 		  /* Add two lo words. */
632 		  t1 = tail_sum2 + tail_prod;
633 		  bv = t1 - tail_sum2;
634 		  t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));
635 
636 		  s2 += t1;
637 
638 		  /* Renormalize (s1, s2)  to  (t1, s2) */
639 		  t1 = s1 + s2;
640 		  s2 = s2 - (t1 - s1);
641 
642 		  t2 += s2;
643 
644 		  /* Renormalize (t1, t2)  */
645 		  head_sum2 = t1 + t2;
646 		  tail_sum2 = t2 - (head_sum2 - t1);
647 		}
648 		aij += incaij;
649 		jx += incx;
650 	      }
651 	      {
652 		/* Compute double-double = double-double * double. */
653 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
654 
655 		con = head_sum * split;
656 		a11 = con - head_sum;
657 		a11 = con - a11;
658 		a21 = head_sum - a11;
659 		con = alpha_i * split;
660 		b1 = con - alpha_i;
661 		b1 = con - b1;
662 		b2 = alpha_i - b1;
663 
664 		c11 = head_sum * alpha_i;
665 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
666 
667 		c2 = tail_sum * alpha_i;
668 		t1 = c11 + c2;
669 		t2 = (c2 - (t1 - c11)) + c21;
670 
671 		head_tmp1 = t1 + t2;
672 		tail_tmp1 = t2 - (head_tmp1 - t1);
673 	      }
674 	      {
675 		/* Compute double-double = double-double * double. */
676 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
677 
678 		con = head_sum2 * split;
679 		a11 = con - head_sum2;
680 		a11 = con - a11;
681 		a21 = head_sum2 - a11;
682 		con = alpha_i * split;
683 		b1 = con - alpha_i;
684 		b1 = con - b1;
685 		b2 = alpha_i - b1;
686 
687 		c11 = head_sum2 * alpha_i;
688 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
689 
690 		c2 = tail_sum2 * alpha_i;
691 		t1 = c11 + c2;
692 		t2 = (c2 - (t1 - c11)) + c21;
693 
694 		head_tmp2 = t1 + t2;
695 		tail_tmp2 = t2 - (head_tmp2 - t1);
696 	      }
697 	      {
698 		/* Compute double-double = double-double + double-double. */
699 		double bv;
700 		double s1, s2, t1, t2;
701 
702 		/* Add two hi words. */
703 		s1 = head_tmp1 + head_tmp2;
704 		bv = s1 - head_tmp1;
705 		s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
706 
707 		/* Add two lo words. */
708 		t1 = tail_tmp1 + tail_tmp2;
709 		bv = t1 - tail_tmp1;
710 		t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
711 
712 		s2 += t1;
713 
714 		/* Renormalize (s1, s2)  to  (t1, s2) */
715 		t1 = s1 + s2;
716 		s2 = s2 - (t1 - s1);
717 
718 		t2 += s2;
719 
720 		/* Renormalize (t1, t2)  */
721 		head_tmp1 = t1 + t2;
722 		tail_tmp1 = t2 - (head_tmp1 - t1);
723 	      }
724 	      y_i[iy] = head_tmp1;
725 	      ai += incai;
726 	      iy += incy;
727 	    }
728 	  }
729 	} else {		/* beta != 0 */
730 	  if (alpha_i == 1.0) {
731 	    /* save m multiplies if alpha = 1 */
732 	    ai = 0;
733 	    iy = ky;
734 	    for (i = 0; i < leny; i++) {
735 	      head_sum = tail_sum = 0.0;;
736 	      head_sum2 = tail_sum2 = 0.0;;
737 	      aij = ai;
738 	      jx = kx;
739 	      for (j = 0; j < lenx; j++) {
740 		a_elem = a_i[aij];
741 
742 		x_elem = head_x_i[jx];
743 		{
744 		  double dt = (double) x_elem;
745 		  {
746 		    /* Compute double_double = double * double. */
747 		    double a1, a2, b1, b2, con;
748 
749 		    con = a_elem * split;
750 		    a1 = con - a_elem;
751 		    a1 = con - a1;
752 		    a2 = a_elem - a1;
753 		    con = dt * split;
754 		    b1 = con - dt;
755 		    b1 = con - b1;
756 		    b2 = dt - b1;
757 
758 		    head_prod = a_elem * dt;
759 		    tail_prod =
760 		      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
761 		  }
762 		}
763 		{
764 		  /* Compute double-double = double-double + double-double. */
765 		  double bv;
766 		  double s1, s2, t1, t2;
767 
768 		  /* Add two hi words. */
769 		  s1 = head_sum + head_prod;
770 		  bv = s1 - head_sum;
771 		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
772 
773 		  /* Add two lo words. */
774 		  t1 = tail_sum + tail_prod;
775 		  bv = t1 - tail_sum;
776 		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
777 
778 		  s2 += t1;
779 
780 		  /* Renormalize (s1, s2)  to  (t1, s2) */
781 		  t1 = s1 + s2;
782 		  s2 = s2 - (t1 - s1);
783 
784 		  t2 += s2;
785 
786 		  /* Renormalize (t1, t2)  */
787 		  head_sum = t1 + t2;
788 		  tail_sum = t2 - (head_sum - t1);
789 		}
790 		x_elem = tail_x_i[jx];
791 		{
792 		  double dt = (double) x_elem;
793 		  {
794 		    /* Compute double_double = double * double. */
795 		    double a1, a2, b1, b2, con;
796 
797 		    con = a_elem * split;
798 		    a1 = con - a_elem;
799 		    a1 = con - a1;
800 		    a2 = a_elem - a1;
801 		    con = dt * split;
802 		    b1 = con - dt;
803 		    b1 = con - b1;
804 		    b2 = dt - b1;
805 
806 		    head_prod = a_elem * dt;
807 		    tail_prod =
808 		      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
809 		  }
810 		}
811 		{
812 		  /* Compute double-double = double-double + double-double. */
813 		  double bv;
814 		  double s1, s2, t1, t2;
815 
816 		  /* Add two hi words. */
817 		  s1 = head_sum2 + head_prod;
818 		  bv = s1 - head_sum2;
819 		  s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));
820 
821 		  /* Add two lo words. */
822 		  t1 = tail_sum2 + tail_prod;
823 		  bv = t1 - tail_sum2;
824 		  t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));
825 
826 		  s2 += t1;
827 
828 		  /* Renormalize (s1, s2)  to  (t1, s2) */
829 		  t1 = s1 + s2;
830 		  s2 = s2 - (t1 - s1);
831 
832 		  t2 += s2;
833 
834 		  /* Renormalize (t1, t2)  */
835 		  head_sum2 = t1 + t2;
836 		  tail_sum2 = t2 - (head_sum2 - t1);
837 		}
838 		aij += incaij;
839 		jx += incx;
840 	      }
841 	      {
842 		/* Compute double-double = double-double + double-double. */
843 		double bv;
844 		double s1, s2, t1, t2;
845 
846 		/* Add two hi words. */
847 		s1 = head_sum + head_sum2;
848 		bv = s1 - head_sum;
849 		s2 = ((head_sum2 - bv) + (head_sum - (s1 - bv)));
850 
851 		/* Add two lo words. */
852 		t1 = tail_sum + tail_sum2;
853 		bv = t1 - tail_sum;
854 		t2 = ((tail_sum2 - bv) + (tail_sum - (t1 - bv)));
855 
856 		s2 += t1;
857 
858 		/* Renormalize (s1, s2)  to  (t1, s2) */
859 		t1 = s1 + s2;
860 		s2 = s2 - (t1 - s1);
861 
862 		t2 += s2;
863 
864 		/* Renormalize (t1, t2)  */
865 		head_sum = t1 + t2;
866 		tail_sum = t2 - (head_sum - t1);
867 	      }
868 	      y_elem = y_i[iy];
869 	      {
870 		/* Compute double_double = double * double. */
871 		double a1, a2, b1, b2, con;
872 
873 		con = y_elem * split;
874 		a1 = con - y_elem;
875 		a1 = con - a1;
876 		a2 = y_elem - a1;
877 		con = beta_i * split;
878 		b1 = con - beta_i;
879 		b1 = con - b1;
880 		b2 = beta_i - b1;
881 
882 		head_tmp1 = y_elem * beta_i;
883 		tail_tmp1 =
884 		  (((a1 * b1 - head_tmp1) + a1 * b2) + a2 * b1) + a2 * b2;
885 	      }
886 	      {
887 		/* Compute double-double = double-double + double-double. */
888 		double bv;
889 		double s1, s2, t1, t2;
890 
891 		/* Add two hi words. */
892 		s1 = head_sum + head_tmp1;
893 		bv = s1 - head_sum;
894 		s2 = ((head_tmp1 - bv) + (head_sum - (s1 - bv)));
895 
896 		/* Add two lo words. */
897 		t1 = tail_sum + tail_tmp1;
898 		bv = t1 - tail_sum;
899 		t2 = ((tail_tmp1 - bv) + (tail_sum - (t1 - bv)));
900 
901 		s2 += t1;
902 
903 		/* Renormalize (s1, s2)  to  (t1, s2) */
904 		t1 = s1 + s2;
905 		s2 = s2 - (t1 - s1);
906 
907 		t2 += s2;
908 
909 		/* Renormalize (t1, t2)  */
910 		head_tmp2 = t1 + t2;
911 		tail_tmp2 = t2 - (head_tmp2 - t1);
912 	      }
913 	      y_i[iy] = head_tmp2;
914 	      ai += incai;
915 	      iy += incy;
916 	    }
917 	  } else {		/* alpha != 1, the most general form:
918 				   y = alpha*A*head_x + alpha*A*tail_x + beta*y */
919 	    ai = 0;
920 	    iy = ky;
921 	    for (i = 0; i < leny; i++) {
922 	      head_sum = tail_sum = 0.0;;
923 	      head_sum2 = tail_sum2 = 0.0;;
924 	      aij = ai;
925 	      jx = kx;
926 	      for (j = 0; j < lenx; j++) {
927 		a_elem = a_i[aij];
928 
929 		x_elem = head_x_i[jx];
930 		{
931 		  double dt = (double) x_elem;
932 		  {
933 		    /* Compute double_double = double * double. */
934 		    double a1, a2, b1, b2, con;
935 
936 		    con = a_elem * split;
937 		    a1 = con - a_elem;
938 		    a1 = con - a1;
939 		    a2 = a_elem - a1;
940 		    con = dt * split;
941 		    b1 = con - dt;
942 		    b1 = con - b1;
943 		    b2 = dt - b1;
944 
945 		    head_prod = a_elem * dt;
946 		    tail_prod =
947 		      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
948 		  }
949 		}
950 		{
951 		  /* Compute double-double = double-double + double-double. */
952 		  double bv;
953 		  double s1, s2, t1, t2;
954 
955 		  /* Add two hi words. */
956 		  s1 = head_sum + head_prod;
957 		  bv = s1 - head_sum;
958 		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));
959 
960 		  /* Add two lo words. */
961 		  t1 = tail_sum + tail_prod;
962 		  bv = t1 - tail_sum;
963 		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));
964 
965 		  s2 += t1;
966 
967 		  /* Renormalize (s1, s2)  to  (t1, s2) */
968 		  t1 = s1 + s2;
969 		  s2 = s2 - (t1 - s1);
970 
971 		  t2 += s2;
972 
973 		  /* Renormalize (t1, t2)  */
974 		  head_sum = t1 + t2;
975 		  tail_sum = t2 - (head_sum - t1);
976 		}
977 		x_elem = tail_x_i[jx];
978 		{
979 		  double dt = (double) x_elem;
980 		  {
981 		    /* Compute double_double = double * double. */
982 		    double a1, a2, b1, b2, con;
983 
984 		    con = a_elem * split;
985 		    a1 = con - a_elem;
986 		    a1 = con - a1;
987 		    a2 = a_elem - a1;
988 		    con = dt * split;
989 		    b1 = con - dt;
990 		    b1 = con - b1;
991 		    b2 = dt - b1;
992 
993 		    head_prod = a_elem * dt;
994 		    tail_prod =
995 		      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
996 		  }
997 		}
998 		{
999 		  /* Compute double-double = double-double + double-double. */
1000 		  double bv;
1001 		  double s1, s2, t1, t2;
1002 
1003 		  /* Add two hi words. */
1004 		  s1 = head_sum2 + head_prod;
1005 		  bv = s1 - head_sum2;
1006 		  s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));
1007 
1008 		  /* Add two lo words. */
1009 		  t1 = tail_sum2 + tail_prod;
1010 		  bv = t1 - tail_sum2;
1011 		  t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));
1012 
1013 		  s2 += t1;
1014 
1015 		  /* Renormalize (s1, s2)  to  (t1, s2) */
1016 		  t1 = s1 + s2;
1017 		  s2 = s2 - (t1 - s1);
1018 
1019 		  t2 += s2;
1020 
1021 		  /* Renormalize (t1, t2)  */
1022 		  head_sum2 = t1 + t2;
1023 		  tail_sum2 = t2 - (head_sum2 - t1);
1024 		}
1025 		aij += incaij;
1026 		jx += incx;
1027 	      }
1028 	      {
1029 		/* Compute double-double = double-double * double. */
1030 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1031 
1032 		con = head_sum * split;
1033 		a11 = con - head_sum;
1034 		a11 = con - a11;
1035 		a21 = head_sum - a11;
1036 		con = alpha_i * split;
1037 		b1 = con - alpha_i;
1038 		b1 = con - b1;
1039 		b2 = alpha_i - b1;
1040 
1041 		c11 = head_sum * alpha_i;
1042 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1043 
1044 		c2 = tail_sum * alpha_i;
1045 		t1 = c11 + c2;
1046 		t2 = (c2 - (t1 - c11)) + c21;
1047 
1048 		head_tmp1 = t1 + t2;
1049 		tail_tmp1 = t2 - (head_tmp1 - t1);
1050 	      }
1051 	      {
1052 		/* Compute double-double = double-double * double. */
1053 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1054 
1055 		con = head_sum2 * split;
1056 		a11 = con - head_sum2;
1057 		a11 = con - a11;
1058 		a21 = head_sum2 - a11;
1059 		con = alpha_i * split;
1060 		b1 = con - alpha_i;
1061 		b1 = con - b1;
1062 		b2 = alpha_i - b1;
1063 
1064 		c11 = head_sum2 * alpha_i;
1065 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1066 
1067 		c2 = tail_sum2 * alpha_i;
1068 		t1 = c11 + c2;
1069 		t2 = (c2 - (t1 - c11)) + c21;
1070 
1071 		head_tmp2 = t1 + t2;
1072 		tail_tmp2 = t2 - (head_tmp2 - t1);
1073 	      }
1074 	      {
1075 		/* Compute double-double = double-double + double-double. */
1076 		double bv;
1077 		double s1, s2, t1, t2;
1078 
1079 		/* Add two hi words. */
1080 		s1 = head_tmp1 + head_tmp2;
1081 		bv = s1 - head_tmp1;
1082 		s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
1083 
1084 		/* Add two lo words. */
1085 		t1 = tail_tmp1 + tail_tmp2;
1086 		bv = t1 - tail_tmp1;
1087 		t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
1088 
1089 		s2 += t1;
1090 
1091 		/* Renormalize (s1, s2)  to  (t1, s2) */
1092 		t1 = s1 + s2;
1093 		s2 = s2 - (t1 - s1);
1094 
1095 		t2 += s2;
1096 
1097 		/* Renormalize (t1, t2)  */
1098 		head_tmp1 = t1 + t2;
1099 		tail_tmp1 = t2 - (head_tmp1 - t1);
1100 	      }
1101 	      y_elem = y_i[iy];
1102 	      {
1103 		/* Compute double_double = double * double. */
1104 		double a1, a2, b1, b2, con;
1105 
1106 		con = y_elem * split;
1107 		a1 = con - y_elem;
1108 		a1 = con - a1;
1109 		a2 = y_elem - a1;
1110 		con = beta_i * split;
1111 		b1 = con - beta_i;
1112 		b1 = con - b1;
1113 		b2 = beta_i - b1;
1114 
1115 		head_tmp2 = y_elem * beta_i;
1116 		tail_tmp2 =
1117 		  (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
1118 	      }
1119 	      {
1120 		/* Compute double-double = double-double + double-double. */
1121 		double bv;
1122 		double s1, s2, t1, t2;
1123 
1124 		/* Add two hi words. */
1125 		s1 = head_tmp1 + head_tmp2;
1126 		bv = s1 - head_tmp1;
1127 		s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
1128 
1129 		/* Add two lo words. */
1130 		t1 = tail_tmp1 + tail_tmp2;
1131 		bv = t1 - tail_tmp1;
1132 		t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
1133 
1134 		s2 += t1;
1135 
1136 		/* Renormalize (s1, s2)  to  (t1, s2) */
1137 		t1 = s1 + s2;
1138 		s2 = s2 - (t1 - s1);
1139 
1140 		t2 += s2;
1141 
1142 		/* Renormalize (t1, t2)  */
1143 		head_tmp1 = t1 + t2;
1144 		tail_tmp1 = t2 - (head_tmp1 - t1);
1145 	      }
1146 	      y_i[iy] = head_tmp1;
1147 	      ai += incai;
1148 	      iy += incy;
1149 	    }
1150 	  }
1151 	}
1152 
1153       }
1154 
1155       FPU_FIX_STOP;
1156     }
1157     break;
1158   }
1159 }
1160