1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_cge_sum_mv_s_s_x(enum blas_order_type order,int m,int n,const void * alpha,const float * a,int lda,const float * x,int incx,const void * beta,const float * b,int ldb,void * y,int incy,enum blas_prec_type prec)3 void BLAS_cge_sum_mv_s_s_x(enum blas_order_type order, int m, int n,
4 			   const void *alpha, const float *a, int lda,
5 			   const float *x, int incx,
6 			   const void *beta, const float *b, int ldb,
7 			   void *y, int incy, enum blas_prec_type prec)
8 
9 /*
10  * Purpose
11  * =======
12  *
13  * Computes y = alpha * A * x + beta * B * y,
14  *     where A, B are general matricies.
15  *
16  * Arguments
17  * =========
18  *
19  * order        (input) blas_order_type
20  *              Order of A; row or column major
21  *
22  * m            (input) int
23  *              Row Dimension of A, B, length of output vector y
24  *
25  * n            (input) int
26  *              Column Dimension of A, B and the length of vector x
27  *
28  * alpha        (input) const void*
29  *
30  * A            (input) const float*
31  *
32  * lda          (input) int
33  *              Leading dimension of A
34  *
35  * x            (input) const float*
36  *
37  * incx         (input) int
38  *              The stride for vector x.
39  *
40  * beta         (input) const void*
41  *
42  * b            (input) const float*
43  *
44  * ldb          (input) int
45  *              Leading dimension of B
46  *
47  * y            (input/output) void*
48  *
49  * incy         (input) int
50  *              The stride for vector y.
51  *
52  * prec   (input) enum blas_prec_type
53  *        Specifies the internal precision to be used.
54  *        = blas_prec_single: single precision.
55  *        = blas_prec_double: double precision.
56  *        = blas_prec_extra : anything at least 1.5 times as accurate
57  *                            than double, and wider than 80-bits.
58  *                            We use double-double in our implementation.
59  *
60  */
61 {
62   /* Routine name */
63   static const char routine_name[] = "BLAS_cge_sum_mv_s_s";
64   switch (prec) {
65   case blas_prec_single:{
66       int i, j;
67       int xi, yi;
68       int x_starti, y_starti, incxi, incyi;
69       int lda_min;
70       int ai;
71       int incai;
72       int aij;
73       int incaij;
74       int bi;
75       int incbi;
76       int bij;
77       int incbij;
78 
79       const float *a_i = a;
80       const float *b_i = b;
81       const float *x_i = x;
82       float *y_i = (float *) y;
83       float *alpha_i = (float *) alpha;
84       float *beta_i = (float *) beta;
85       float a_elem;
86       float b_elem;
87       float x_elem;
88       float prod;
89       float sumA;
90       float sumB;
91       float tmp1[2];
92       float tmp2[2];
93 
94 
95 
96       /* m is number of rows */
97       /* n is number of columns */
98 
99       if (m == 0 || n == 0)
100 	return;
101 
102 
103       /* all error calls */
104       if (order == blas_rowmajor) {
105 	lda_min = n;
106 	incai = lda;		/* row stride */
107 	incbi = ldb;
108 	incbij = incaij = 1;	/* column stride */
109       } else if (order == blas_colmajor) {
110 	lda_min = m;
111 	incai = incbi = 1;	/*row stride */
112 	incaij = lda;		/* column stride */
113 	incbij = ldb;
114       } else {
115 	/* error, order not blas_colmajor not blas_rowmajor */
116 	BLAS_error(routine_name, -1, order, 0);
117 	return;
118       }
119 
120       if (m < 0)
121 	BLAS_error(routine_name, -2, m, 0);
122       else if (n < 0)
123 	BLAS_error(routine_name, -3, n, 0);
124       if (lda < lda_min)
125 	BLAS_error(routine_name, -6, lda, 0);
126       else if (ldb < lda_min)
127 	BLAS_error(routine_name, -11, ldb, 0);
128       else if (incx == 0)
129 	BLAS_error(routine_name, -8, incx, 0);
130       else if (incy == 0)
131 	BLAS_error(routine_name, -13, incy, 0);
132 
133       incxi = incx;
134       incyi = incy;
135 
136       incyi *= 2;
137 
138 
139 
140 
141 
142       if (incxi > 0)
143 	x_starti = 0;
144       else
145 	x_starti = (1 - n) * incxi;
146 
147       if (incyi > 0)
148 	y_starti = 0;
149       else
150 	y_starti = (1 - m) * incyi;
151 
152 
153 
154       if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
155 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
156 	  /* alpha, beta are 0.0 */
157 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
158 	    y_i[yi] = 0.0;
159 	    y_i[yi + 1] = 0.0;
160 	  }
161 	} else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
162 	  /* alpha is 0.0, beta is 1.0 */
163 
164 
165 	  bi = 0;
166 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
167 
168 	    sumB = 0.0;
169 	    bij = bi;
170 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
171 	      x_elem = x_i[xi];
172 
173 	      b_elem = b_i[bij];
174 	      prod = b_elem * x_elem;
175 	      sumB = sumB + prod;
176 	      bij += incbij;
177 	    }
178 	    /* now put the result into y_i */
179 	    tmp1[0] = sumB;
180 	    tmp1[1] = 0.0;
181 	    y_i[yi] = tmp1[0];
182 	    y_i[yi + 1] = tmp1[1];
183 
184 	    bi += incbi;
185 	  }
186 	} else {
187 	  /* alpha is 0.0, beta not 1.0 nor 0.0 */
188 
189 
190 	  bi = 0;
191 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
192 
193 	    sumB = 0.0;
194 	    bij = bi;
195 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
196 	      x_elem = x_i[xi];
197 
198 	      b_elem = b_i[bij];
199 	      prod = b_elem * x_elem;
200 	      sumB = sumB + prod;
201 	      bij += incbij;
202 	    }
203 	    /* now put the result into y_i */
204 	    {
205 	      tmp1[0] = beta_i[0] * sumB;
206 	      tmp1[1] = beta_i[1] * sumB;
207 	    }
208 	    y_i[yi] = tmp1[0];
209 	    y_i[yi + 1] = tmp1[1];
210 
211 	    bi += incbi;
212 	  }
213 	}
214       } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
215 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
216 	  /* alpha is 1.0, beta is 0.0 */
217 
218 	  ai = 0;
219 
220 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
221 	    sumA = 0.0;
222 	    aij = ai;
223 
224 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
225 	      x_elem = x_i[xi];
226 	      a_elem = a_i[aij];
227 	      prod = a_elem * x_elem;
228 	      sumA = sumA + prod;
229 	      aij += incaij;
230 
231 	    }
232 	    /* now put the result into y_i */
233 	    tmp1[0] = sumA;
234 	    tmp1[1] = 0.0;
235 	    y_i[yi] = tmp1[0];
236 	    y_i[yi + 1] = tmp1[1];
237 	    ai += incai;
238 
239 	  }
240 	} else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
241 	  /* alpha is 1.0, beta is 1.0 */
242 
243 	  ai = 0;
244 	  bi = 0;
245 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
246 	    sumA = 0.0;
247 	    aij = ai;
248 	    sumB = 0.0;
249 	    bij = bi;
250 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
251 	      x_elem = x_i[xi];
252 	      a_elem = a_i[aij];
253 	      prod = a_elem * x_elem;
254 	      sumA = sumA + prod;
255 	      aij += incaij;
256 	      b_elem = b_i[bij];
257 	      prod = b_elem * x_elem;
258 	      sumB = sumB + prod;
259 	      bij += incbij;
260 	    }
261 	    /* now put the result into y_i */
262 	    tmp1[0] = sumA;
263 	    tmp1[1] = 0.0;
264 	    tmp2[0] = sumB;
265 	    tmp2[1] = 0.0;
266 	    tmp1[0] = tmp1[0] + tmp2[0];
267 	    tmp1[1] = tmp1[1] + tmp2[1];
268 	    y_i[yi] = tmp1[0];
269 	    y_i[yi + 1] = tmp1[1];
270 	    ai += incai;
271 	    bi += incbi;
272 	  }
273 	} else {
274 	  /* alpha is 1.0, beta is other */
275 
276 	  ai = 0;
277 	  bi = 0;
278 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
279 	    sumA = 0.0;
280 	    aij = ai;
281 	    sumB = 0.0;
282 	    bij = bi;
283 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
284 	      x_elem = x_i[xi];
285 	      a_elem = a_i[aij];
286 	      prod = a_elem * x_elem;
287 	      sumA = sumA + prod;
288 	      aij += incaij;
289 	      b_elem = b_i[bij];
290 	      prod = b_elem * x_elem;
291 	      sumB = sumB + prod;
292 	      bij += incbij;
293 	    }
294 	    /* now put the result into y_i */
295 	    tmp1[0] = sumA;
296 	    tmp1[1] = 0.0;
297 	    {
298 	      tmp2[0] = beta_i[0] * sumB;
299 	      tmp2[1] = beta_i[1] * sumB;
300 	    }
301 	    tmp1[0] = tmp1[0] + tmp2[0];
302 	    tmp1[1] = tmp1[1] + tmp2[1];
303 	    y_i[yi] = tmp1[0];
304 	    y_i[yi + 1] = tmp1[1];
305 	    ai += incai;
306 	    bi += incbi;
307 	  }
308 	}
309       } else {
310 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
311 	  /* alpha is other, beta is 0.0 */
312 
313 	  ai = 0;
314 
315 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
316 	    sumA = 0.0;
317 	    aij = ai;
318 
319 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
320 	      x_elem = x_i[xi];
321 	      a_elem = a_i[aij];
322 	      prod = a_elem * x_elem;
323 	      sumA = sumA + prod;
324 	      aij += incaij;
325 
326 	    }
327 	    /* now put the result into y_i */
328 	    {
329 	      tmp1[0] = alpha_i[0] * sumA;
330 	      tmp1[1] = alpha_i[1] * sumA;
331 	    }
332 	    y_i[yi] = tmp1[0];
333 	    y_i[yi + 1] = tmp1[1];
334 	    ai += incai;
335 
336 	  }
337 	} else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
338 	  /* alpha is other, beta is 1.0 */
339 
340 	  ai = 0;
341 	  bi = 0;
342 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
343 	    sumA = 0.0;
344 	    aij = ai;
345 	    sumB = 0.0;
346 	    bij = bi;
347 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
348 	      x_elem = x_i[xi];
349 	      a_elem = a_i[aij];
350 	      prod = a_elem * x_elem;
351 	      sumA = sumA + prod;
352 	      aij += incaij;
353 	      b_elem = b_i[bij];
354 	      prod = b_elem * x_elem;
355 	      sumB = sumB + prod;
356 	      bij += incbij;
357 	    }
358 	    /* now put the result into y_i */
359 	    {
360 	      tmp1[0] = alpha_i[0] * sumA;
361 	      tmp1[1] = alpha_i[1] * sumA;
362 	    }
363 	    tmp2[0] = sumB;
364 	    tmp2[1] = 0.0;
365 	    tmp1[0] = tmp1[0] + tmp2[0];
366 	    tmp1[1] = tmp1[1] + tmp2[1];
367 	    y_i[yi] = tmp1[0];
368 	    y_i[yi + 1] = tmp1[1];
369 	    ai += incai;
370 	    bi += incbi;
371 	  }
372 	} else {
373 	  /* most general form, alpha, beta are other */
374 
375 	  ai = 0;
376 	  bi = 0;
377 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
378 	    sumA = 0.0;
379 	    aij = ai;
380 	    sumB = 0.0;
381 	    bij = bi;
382 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
383 	      x_elem = x_i[xi];
384 	      a_elem = a_i[aij];
385 	      prod = a_elem * x_elem;
386 	      sumA = sumA + prod;
387 	      aij += incaij;
388 	      b_elem = b_i[bij];
389 	      prod = b_elem * x_elem;
390 	      sumB = sumB + prod;
391 	      bij += incbij;
392 	    }
393 	    /* now put the result into y_i */
394 	    {
395 	      tmp1[0] = alpha_i[0] * sumA;
396 	      tmp1[1] = alpha_i[1] * sumA;
397 	    }
398 	    {
399 	      tmp2[0] = beta_i[0] * sumB;
400 	      tmp2[1] = beta_i[1] * sumB;
401 	    }
402 	    tmp1[0] = tmp1[0] + tmp2[0];
403 	    tmp1[1] = tmp1[1] + tmp2[1];
404 	    y_i[yi] = tmp1[0];
405 	    y_i[yi + 1] = tmp1[1];
406 	    ai += incai;
407 	    bi += incbi;
408 	  }
409 	}
410       }
411 
412 
413       break;
414     }
415   case blas_prec_indigenous:
416   case blas_prec_double:
417     {
418       int i, j;
419       int xi, yi;
420       int x_starti, y_starti, incxi, incyi;
421       int lda_min;
422       int ai;
423       int incai;
424       int aij;
425       int incaij;
426       int bi;
427       int incbi;
428       int bij;
429       int incbij;
430 
431       const float *a_i = a;
432       const float *b_i = b;
433       const float *x_i = x;
434       float *y_i = (float *) y;
435       float *alpha_i = (float *) alpha;
436       float *beta_i = (float *) beta;
437       float a_elem;
438       float b_elem;
439       float x_elem;
440       double prod;
441       double sumA;
442       double sumB;
443       double tmp1[2];
444       double tmp2[2];
445 
446 
447 
448       /* m is number of rows */
449       /* n is number of columns */
450 
451       if (m == 0 || n == 0)
452 	return;
453 
454 
455       /* all error calls */
456       if (order == blas_rowmajor) {
457 	lda_min = n;
458 	incai = lda;		/* row stride */
459 	incbi = ldb;
460 	incbij = incaij = 1;	/* column stride */
461       } else if (order == blas_colmajor) {
462 	lda_min = m;
463 	incai = incbi = 1;	/*row stride */
464 	incaij = lda;		/* column stride */
465 	incbij = ldb;
466       } else {
467 	/* error, order not blas_colmajor not blas_rowmajor */
468 	BLAS_error(routine_name, -1, order, 0);
469 	return;
470       }
471 
472       if (m < 0)
473 	BLAS_error(routine_name, -2, m, 0);
474       else if (n < 0)
475 	BLAS_error(routine_name, -3, n, 0);
476       if (lda < lda_min)
477 	BLAS_error(routine_name, -6, lda, 0);
478       else if (ldb < lda_min)
479 	BLAS_error(routine_name, -11, ldb, 0);
480       else if (incx == 0)
481 	BLAS_error(routine_name, -8, incx, 0);
482       else if (incy == 0)
483 	BLAS_error(routine_name, -13, incy, 0);
484 
485       incxi = incx;
486       incyi = incy;
487 
488       incyi *= 2;
489 
490 
491 
492 
493 
494       if (incxi > 0)
495 	x_starti = 0;
496       else
497 	x_starti = (1 - n) * incxi;
498 
499       if (incyi > 0)
500 	y_starti = 0;
501       else
502 	y_starti = (1 - m) * incyi;
503 
504 
505 
506       if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
507 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
508 	  /* alpha, beta are 0.0 */
509 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
510 	    y_i[yi] = 0.0;
511 	    y_i[yi + 1] = 0.0;
512 	  }
513 	} else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
514 	  /* alpha is 0.0, beta is 1.0 */
515 
516 
517 	  bi = 0;
518 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
519 
520 	    sumB = 0.0;
521 	    bij = bi;
522 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
523 	      x_elem = x_i[xi];
524 
525 	      b_elem = b_i[bij];
526 	      prod = (double) b_elem *x_elem;
527 	      sumB = sumB + prod;
528 	      bij += incbij;
529 	    }
530 	    /* now put the result into y_i */
531 	    tmp1[0] = sumB;
532 	    tmp1[1] = 0.0;
533 	    y_i[yi] = tmp1[0];
534 	    y_i[yi + 1] = tmp1[1];
535 
536 	    bi += incbi;
537 	  }
538 	} else {
539 	  /* alpha is 0.0, beta not 1.0 nor 0.0 */
540 
541 
542 	  bi = 0;
543 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
544 
545 	    sumB = 0.0;
546 	    bij = bi;
547 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
548 	      x_elem = x_i[xi];
549 
550 	      b_elem = b_i[bij];
551 	      prod = (double) b_elem *x_elem;
552 	      sumB = sumB + prod;
553 	      bij += incbij;
554 	    }
555 	    /* now put the result into y_i */
556 	    {
557 	      tmp1[0] = beta_i[0] * sumB;
558 	      tmp1[1] = beta_i[1] * sumB;
559 	    }
560 	    y_i[yi] = tmp1[0];
561 	    y_i[yi + 1] = tmp1[1];
562 
563 	    bi += incbi;
564 	  }
565 	}
566       } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
567 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
568 	  /* alpha is 1.0, beta is 0.0 */
569 
570 	  ai = 0;
571 
572 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
573 	    sumA = 0.0;
574 	    aij = ai;
575 
576 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
577 	      x_elem = x_i[xi];
578 	      a_elem = a_i[aij];
579 	      prod = (double) a_elem *x_elem;
580 	      sumA = sumA + prod;
581 	      aij += incaij;
582 
583 	    }
584 	    /* now put the result into y_i */
585 	    tmp1[0] = sumA;
586 	    tmp1[1] = 0.0;
587 	    y_i[yi] = tmp1[0];
588 	    y_i[yi + 1] = tmp1[1];
589 	    ai += incai;
590 
591 	  }
592 	} else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
593 	  /* alpha is 1.0, beta is 1.0 */
594 
595 	  ai = 0;
596 	  bi = 0;
597 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
598 	    sumA = 0.0;
599 	    aij = ai;
600 	    sumB = 0.0;
601 	    bij = bi;
602 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
603 	      x_elem = x_i[xi];
604 	      a_elem = a_i[aij];
605 	      prod = (double) a_elem *x_elem;
606 	      sumA = sumA + prod;
607 	      aij += incaij;
608 	      b_elem = b_i[bij];
609 	      prod = (double) b_elem *x_elem;
610 	      sumB = sumB + prod;
611 	      bij += incbij;
612 	    }
613 	    /* now put the result into y_i */
614 	    tmp1[0] = sumA;
615 	    tmp1[1] = 0.0;
616 	    tmp2[0] = sumB;
617 	    tmp2[1] = 0.0;
618 	    tmp1[0] = tmp1[0] + tmp2[0];
619 	    tmp1[1] = tmp1[1] + tmp2[1];
620 	    y_i[yi] = tmp1[0];
621 	    y_i[yi + 1] = tmp1[1];
622 	    ai += incai;
623 	    bi += incbi;
624 	  }
625 	} else {
626 	  /* alpha is 1.0, beta is other */
627 
628 	  ai = 0;
629 	  bi = 0;
630 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
631 	    sumA = 0.0;
632 	    aij = ai;
633 	    sumB = 0.0;
634 	    bij = bi;
635 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
636 	      x_elem = x_i[xi];
637 	      a_elem = a_i[aij];
638 	      prod = (double) a_elem *x_elem;
639 	      sumA = sumA + prod;
640 	      aij += incaij;
641 	      b_elem = b_i[bij];
642 	      prod = (double) b_elem *x_elem;
643 	      sumB = sumB + prod;
644 	      bij += incbij;
645 	    }
646 	    /* now put the result into y_i */
647 	    tmp1[0] = sumA;
648 	    tmp1[1] = 0.0;
649 	    {
650 	      tmp2[0] = beta_i[0] * sumB;
651 	      tmp2[1] = beta_i[1] * sumB;
652 	    }
653 	    tmp1[0] = tmp1[0] + tmp2[0];
654 	    tmp1[1] = tmp1[1] + tmp2[1];
655 	    y_i[yi] = tmp1[0];
656 	    y_i[yi + 1] = tmp1[1];
657 	    ai += incai;
658 	    bi += incbi;
659 	  }
660 	}
661       } else {
662 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
663 	  /* alpha is other, beta is 0.0 */
664 
665 	  ai = 0;
666 
667 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
668 	    sumA = 0.0;
669 	    aij = ai;
670 
671 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
672 	      x_elem = x_i[xi];
673 	      a_elem = a_i[aij];
674 	      prod = (double) a_elem *x_elem;
675 	      sumA = sumA + prod;
676 	      aij += incaij;
677 
678 	    }
679 	    /* now put the result into y_i */
680 	    {
681 	      tmp1[0] = alpha_i[0] * sumA;
682 	      tmp1[1] = alpha_i[1] * sumA;
683 	    }
684 	    y_i[yi] = tmp1[0];
685 	    y_i[yi + 1] = tmp1[1];
686 	    ai += incai;
687 
688 	  }
689 	} else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
690 	  /* alpha is other, beta is 1.0 */
691 
692 	  ai = 0;
693 	  bi = 0;
694 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
695 	    sumA = 0.0;
696 	    aij = ai;
697 	    sumB = 0.0;
698 	    bij = bi;
699 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
700 	      x_elem = x_i[xi];
701 	      a_elem = a_i[aij];
702 	      prod = (double) a_elem *x_elem;
703 	      sumA = sumA + prod;
704 	      aij += incaij;
705 	      b_elem = b_i[bij];
706 	      prod = (double) b_elem *x_elem;
707 	      sumB = sumB + prod;
708 	      bij += incbij;
709 	    }
710 	    /* now put the result into y_i */
711 	    {
712 	      tmp1[0] = alpha_i[0] * sumA;
713 	      tmp1[1] = alpha_i[1] * sumA;
714 	    }
715 	    tmp2[0] = sumB;
716 	    tmp2[1] = 0.0;
717 	    tmp1[0] = tmp1[0] + tmp2[0];
718 	    tmp1[1] = tmp1[1] + tmp2[1];
719 	    y_i[yi] = tmp1[0];
720 	    y_i[yi + 1] = tmp1[1];
721 	    ai += incai;
722 	    bi += incbi;
723 	  }
724 	} else {
725 	  /* most general form, alpha, beta are other */
726 
727 	  ai = 0;
728 	  bi = 0;
729 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
730 	    sumA = 0.0;
731 	    aij = ai;
732 	    sumB = 0.0;
733 	    bij = bi;
734 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
735 	      x_elem = x_i[xi];
736 	      a_elem = a_i[aij];
737 	      prod = (double) a_elem *x_elem;
738 	      sumA = sumA + prod;
739 	      aij += incaij;
740 	      b_elem = b_i[bij];
741 	      prod = (double) b_elem *x_elem;
742 	      sumB = sumB + prod;
743 	      bij += incbij;
744 	    }
745 	    /* now put the result into y_i */
746 	    {
747 	      tmp1[0] = alpha_i[0] * sumA;
748 	      tmp1[1] = alpha_i[1] * sumA;
749 	    }
750 	    {
751 	      tmp2[0] = beta_i[0] * sumB;
752 	      tmp2[1] = beta_i[1] * sumB;
753 	    }
754 	    tmp1[0] = tmp1[0] + tmp2[0];
755 	    tmp1[1] = tmp1[1] + tmp2[1];
756 	    y_i[yi] = tmp1[0];
757 	    y_i[yi + 1] = tmp1[1];
758 	    ai += incai;
759 	    bi += incbi;
760 	  }
761 	}
762       }
763 
764     }
765     break;
766 
767   case blas_prec_extra:
768     {
769       int i, j;
770       int xi, yi;
771       int x_starti, y_starti, incxi, incyi;
772       int lda_min;
773       int ai;
774       int incai;
775       int aij;
776       int incaij;
777       int bi;
778       int incbi;
779       int bij;
780       int incbij;
781 
782       const float *a_i = a;
783       const float *b_i = b;
784       const float *x_i = x;
785       float *y_i = (float *) y;
786       float *alpha_i = (float *) alpha;
787       float *beta_i = (float *) beta;
788       float a_elem;
789       float b_elem;
790       float x_elem;
791       double head_prod, tail_prod;
792       double head_sumA, tail_sumA;
793       double head_sumB, tail_sumB;
794       double head_tmp1[2], tail_tmp1[2];
795       double head_tmp2[2], tail_tmp2[2];
796 
797       FPU_FIX_DECL;
798 
799       /* m is number of rows */
800       /* n is number of columns */
801 
802       if (m == 0 || n == 0)
803 	return;
804 
805 
806       /* all error calls */
807       if (order == blas_rowmajor) {
808 	lda_min = n;
809 	incai = lda;		/* row stride */
810 	incbi = ldb;
811 	incbij = incaij = 1;	/* column stride */
812       } else if (order == blas_colmajor) {
813 	lda_min = m;
814 	incai = incbi = 1;	/*row stride */
815 	incaij = lda;		/* column stride */
816 	incbij = ldb;
817       } else {
818 	/* error, order not blas_colmajor not blas_rowmajor */
819 	BLAS_error(routine_name, -1, order, 0);
820 	return;
821       }
822 
823       if (m < 0)
824 	BLAS_error(routine_name, -2, m, 0);
825       else if (n < 0)
826 	BLAS_error(routine_name, -3, n, 0);
827       if (lda < lda_min)
828 	BLAS_error(routine_name, -6, lda, 0);
829       else if (ldb < lda_min)
830 	BLAS_error(routine_name, -11, ldb, 0);
831       else if (incx == 0)
832 	BLAS_error(routine_name, -8, incx, 0);
833       else if (incy == 0)
834 	BLAS_error(routine_name, -13, incy, 0);
835 
836       incxi = incx;
837       incyi = incy;
838 
839       incyi *= 2;
840 
841 
842 
843 
844 
845       if (incxi > 0)
846 	x_starti = 0;
847       else
848 	x_starti = (1 - n) * incxi;
849 
850       if (incyi > 0)
851 	y_starti = 0;
852       else
853 	y_starti = (1 - m) * incyi;
854 
855       FPU_FIX_START;
856 
857       if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
858 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
859 	  /* alpha, beta are 0.0 */
860 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
861 	    y_i[yi] = 0.0;
862 	    y_i[yi + 1] = 0.0;
863 	  }
864 	} else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
865 	  /* alpha is 0.0, beta is 1.0 */
866 
867 
868 	  bi = 0;
869 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
870 
871 	    head_sumB = tail_sumB = 0.0;
872 	    bij = bi;
873 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
874 	      x_elem = x_i[xi];
875 
876 	      b_elem = b_i[bij];
877 	      head_prod = (double) b_elem *x_elem;
878 	      tail_prod = 0.0;
879 	      {
880 		/* Compute double-double = double-double + double-double. */
881 		double bv;
882 		double s1, s2, t1, t2;
883 
884 		/* Add two hi words. */
885 		s1 = head_sumB + head_prod;
886 		bv = s1 - head_sumB;
887 		s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
888 
889 		/* Add two lo words. */
890 		t1 = tail_sumB + tail_prod;
891 		bv = t1 - tail_sumB;
892 		t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
893 
894 		s2 += t1;
895 
896 		/* Renormalize (s1, s2)  to  (t1, s2) */
897 		t1 = s1 + s2;
898 		s2 = s2 - (t1 - s1);
899 
900 		t2 += s2;
901 
902 		/* Renormalize (t1, t2)  */
903 		head_sumB = t1 + t2;
904 		tail_sumB = t2 - (head_sumB - t1);
905 	      }
906 	      bij += incbij;
907 	    }
908 	    /* now put the result into y_i */
909 	    head_tmp1[0] = head_sumB;
910 	    tail_tmp1[0] = tail_sumB;
911 	    head_tmp1[1] = tail_tmp1[1] = 0.0;
912 	    y_i[yi] = head_tmp1[0];
913 	    y_i[yi + 1] = head_tmp1[1];
914 
915 	    bi += incbi;
916 	  }
917 	} else {
918 	  /* alpha is 0.0, beta not 1.0 nor 0.0 */
919 
920 
921 	  bi = 0;
922 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
923 
924 	    head_sumB = tail_sumB = 0.0;
925 	    bij = bi;
926 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
927 	      x_elem = x_i[xi];
928 
929 	      b_elem = b_i[bij];
930 	      head_prod = (double) b_elem *x_elem;
931 	      tail_prod = 0.0;
932 	      {
933 		/* Compute double-double = double-double + double-double. */
934 		double bv;
935 		double s1, s2, t1, t2;
936 
937 		/* Add two hi words. */
938 		s1 = head_sumB + head_prod;
939 		bv = s1 - head_sumB;
940 		s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
941 
942 		/* Add two lo words. */
943 		t1 = tail_sumB + tail_prod;
944 		bv = t1 - tail_sumB;
945 		t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
946 
947 		s2 += t1;
948 
949 		/* Renormalize (s1, s2)  to  (t1, s2) */
950 		t1 = s1 + s2;
951 		s2 = s2 - (t1 - s1);
952 
953 		t2 += s2;
954 
955 		/* Renormalize (t1, t2)  */
956 		head_sumB = t1 + t2;
957 		tail_sumB = t2 - (head_sumB - t1);
958 	      }
959 	      bij += incbij;
960 	    }
961 	    /* now put the result into y_i */
962 	    {
963 	      double head_e1, tail_e1;
964 	      double dt;
965 	      dt = (double) beta_i[0];
966 	      {
967 		/* Compute double-double = double-double * double. */
968 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
969 
970 		con = head_sumB * split;
971 		a11 = con - head_sumB;
972 		a11 = con - a11;
973 		a21 = head_sumB - a11;
974 		con = dt * split;
975 		b1 = con - dt;
976 		b1 = con - b1;
977 		b2 = dt - b1;
978 
979 		c11 = head_sumB * dt;
980 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
981 
982 		c2 = tail_sumB * dt;
983 		t1 = c11 + c2;
984 		t2 = (c2 - (t1 - c11)) + c21;
985 
986 		head_e1 = t1 + t2;
987 		tail_e1 = t2 - (head_e1 - t1);
988 	      }
989 	      head_tmp1[0] = head_e1;
990 	      tail_tmp1[0] = tail_e1;
991 	      dt = (double) beta_i[1];
992 	      {
993 		/* Compute double-double = double-double * double. */
994 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
995 
996 		con = head_sumB * split;
997 		a11 = con - head_sumB;
998 		a11 = con - a11;
999 		a21 = head_sumB - a11;
1000 		con = dt * split;
1001 		b1 = con - dt;
1002 		b1 = con - b1;
1003 		b2 = dt - b1;
1004 
1005 		c11 = head_sumB * dt;
1006 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1007 
1008 		c2 = tail_sumB * dt;
1009 		t1 = c11 + c2;
1010 		t2 = (c2 - (t1 - c11)) + c21;
1011 
1012 		head_e1 = t1 + t2;
1013 		tail_e1 = t2 - (head_e1 - t1);
1014 	      }
1015 	      head_tmp1[1] = head_e1;
1016 	      tail_tmp1[1] = tail_e1;
1017 	    }
1018 	    y_i[yi] = head_tmp1[0];
1019 	    y_i[yi + 1] = head_tmp1[1];
1020 
1021 	    bi += incbi;
1022 	  }
1023 	}
1024       } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
1025 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
1026 	  /* alpha is 1.0, beta is 0.0 */
1027 
1028 	  ai = 0;
1029 
1030 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1031 	    head_sumA = tail_sumA = 0.0;
1032 	    aij = ai;
1033 
1034 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1035 	      x_elem = x_i[xi];
1036 	      a_elem = a_i[aij];
1037 	      head_prod = (double) a_elem *x_elem;
1038 	      tail_prod = 0.0;
1039 	      {
1040 		/* Compute double-double = double-double + double-double. */
1041 		double bv;
1042 		double s1, s2, t1, t2;
1043 
1044 		/* Add two hi words. */
1045 		s1 = head_sumA + head_prod;
1046 		bv = s1 - head_sumA;
1047 		s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1048 
1049 		/* Add two lo words. */
1050 		t1 = tail_sumA + tail_prod;
1051 		bv = t1 - tail_sumA;
1052 		t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1053 
1054 		s2 += t1;
1055 
1056 		/* Renormalize (s1, s2)  to  (t1, s2) */
1057 		t1 = s1 + s2;
1058 		s2 = s2 - (t1 - s1);
1059 
1060 		t2 += s2;
1061 
1062 		/* Renormalize (t1, t2)  */
1063 		head_sumA = t1 + t2;
1064 		tail_sumA = t2 - (head_sumA - t1);
1065 	      }
1066 	      aij += incaij;
1067 
1068 	    }
1069 	    /* now put the result into y_i */
1070 	    head_tmp1[0] = head_sumA;
1071 	    tail_tmp1[0] = tail_sumA;
1072 	    head_tmp1[1] = tail_tmp1[1] = 0.0;
1073 	    y_i[yi] = head_tmp1[0];
1074 	    y_i[yi + 1] = head_tmp1[1];
1075 	    ai += incai;
1076 
1077 	  }
1078 	} else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
1079 	  /* alpha is 1.0, beta is 1.0 */
1080 
1081 	  ai = 0;
1082 	  bi = 0;
1083 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1084 	    head_sumA = tail_sumA = 0.0;
1085 	    aij = ai;
1086 	    head_sumB = tail_sumB = 0.0;
1087 	    bij = bi;
1088 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1089 	      x_elem = x_i[xi];
1090 	      a_elem = a_i[aij];
1091 	      head_prod = (double) a_elem *x_elem;
1092 	      tail_prod = 0.0;
1093 	      {
1094 		/* Compute double-double = double-double + double-double. */
1095 		double bv;
1096 		double s1, s2, t1, t2;
1097 
1098 		/* Add two hi words. */
1099 		s1 = head_sumA + head_prod;
1100 		bv = s1 - head_sumA;
1101 		s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1102 
1103 		/* Add two lo words. */
1104 		t1 = tail_sumA + tail_prod;
1105 		bv = t1 - tail_sumA;
1106 		t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1107 
1108 		s2 += t1;
1109 
1110 		/* Renormalize (s1, s2)  to  (t1, s2) */
1111 		t1 = s1 + s2;
1112 		s2 = s2 - (t1 - s1);
1113 
1114 		t2 += s2;
1115 
1116 		/* Renormalize (t1, t2)  */
1117 		head_sumA = t1 + t2;
1118 		tail_sumA = t2 - (head_sumA - t1);
1119 	      }
1120 	      aij += incaij;
1121 	      b_elem = b_i[bij];
1122 	      head_prod = (double) b_elem *x_elem;
1123 	      tail_prod = 0.0;
1124 	      {
1125 		/* Compute double-double = double-double + double-double. */
1126 		double bv;
1127 		double s1, s2, t1, t2;
1128 
1129 		/* Add two hi words. */
1130 		s1 = head_sumB + head_prod;
1131 		bv = s1 - head_sumB;
1132 		s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
1133 
1134 		/* Add two lo words. */
1135 		t1 = tail_sumB + tail_prod;
1136 		bv = t1 - tail_sumB;
1137 		t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
1138 
1139 		s2 += t1;
1140 
1141 		/* Renormalize (s1, s2)  to  (t1, s2) */
1142 		t1 = s1 + s2;
1143 		s2 = s2 - (t1 - s1);
1144 
1145 		t2 += s2;
1146 
1147 		/* Renormalize (t1, t2)  */
1148 		head_sumB = t1 + t2;
1149 		tail_sumB = t2 - (head_sumB - t1);
1150 	      }
1151 	      bij += incbij;
1152 	    }
1153 	    /* now put the result into y_i */
1154 	    head_tmp1[0] = head_sumA;
1155 	    tail_tmp1[0] = tail_sumA;
1156 	    head_tmp1[1] = tail_tmp1[1] = 0.0;
1157 	    head_tmp2[0] = head_sumB;
1158 	    tail_tmp2[0] = tail_sumB;
1159 	    head_tmp2[1] = tail_tmp2[1] = 0.0;
1160 	    {
1161 	      double head_t, tail_t;
1162 	      double head_a, tail_a;
1163 	      double head_b, tail_b;
1164 	      /* Real part */
1165 	      head_a = head_tmp1[0];
1166 	      tail_a = tail_tmp1[0];
1167 	      head_b = head_tmp2[0];
1168 	      tail_b = tail_tmp2[0];
1169 	      {
1170 		/* Compute double-double = double-double + double-double. */
1171 		double bv;
1172 		double s1, s2, t1, t2;
1173 
1174 		/* Add two hi words. */
1175 		s1 = head_a + head_b;
1176 		bv = s1 - head_a;
1177 		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1178 
1179 		/* Add two lo words. */
1180 		t1 = tail_a + tail_b;
1181 		bv = t1 - tail_a;
1182 		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1183 
1184 		s2 += t1;
1185 
1186 		/* Renormalize (s1, s2)  to  (t1, s2) */
1187 		t1 = s1 + s2;
1188 		s2 = s2 - (t1 - s1);
1189 
1190 		t2 += s2;
1191 
1192 		/* Renormalize (t1, t2)  */
1193 		head_t = t1 + t2;
1194 		tail_t = t2 - (head_t - t1);
1195 	      }
1196 	      head_tmp1[0] = head_t;
1197 	      tail_tmp1[0] = tail_t;
1198 	      /* Imaginary part */
1199 	      head_a = head_tmp1[1];
1200 	      tail_a = tail_tmp1[1];
1201 	      head_b = head_tmp2[1];
1202 	      tail_b = tail_tmp2[1];
1203 	      {
1204 		/* Compute double-double = double-double + double-double. */
1205 		double bv;
1206 		double s1, s2, t1, t2;
1207 
1208 		/* Add two hi words. */
1209 		s1 = head_a + head_b;
1210 		bv = s1 - head_a;
1211 		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1212 
1213 		/* Add two lo words. */
1214 		t1 = tail_a + tail_b;
1215 		bv = t1 - tail_a;
1216 		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1217 
1218 		s2 += t1;
1219 
1220 		/* Renormalize (s1, s2)  to  (t1, s2) */
1221 		t1 = s1 + s2;
1222 		s2 = s2 - (t1 - s1);
1223 
1224 		t2 += s2;
1225 
1226 		/* Renormalize (t1, t2)  */
1227 		head_t = t1 + t2;
1228 		tail_t = t2 - (head_t - t1);
1229 	      }
1230 	      head_tmp1[1] = head_t;
1231 	      tail_tmp1[1] = tail_t;
1232 	    }
1233 	    y_i[yi] = head_tmp1[0];
1234 	    y_i[yi + 1] = head_tmp1[1];
1235 	    ai += incai;
1236 	    bi += incbi;
1237 	  }
1238 	} else {
1239 	  /* alpha is 1.0, beta is other */
1240 
1241 	  ai = 0;
1242 	  bi = 0;
1243 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1244 	    head_sumA = tail_sumA = 0.0;
1245 	    aij = ai;
1246 	    head_sumB = tail_sumB = 0.0;
1247 	    bij = bi;
1248 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1249 	      x_elem = x_i[xi];
1250 	      a_elem = a_i[aij];
1251 	      head_prod = (double) a_elem *x_elem;
1252 	      tail_prod = 0.0;
1253 	      {
1254 		/* Compute double-double = double-double + double-double. */
1255 		double bv;
1256 		double s1, s2, t1, t2;
1257 
1258 		/* Add two hi words. */
1259 		s1 = head_sumA + head_prod;
1260 		bv = s1 - head_sumA;
1261 		s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1262 
1263 		/* Add two lo words. */
1264 		t1 = tail_sumA + tail_prod;
1265 		bv = t1 - tail_sumA;
1266 		t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1267 
1268 		s2 += t1;
1269 
1270 		/* Renormalize (s1, s2)  to  (t1, s2) */
1271 		t1 = s1 + s2;
1272 		s2 = s2 - (t1 - s1);
1273 
1274 		t2 += s2;
1275 
1276 		/* Renormalize (t1, t2)  */
1277 		head_sumA = t1 + t2;
1278 		tail_sumA = t2 - (head_sumA - t1);
1279 	      }
1280 	      aij += incaij;
1281 	      b_elem = b_i[bij];
1282 	      head_prod = (double) b_elem *x_elem;
1283 	      tail_prod = 0.0;
1284 	      {
1285 		/* Compute double-double = double-double + double-double. */
1286 		double bv;
1287 		double s1, s2, t1, t2;
1288 
1289 		/* Add two hi words. */
1290 		s1 = head_sumB + head_prod;
1291 		bv = s1 - head_sumB;
1292 		s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
1293 
1294 		/* Add two lo words. */
1295 		t1 = tail_sumB + tail_prod;
1296 		bv = t1 - tail_sumB;
1297 		t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
1298 
1299 		s2 += t1;
1300 
1301 		/* Renormalize (s1, s2)  to  (t1, s2) */
1302 		t1 = s1 + s2;
1303 		s2 = s2 - (t1 - s1);
1304 
1305 		t2 += s2;
1306 
1307 		/* Renormalize (t1, t2)  */
1308 		head_sumB = t1 + t2;
1309 		tail_sumB = t2 - (head_sumB - t1);
1310 	      }
1311 	      bij += incbij;
1312 	    }
1313 	    /* now put the result into y_i */
1314 	    head_tmp1[0] = head_sumA;
1315 	    tail_tmp1[0] = tail_sumA;
1316 	    head_tmp1[1] = tail_tmp1[1] = 0.0;
1317 	    {
1318 	      double head_e1, tail_e1;
1319 	      double dt;
1320 	      dt = (double) beta_i[0];
1321 	      {
1322 		/* Compute double-double = double-double * double. */
1323 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1324 
1325 		con = head_sumB * split;
1326 		a11 = con - head_sumB;
1327 		a11 = con - a11;
1328 		a21 = head_sumB - a11;
1329 		con = dt * split;
1330 		b1 = con - dt;
1331 		b1 = con - b1;
1332 		b2 = dt - b1;
1333 
1334 		c11 = head_sumB * dt;
1335 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1336 
1337 		c2 = tail_sumB * dt;
1338 		t1 = c11 + c2;
1339 		t2 = (c2 - (t1 - c11)) + c21;
1340 
1341 		head_e1 = t1 + t2;
1342 		tail_e1 = t2 - (head_e1 - t1);
1343 	      }
1344 	      head_tmp2[0] = head_e1;
1345 	      tail_tmp2[0] = tail_e1;
1346 	      dt = (double) beta_i[1];
1347 	      {
1348 		/* Compute double-double = double-double * double. */
1349 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1350 
1351 		con = head_sumB * split;
1352 		a11 = con - head_sumB;
1353 		a11 = con - a11;
1354 		a21 = head_sumB - a11;
1355 		con = dt * split;
1356 		b1 = con - dt;
1357 		b1 = con - b1;
1358 		b2 = dt - b1;
1359 
1360 		c11 = head_sumB * dt;
1361 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1362 
1363 		c2 = tail_sumB * dt;
1364 		t1 = c11 + c2;
1365 		t2 = (c2 - (t1 - c11)) + c21;
1366 
1367 		head_e1 = t1 + t2;
1368 		tail_e1 = t2 - (head_e1 - t1);
1369 	      }
1370 	      head_tmp2[1] = head_e1;
1371 	      tail_tmp2[1] = tail_e1;
1372 	    }
1373 	    {
1374 	      double head_t, tail_t;
1375 	      double head_a, tail_a;
1376 	      double head_b, tail_b;
1377 	      /* Real part */
1378 	      head_a = head_tmp1[0];
1379 	      tail_a = tail_tmp1[0];
1380 	      head_b = head_tmp2[0];
1381 	      tail_b = tail_tmp2[0];
1382 	      {
1383 		/* Compute double-double = double-double + double-double. */
1384 		double bv;
1385 		double s1, s2, t1, t2;
1386 
1387 		/* Add two hi words. */
1388 		s1 = head_a + head_b;
1389 		bv = s1 - head_a;
1390 		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1391 
1392 		/* Add two lo words. */
1393 		t1 = tail_a + tail_b;
1394 		bv = t1 - tail_a;
1395 		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1396 
1397 		s2 += t1;
1398 
1399 		/* Renormalize (s1, s2)  to  (t1, s2) */
1400 		t1 = s1 + s2;
1401 		s2 = s2 - (t1 - s1);
1402 
1403 		t2 += s2;
1404 
1405 		/* Renormalize (t1, t2)  */
1406 		head_t = t1 + t2;
1407 		tail_t = t2 - (head_t - t1);
1408 	      }
1409 	      head_tmp1[0] = head_t;
1410 	      tail_tmp1[0] = tail_t;
1411 	      /* Imaginary part */
1412 	      head_a = head_tmp1[1];
1413 	      tail_a = tail_tmp1[1];
1414 	      head_b = head_tmp2[1];
1415 	      tail_b = tail_tmp2[1];
1416 	      {
1417 		/* Compute double-double = double-double + double-double. */
1418 		double bv;
1419 		double s1, s2, t1, t2;
1420 
1421 		/* Add two hi words. */
1422 		s1 = head_a + head_b;
1423 		bv = s1 - head_a;
1424 		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1425 
1426 		/* Add two lo words. */
1427 		t1 = tail_a + tail_b;
1428 		bv = t1 - tail_a;
1429 		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1430 
1431 		s2 += t1;
1432 
1433 		/* Renormalize (s1, s2)  to  (t1, s2) */
1434 		t1 = s1 + s2;
1435 		s2 = s2 - (t1 - s1);
1436 
1437 		t2 += s2;
1438 
1439 		/* Renormalize (t1, t2)  */
1440 		head_t = t1 + t2;
1441 		tail_t = t2 - (head_t - t1);
1442 	      }
1443 	      head_tmp1[1] = head_t;
1444 	      tail_tmp1[1] = tail_t;
1445 	    }
1446 	    y_i[yi] = head_tmp1[0];
1447 	    y_i[yi + 1] = head_tmp1[1];
1448 	    ai += incai;
1449 	    bi += incbi;
1450 	  }
1451 	}
1452       } else {
1453 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
1454 	  /* alpha is other, beta is 0.0 */
1455 
1456 	  ai = 0;
1457 
1458 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1459 	    head_sumA = tail_sumA = 0.0;
1460 	    aij = ai;
1461 
1462 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1463 	      x_elem = x_i[xi];
1464 	      a_elem = a_i[aij];
1465 	      head_prod = (double) a_elem *x_elem;
1466 	      tail_prod = 0.0;
1467 	      {
1468 		/* Compute double-double = double-double + double-double. */
1469 		double bv;
1470 		double s1, s2, t1, t2;
1471 
1472 		/* Add two hi words. */
1473 		s1 = head_sumA + head_prod;
1474 		bv = s1 - head_sumA;
1475 		s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1476 
1477 		/* Add two lo words. */
1478 		t1 = tail_sumA + tail_prod;
1479 		bv = t1 - tail_sumA;
1480 		t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1481 
1482 		s2 += t1;
1483 
1484 		/* Renormalize (s1, s2)  to  (t1, s2) */
1485 		t1 = s1 + s2;
1486 		s2 = s2 - (t1 - s1);
1487 
1488 		t2 += s2;
1489 
1490 		/* Renormalize (t1, t2)  */
1491 		head_sumA = t1 + t2;
1492 		tail_sumA = t2 - (head_sumA - t1);
1493 	      }
1494 	      aij += incaij;
1495 
1496 	    }
1497 	    /* now put the result into y_i */
1498 	    {
1499 	      double head_e1, tail_e1;
1500 	      double dt;
1501 	      dt = (double) alpha_i[0];
1502 	      {
1503 		/* Compute double-double = double-double * double. */
1504 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1505 
1506 		con = head_sumA * split;
1507 		a11 = con - head_sumA;
1508 		a11 = con - a11;
1509 		a21 = head_sumA - a11;
1510 		con = dt * split;
1511 		b1 = con - dt;
1512 		b1 = con - b1;
1513 		b2 = dt - b1;
1514 
1515 		c11 = head_sumA * dt;
1516 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1517 
1518 		c2 = tail_sumA * dt;
1519 		t1 = c11 + c2;
1520 		t2 = (c2 - (t1 - c11)) + c21;
1521 
1522 		head_e1 = t1 + t2;
1523 		tail_e1 = t2 - (head_e1 - t1);
1524 	      }
1525 	      head_tmp1[0] = head_e1;
1526 	      tail_tmp1[0] = tail_e1;
1527 	      dt = (double) alpha_i[1];
1528 	      {
1529 		/* Compute double-double = double-double * double. */
1530 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1531 
1532 		con = head_sumA * split;
1533 		a11 = con - head_sumA;
1534 		a11 = con - a11;
1535 		a21 = head_sumA - a11;
1536 		con = dt * split;
1537 		b1 = con - dt;
1538 		b1 = con - b1;
1539 		b2 = dt - b1;
1540 
1541 		c11 = head_sumA * dt;
1542 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1543 
1544 		c2 = tail_sumA * dt;
1545 		t1 = c11 + c2;
1546 		t2 = (c2 - (t1 - c11)) + c21;
1547 
1548 		head_e1 = t1 + t2;
1549 		tail_e1 = t2 - (head_e1 - t1);
1550 	      }
1551 	      head_tmp1[1] = head_e1;
1552 	      tail_tmp1[1] = tail_e1;
1553 	    }
1554 	    y_i[yi] = head_tmp1[0];
1555 	    y_i[yi + 1] = head_tmp1[1];
1556 	    ai += incai;
1557 
1558 	  }
1559 	} else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
1560 	  /* alpha is other, beta is 1.0 */
1561 
1562 	  ai = 0;
1563 	  bi = 0;
1564 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1565 	    head_sumA = tail_sumA = 0.0;
1566 	    aij = ai;
1567 	    head_sumB = tail_sumB = 0.0;
1568 	    bij = bi;
1569 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1570 	      x_elem = x_i[xi];
1571 	      a_elem = a_i[aij];
1572 	      head_prod = (double) a_elem *x_elem;
1573 	      tail_prod = 0.0;
1574 	      {
1575 		/* Compute double-double = double-double + double-double. */
1576 		double bv;
1577 		double s1, s2, t1, t2;
1578 
1579 		/* Add two hi words. */
1580 		s1 = head_sumA + head_prod;
1581 		bv = s1 - head_sumA;
1582 		s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1583 
1584 		/* Add two lo words. */
1585 		t1 = tail_sumA + tail_prod;
1586 		bv = t1 - tail_sumA;
1587 		t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1588 
1589 		s2 += t1;
1590 
1591 		/* Renormalize (s1, s2)  to  (t1, s2) */
1592 		t1 = s1 + s2;
1593 		s2 = s2 - (t1 - s1);
1594 
1595 		t2 += s2;
1596 
1597 		/* Renormalize (t1, t2)  */
1598 		head_sumA = t1 + t2;
1599 		tail_sumA = t2 - (head_sumA - t1);
1600 	      }
1601 	      aij += incaij;
1602 	      b_elem = b_i[bij];
1603 	      head_prod = (double) b_elem *x_elem;
1604 	      tail_prod = 0.0;
1605 	      {
1606 		/* Compute double-double = double-double + double-double. */
1607 		double bv;
1608 		double s1, s2, t1, t2;
1609 
1610 		/* Add two hi words. */
1611 		s1 = head_sumB + head_prod;
1612 		bv = s1 - head_sumB;
1613 		s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
1614 
1615 		/* Add two lo words. */
1616 		t1 = tail_sumB + tail_prod;
1617 		bv = t1 - tail_sumB;
1618 		t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
1619 
1620 		s2 += t1;
1621 
1622 		/* Renormalize (s1, s2)  to  (t1, s2) */
1623 		t1 = s1 + s2;
1624 		s2 = s2 - (t1 - s1);
1625 
1626 		t2 += s2;
1627 
1628 		/* Renormalize (t1, t2)  */
1629 		head_sumB = t1 + t2;
1630 		tail_sumB = t2 - (head_sumB - t1);
1631 	      }
1632 	      bij += incbij;
1633 	    }
1634 	    /* now put the result into y_i */
1635 	    {
1636 	      double head_e1, tail_e1;
1637 	      double dt;
1638 	      dt = (double) alpha_i[0];
1639 	      {
1640 		/* Compute double-double = double-double * double. */
1641 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1642 
1643 		con = head_sumA * split;
1644 		a11 = con - head_sumA;
1645 		a11 = con - a11;
1646 		a21 = head_sumA - a11;
1647 		con = dt * split;
1648 		b1 = con - dt;
1649 		b1 = con - b1;
1650 		b2 = dt - b1;
1651 
1652 		c11 = head_sumA * dt;
1653 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1654 
1655 		c2 = tail_sumA * dt;
1656 		t1 = c11 + c2;
1657 		t2 = (c2 - (t1 - c11)) + c21;
1658 
1659 		head_e1 = t1 + t2;
1660 		tail_e1 = t2 - (head_e1 - t1);
1661 	      }
1662 	      head_tmp1[0] = head_e1;
1663 	      tail_tmp1[0] = tail_e1;
1664 	      dt = (double) alpha_i[1];
1665 	      {
1666 		/* Compute double-double = double-double * double. */
1667 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1668 
1669 		con = head_sumA * split;
1670 		a11 = con - head_sumA;
1671 		a11 = con - a11;
1672 		a21 = head_sumA - a11;
1673 		con = dt * split;
1674 		b1 = con - dt;
1675 		b1 = con - b1;
1676 		b2 = dt - b1;
1677 
1678 		c11 = head_sumA * dt;
1679 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1680 
1681 		c2 = tail_sumA * dt;
1682 		t1 = c11 + c2;
1683 		t2 = (c2 - (t1 - c11)) + c21;
1684 
1685 		head_e1 = t1 + t2;
1686 		tail_e1 = t2 - (head_e1 - t1);
1687 	      }
1688 	      head_tmp1[1] = head_e1;
1689 	      tail_tmp1[1] = tail_e1;
1690 	    }
1691 	    head_tmp2[0] = head_sumB;
1692 	    tail_tmp2[0] = tail_sumB;
1693 	    head_tmp2[1] = tail_tmp2[1] = 0.0;
1694 	    {
1695 	      double head_t, tail_t;
1696 	      double head_a, tail_a;
1697 	      double head_b, tail_b;
1698 	      /* Real part */
1699 	      head_a = head_tmp1[0];
1700 	      tail_a = tail_tmp1[0];
1701 	      head_b = head_tmp2[0];
1702 	      tail_b = tail_tmp2[0];
1703 	      {
1704 		/* Compute double-double = double-double + double-double. */
1705 		double bv;
1706 		double s1, s2, t1, t2;
1707 
1708 		/* Add two hi words. */
1709 		s1 = head_a + head_b;
1710 		bv = s1 - head_a;
1711 		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1712 
1713 		/* Add two lo words. */
1714 		t1 = tail_a + tail_b;
1715 		bv = t1 - tail_a;
1716 		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1717 
1718 		s2 += t1;
1719 
1720 		/* Renormalize (s1, s2)  to  (t1, s2) */
1721 		t1 = s1 + s2;
1722 		s2 = s2 - (t1 - s1);
1723 
1724 		t2 += s2;
1725 
1726 		/* Renormalize (t1, t2)  */
1727 		head_t = t1 + t2;
1728 		tail_t = t2 - (head_t - t1);
1729 	      }
1730 	      head_tmp1[0] = head_t;
1731 	      tail_tmp1[0] = tail_t;
1732 	      /* Imaginary part */
1733 	      head_a = head_tmp1[1];
1734 	      tail_a = tail_tmp1[1];
1735 	      head_b = head_tmp2[1];
1736 	      tail_b = tail_tmp2[1];
1737 	      {
1738 		/* Compute double-double = double-double + double-double. */
1739 		double bv;
1740 		double s1, s2, t1, t2;
1741 
1742 		/* Add two hi words. */
1743 		s1 = head_a + head_b;
1744 		bv = s1 - head_a;
1745 		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1746 
1747 		/* Add two lo words. */
1748 		t1 = tail_a + tail_b;
1749 		bv = t1 - tail_a;
1750 		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1751 
1752 		s2 += t1;
1753 
1754 		/* Renormalize (s1, s2)  to  (t1, s2) */
1755 		t1 = s1 + s2;
1756 		s2 = s2 - (t1 - s1);
1757 
1758 		t2 += s2;
1759 
1760 		/* Renormalize (t1, t2)  */
1761 		head_t = t1 + t2;
1762 		tail_t = t2 - (head_t - t1);
1763 	      }
1764 	      head_tmp1[1] = head_t;
1765 	      tail_tmp1[1] = tail_t;
1766 	    }
1767 	    y_i[yi] = head_tmp1[0];
1768 	    y_i[yi + 1] = head_tmp1[1];
1769 	    ai += incai;
1770 	    bi += incbi;
1771 	  }
1772 	} else {
1773 	  /* most general form, alpha, beta are other */
1774 
1775 	  ai = 0;
1776 	  bi = 0;
1777 	  for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
1778 	    head_sumA = tail_sumA = 0.0;
1779 	    aij = ai;
1780 	    head_sumB = tail_sumB = 0.0;
1781 	    bij = bi;
1782 	    for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
1783 	      x_elem = x_i[xi];
1784 	      a_elem = a_i[aij];
1785 	      head_prod = (double) a_elem *x_elem;
1786 	      tail_prod = 0.0;
1787 	      {
1788 		/* Compute double-double = double-double + double-double. */
1789 		double bv;
1790 		double s1, s2, t1, t2;
1791 
1792 		/* Add two hi words. */
1793 		s1 = head_sumA + head_prod;
1794 		bv = s1 - head_sumA;
1795 		s2 = ((head_prod - bv) + (head_sumA - (s1 - bv)));
1796 
1797 		/* Add two lo words. */
1798 		t1 = tail_sumA + tail_prod;
1799 		bv = t1 - tail_sumA;
1800 		t2 = ((tail_prod - bv) + (tail_sumA - (t1 - bv)));
1801 
1802 		s2 += t1;
1803 
1804 		/* Renormalize (s1, s2)  to  (t1, s2) */
1805 		t1 = s1 + s2;
1806 		s2 = s2 - (t1 - s1);
1807 
1808 		t2 += s2;
1809 
1810 		/* Renormalize (t1, t2)  */
1811 		head_sumA = t1 + t2;
1812 		tail_sumA = t2 - (head_sumA - t1);
1813 	      }
1814 	      aij += incaij;
1815 	      b_elem = b_i[bij];
1816 	      head_prod = (double) b_elem *x_elem;
1817 	      tail_prod = 0.0;
1818 	      {
1819 		/* Compute double-double = double-double + double-double. */
1820 		double bv;
1821 		double s1, s2, t1, t2;
1822 
1823 		/* Add two hi words. */
1824 		s1 = head_sumB + head_prod;
1825 		bv = s1 - head_sumB;
1826 		s2 = ((head_prod - bv) + (head_sumB - (s1 - bv)));
1827 
1828 		/* Add two lo words. */
1829 		t1 = tail_sumB + tail_prod;
1830 		bv = t1 - tail_sumB;
1831 		t2 = ((tail_prod - bv) + (tail_sumB - (t1 - bv)));
1832 
1833 		s2 += t1;
1834 
1835 		/* Renormalize (s1, s2)  to  (t1, s2) */
1836 		t1 = s1 + s2;
1837 		s2 = s2 - (t1 - s1);
1838 
1839 		t2 += s2;
1840 
1841 		/* Renormalize (t1, t2)  */
1842 		head_sumB = t1 + t2;
1843 		tail_sumB = t2 - (head_sumB - t1);
1844 	      }
1845 	      bij += incbij;
1846 	    }
1847 	    /* now put the result into y_i */
1848 	    {
1849 	      double head_e1, tail_e1;
1850 	      double dt;
1851 	      dt = (double) alpha_i[0];
1852 	      {
1853 		/* Compute double-double = double-double * double. */
1854 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1855 
1856 		con = head_sumA * split;
1857 		a11 = con - head_sumA;
1858 		a11 = con - a11;
1859 		a21 = head_sumA - a11;
1860 		con = dt * split;
1861 		b1 = con - dt;
1862 		b1 = con - b1;
1863 		b2 = dt - b1;
1864 
1865 		c11 = head_sumA * dt;
1866 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1867 
1868 		c2 = tail_sumA * dt;
1869 		t1 = c11 + c2;
1870 		t2 = (c2 - (t1 - c11)) + c21;
1871 
1872 		head_e1 = t1 + t2;
1873 		tail_e1 = t2 - (head_e1 - t1);
1874 	      }
1875 	      head_tmp1[0] = head_e1;
1876 	      tail_tmp1[0] = tail_e1;
1877 	      dt = (double) alpha_i[1];
1878 	      {
1879 		/* Compute double-double = double-double * double. */
1880 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1881 
1882 		con = head_sumA * split;
1883 		a11 = con - head_sumA;
1884 		a11 = con - a11;
1885 		a21 = head_sumA - a11;
1886 		con = dt * split;
1887 		b1 = con - dt;
1888 		b1 = con - b1;
1889 		b2 = dt - b1;
1890 
1891 		c11 = head_sumA * dt;
1892 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1893 
1894 		c2 = tail_sumA * dt;
1895 		t1 = c11 + c2;
1896 		t2 = (c2 - (t1 - c11)) + c21;
1897 
1898 		head_e1 = t1 + t2;
1899 		tail_e1 = t2 - (head_e1 - t1);
1900 	      }
1901 	      head_tmp1[1] = head_e1;
1902 	      tail_tmp1[1] = tail_e1;
1903 	    }
1904 	    {
1905 	      double head_e1, tail_e1;
1906 	      double dt;
1907 	      dt = (double) beta_i[0];
1908 	      {
1909 		/* Compute double-double = double-double * double. */
1910 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1911 
1912 		con = head_sumB * split;
1913 		a11 = con - head_sumB;
1914 		a11 = con - a11;
1915 		a21 = head_sumB - a11;
1916 		con = dt * split;
1917 		b1 = con - dt;
1918 		b1 = con - b1;
1919 		b2 = dt - b1;
1920 
1921 		c11 = head_sumB * dt;
1922 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1923 
1924 		c2 = tail_sumB * dt;
1925 		t1 = c11 + c2;
1926 		t2 = (c2 - (t1 - c11)) + c21;
1927 
1928 		head_e1 = t1 + t2;
1929 		tail_e1 = t2 - (head_e1 - t1);
1930 	      }
1931 	      head_tmp2[0] = head_e1;
1932 	      tail_tmp2[0] = tail_e1;
1933 	      dt = (double) beta_i[1];
1934 	      {
1935 		/* Compute double-double = double-double * double. */
1936 		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1937 
1938 		con = head_sumB * split;
1939 		a11 = con - head_sumB;
1940 		a11 = con - a11;
1941 		a21 = head_sumB - a11;
1942 		con = dt * split;
1943 		b1 = con - dt;
1944 		b1 = con - b1;
1945 		b2 = dt - b1;
1946 
1947 		c11 = head_sumB * dt;
1948 		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1949 
1950 		c2 = tail_sumB * dt;
1951 		t1 = c11 + c2;
1952 		t2 = (c2 - (t1 - c11)) + c21;
1953 
1954 		head_e1 = t1 + t2;
1955 		tail_e1 = t2 - (head_e1 - t1);
1956 	      }
1957 	      head_tmp2[1] = head_e1;
1958 	      tail_tmp2[1] = tail_e1;
1959 	    }
1960 	    {
1961 	      double head_t, tail_t;
1962 	      double head_a, tail_a;
1963 	      double head_b, tail_b;
1964 	      /* Real part */
1965 	      head_a = head_tmp1[0];
1966 	      tail_a = tail_tmp1[0];
1967 	      head_b = head_tmp2[0];
1968 	      tail_b = tail_tmp2[0];
1969 	      {
1970 		/* Compute double-double = double-double + double-double. */
1971 		double bv;
1972 		double s1, s2, t1, t2;
1973 
1974 		/* Add two hi words. */
1975 		s1 = head_a + head_b;
1976 		bv = s1 - head_a;
1977 		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1978 
1979 		/* Add two lo words. */
1980 		t1 = tail_a + tail_b;
1981 		bv = t1 - tail_a;
1982 		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1983 
1984 		s2 += t1;
1985 
1986 		/* Renormalize (s1, s2)  to  (t1, s2) */
1987 		t1 = s1 + s2;
1988 		s2 = s2 - (t1 - s1);
1989 
1990 		t2 += s2;
1991 
1992 		/* Renormalize (t1, t2)  */
1993 		head_t = t1 + t2;
1994 		tail_t = t2 - (head_t - t1);
1995 	      }
1996 	      head_tmp1[0] = head_t;
1997 	      tail_tmp1[0] = tail_t;
1998 	      /* Imaginary part */
1999 	      head_a = head_tmp1[1];
2000 	      tail_a = tail_tmp1[1];
2001 	      head_b = head_tmp2[1];
2002 	      tail_b = tail_tmp2[1];
2003 	      {
2004 		/* Compute double-double = double-double + double-double. */
2005 		double bv;
2006 		double s1, s2, t1, t2;
2007 
2008 		/* Add two hi words. */
2009 		s1 = head_a + head_b;
2010 		bv = s1 - head_a;
2011 		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
2012 
2013 		/* Add two lo words. */
2014 		t1 = tail_a + tail_b;
2015 		bv = t1 - tail_a;
2016 		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
2017 
2018 		s2 += t1;
2019 
2020 		/* Renormalize (s1, s2)  to  (t1, s2) */
2021 		t1 = s1 + s2;
2022 		s2 = s2 - (t1 - s1);
2023 
2024 		t2 += s2;
2025 
2026 		/* Renormalize (t1, t2)  */
2027 		head_t = t1 + t2;
2028 		tail_t = t2 - (head_t - t1);
2029 	      }
2030 	      head_tmp1[1] = head_t;
2031 	      tail_tmp1[1] = tail_t;
2032 	    }
2033 	    y_i[yi] = head_tmp1[0];
2034 	    y_i[yi + 1] = head_tmp1[1];
2035 	    ai += incai;
2036 	    bi += incbi;
2037 	  }
2038 	}
2039       }
2040       FPU_FIX_STOP;
2041     }
2042     break;
2043 
2044   default:
2045     {
2046       BLAS_error(routine_name, -14, prec, 0);
2047     }
2048     break;
2049   }
2050 
2051 }				/* end BLAS_cge_sum_mv_s_s */
2052