1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_csbmv_c_s_x(enum blas_order_type order,enum blas_uplo_type uplo,int n,int k,const void * alpha,const void * a,int lda,const float * x,int incx,const void * beta,void * y,int incy,enum blas_prec_type prec)3 void BLAS_csbmv_c_s_x(enum blas_order_type order, enum blas_uplo_type uplo,
4 int n, int k, const void *alpha, const void *a, int lda,
5 const float *x, int incx, const void *beta,
6 void *y, int incy, enum blas_prec_type prec)
7
8 /*
9 * Purpose
10 * =======
11 *
12 * This routines computes the matrix product:
13 *
14 * y <- alpha * A * x + beta * y
15 *
16 * where A is a symmetric band matrix.
17 *
18 * Arguments
19 * =========
20 *
21 * order (input) enum blas_order_type
22 * Storage format of input symmetric matrix A.
23 *
24 * uplo (input) enum blas_uplo_type
25 * Determines which half of matrix A (upper or lower triangle)
26 * is accessed.
27 *
28 * n (input) int
29 * Dimension of A and size of vectors x, y.
30 *
31 * k (input) int
32 * Number of subdiagonals ( = number of superdiagonals)
33 *
34 * alpha (input) const void*
35 *
36 * a (input) void*
37 * Matrix A.
38 *
39 * lda (input) int
40 * Leading dimension of matrix A.
41 *
42 * x (input) float*
43 * Vector x.
44 *
45 * incx (input) int
46 * Stride for vector x.
47 *
48 * beta (input) const void*
49 *
50 * y (input/output) void*
51 * Vector y.
52 *
53 * incy (input) int
54 * Stride for vector y.
55 *
56 * prec (input) enum blas_prec_type
57 * Specifies the internal precision to be used.
58 * = blas_prec_single: single precision.
59 * = blas_prec_double: double precision.
60 * = blas_prec_extra : anything at least 1.5 times as accurate
61 * than double, and wider than 80-bits.
62 * We use double-double in our implementation.
63 *
64 *
65 * Notes on storing a symmetric band matrix:
66 *
67 * Integers in the below arrays represent values of
68 * type complex float.
69 *
70 * if we have a symettric matrix:
71 *
72 * 1 2 3 0 0
73 * 2 4 5 6 0
74 * 3 5 7 8 9
75 * 0 6 8 10 11
76 * 0 0 9 11 12
77 *
78 * This matrix has n == 5, and k == 2. It can be stored in the
79 * following ways:
80 *
81 * Notes for the examples:
82 * Each column below represents a contigous vector.
83 * Columns are strided by lda.
84 * An asterisk (*) represents a position in the
85 * matrix that is not used.
86 * Note that the minimum lda (size of column) is 3 (k+1).
87 * lda may be arbitrarily large; an lda > 3 would mean
88 * there would be unused data at the bottom of the below
89 * columns.
90 *
91 * blas_colmajor and blas_upper:
92 * * * 3 6 9
93 * * 2 5 8 11
94 * 1 4 7 10 12
95 *
96 *
97 * blas_colmajor and blas_lower
98 * 1 4 7 10 12
99 * 2 5 8 11 *
100 * 3 6 9 * *
101 *
102 *
103 * blas_rowmajor and blas_upper
104 * Columns here also represent contigous arrays.
105 * 1 4 7 10 12
106 * 2 5 8 11 *
107 * 3 6 9 * *
108 *
109 *
110 * blas_rowmajor and blas_lower
111 * Columns here also represent contigous arrays.
112 * * * 3 6 9
113 * * 2 5 8 11
114 * 1 4 7 10 12
115 *
116 */
117 {
118 static const char routine_name[] = "BLAS_csbmv_c_s_x";
119 switch (prec) {
120
121 case blas_prec_single:{
122
123 /* Integer Index Variables */
124 int i, j;
125 int xi, yi;
126 int aij, astarti, x_starti, y_starti;
127 int incaij, incaij2;
128 int n_i;
129 int maxj_first, maxj_second;
130
131 /* Input Matrices */
132 const float *a_i = (float *) a;
133 const float *x_i = x;
134
135 /* Output Vector */
136 float *y_i = (float *) y;
137
138 /* Input Scalars */
139 float *alpha_i = (float *) alpha;
140 float *beta_i = (float *) beta;
141
142 /* Temporary Floating-Point Variables */
143 float a_elem[2];
144 float x_elem;
145 float y_elem[2];
146 float prod[2];
147 float sum[2];
148 float tmp1[2];
149 float tmp2[2];
150
151
152 /* Test for no-op */
153 if (n <= 0) {
154 return;
155 }
156 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
157 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
158 return;
159 }
160
161 /* Check for error conditions. */
162 if (order != blas_colmajor && order != blas_rowmajor) {
163 BLAS_error(routine_name, -1, order, 0);
164 }
165 if (uplo != blas_upper && uplo != blas_lower) {
166 BLAS_error(routine_name, -2, uplo, 0);
167 }
168 if (n < 0) {
169 BLAS_error(routine_name, -3, n, 0);
170 }
171 if (k < 0 || k > n) {
172 BLAS_error(routine_name, -4, k, 0);
173 }
174 if ((lda < k + 1) || (lda < 1)) {
175 BLAS_error(routine_name, -7, lda, 0);
176 }
177 if (incx == 0) {
178 BLAS_error(routine_name, -9, incx, 0);
179 }
180 if (incy == 0) {
181 BLAS_error(routine_name, -12, incy, 0);
182 }
183
184 /* Set Index Parameters */
185 n_i = n;
186
187 if (((uplo == blas_upper) && (order == blas_colmajor)) ||
188 ((uplo == blas_lower) && (order == blas_rowmajor))) {
189 incaij = 1; /* increment in first loop */
190 incaij2 = lda - 1; /* increment in second loop */
191 astarti = k; /* does not start on zero element */
192 } else {
193 incaij = lda - 1;
194 incaij2 = 1;
195 astarti = 0; /* start on first element of array */
196 }
197 /* Adjustment to increments (if any) */
198 incy *= 2;
199 astarti *= 2;
200 incaij *= 2;
201 incaij2 *= 2;
202 if (incx < 0) {
203 x_starti = (-n_i + 1) * incx;
204 } else {
205 x_starti = 0;
206 }
207 if (incy < 0) {
208 y_starti = (-n_i + 1) * incy;
209 } else {
210 y_starti = 0;
211 }
212
213
214
215 /* alpha = 0. In this case, just return beta * y */
216 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
217 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
218 y_elem[0] = y_i[yi];
219 y_elem[1] = y_i[yi + 1];
220 {
221 tmp1[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
222 tmp1[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
223 }
224
225 y_i[yi] = tmp1[0];
226 y_i[yi + 1] = tmp1[1];
227 }
228 } else {
229 /* determine the loop interation counts */
230 /* number of elements done in first loop
231 (this will increase by one over each column up to a limit) */
232 maxj_first = 0;
233 /* number of elements done in second loop the first time */
234 maxj_second = MIN(k + 1, n_i);
235
236 /* Case alpha == 1. */
237 if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
238
239 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
240 /* Case alpha = 1, beta = 0. We compute y <--- A * x */
241 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
242 sum[0] = sum[1] = 0.0;
243 for (j = 0, aij = astarti, xi = x_starti;
244 j < maxj_first; j++, aij += incaij, xi += incx) {
245 a_elem[0] = a_i[aij];
246 a_elem[1] = a_i[aij + 1];
247 x_elem = x_i[xi];
248 {
249 prod[0] = a_elem[0] * x_elem;
250 prod[1] = a_elem[1] * x_elem;
251 }
252 sum[0] = sum[0] + prod[0];
253 sum[1] = sum[1] + prod[1];
254 }
255 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
256 a_elem[0] = a_i[aij];
257 a_elem[1] = a_i[aij + 1];
258 x_elem = x_i[xi];
259 {
260 prod[0] = a_elem[0] * x_elem;
261 prod[1] = a_elem[1] * x_elem;
262 }
263 sum[0] = sum[0] + prod[0];
264 sum[1] = sum[1] + prod[1];
265 }
266 y_i[yi] = sum[0];
267 y_i[yi + 1] = sum[1];
268 if (i + 1 >= (n_i - k))
269 maxj_second--;
270 if (i >= k) {
271 astarti += (incaij + incaij2);
272 x_starti += incx;
273 } else {
274 maxj_first++;
275 astarti += incaij2;
276 }
277 }
278 } else {
279 /* Case alpha = 1, but beta != 0.
280 We compute y <--- A * x + beta * y */
281 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
282 sum[0] = sum[1] = 0.0;
283
284 for (j = 0, aij = astarti, xi = x_starti;
285 j < maxj_first; j++, aij += incaij, xi += incx) {
286 a_elem[0] = a_i[aij];
287 a_elem[1] = a_i[aij + 1];
288 x_elem = x_i[xi];
289 {
290 prod[0] = a_elem[0] * x_elem;
291 prod[1] = a_elem[1] * x_elem;
292 }
293 sum[0] = sum[0] + prod[0];
294 sum[1] = sum[1] + prod[1];
295 }
296 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
297 a_elem[0] = a_i[aij];
298 a_elem[1] = a_i[aij + 1];
299 x_elem = x_i[xi];
300 {
301 prod[0] = a_elem[0] * x_elem;
302 prod[1] = a_elem[1] * x_elem;
303 }
304 sum[0] = sum[0] + prod[0];
305 sum[1] = sum[1] + prod[1];
306 }
307 y_elem[0] = y_i[yi];
308 y_elem[1] = y_i[yi + 1];
309 {
310 tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
311 tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
312 }
313
314 tmp1[0] = sum[0];
315 tmp1[1] = sum[1];
316 tmp1[0] = tmp2[0] + tmp1[0];
317 tmp1[1] = tmp2[1] + tmp1[1];
318 y_i[yi] = tmp1[0];
319 y_i[yi + 1] = tmp1[1];
320 if (i + 1 >= (n_i - k))
321 maxj_second--;
322 if (i >= k) {
323 astarti += (incaij + incaij2);
324 x_starti += incx;
325 } else {
326 maxj_first++;
327 astarti += incaij2;
328 }
329 }
330 }
331 } else {
332 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
333 /* Case alpha != 1, but beta == 0.
334 We compute y <--- A * x * a */
335 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
336 sum[0] = sum[1] = 0.0;
337
338 for (j = 0, aij = astarti, xi = x_starti;
339 j < maxj_first; j++, aij += incaij, xi += incx) {
340 a_elem[0] = a_i[aij];
341 a_elem[1] = a_i[aij + 1];
342 x_elem = x_i[xi];
343 {
344 prod[0] = a_elem[0] * x_elem;
345 prod[1] = a_elem[1] * x_elem;
346 }
347 sum[0] = sum[0] + prod[0];
348 sum[1] = sum[1] + prod[1];
349 }
350 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
351 a_elem[0] = a_i[aij];
352 a_elem[1] = a_i[aij + 1];
353 x_elem = x_i[xi];
354 {
355 prod[0] = a_elem[0] * x_elem;
356 prod[1] = a_elem[1] * x_elem;
357 }
358 sum[0] = sum[0] + prod[0];
359 sum[1] = sum[1] + prod[1];
360 }
361 y_elem[0] = y_i[yi];
362 y_elem[1] = y_i[yi + 1];
363 {
364 tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
365 tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
366 }
367
368 y_i[yi] = tmp1[0];
369 y_i[yi + 1] = tmp1[1];
370 if (i + 1 >= (n_i - k))
371 maxj_second--;
372 if (i >= k) {
373 astarti += (incaij + incaij2);
374 x_starti += incx;
375 } else {
376 maxj_first++;
377 astarti += incaij2;
378 }
379 }
380 } else {
381 /* The most general form, y <--- alpha * A * x + beta * y */
382 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
383 sum[0] = sum[1] = 0.0;
384
385 for (j = 0, aij = astarti, xi = x_starti;
386 j < maxj_first; j++, aij += incaij, xi += incx) {
387 a_elem[0] = a_i[aij];
388 a_elem[1] = a_i[aij + 1];
389 x_elem = x_i[xi];
390 {
391 prod[0] = a_elem[0] * x_elem;
392 prod[1] = a_elem[1] * x_elem;
393 }
394 sum[0] = sum[0] + prod[0];
395 sum[1] = sum[1] + prod[1];
396 }
397 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
398 a_elem[0] = a_i[aij];
399 a_elem[1] = a_i[aij + 1];
400 x_elem = x_i[xi];
401 {
402 prod[0] = a_elem[0] * x_elem;
403 prod[1] = a_elem[1] * x_elem;
404 }
405 sum[0] = sum[0] + prod[0];
406 sum[1] = sum[1] + prod[1];
407 }
408 y_elem[0] = y_i[yi];
409 y_elem[1] = y_i[yi + 1];
410 {
411 tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
412 tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
413 }
414
415 {
416 tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
417 tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
418 }
419
420 tmp1[0] = tmp2[0] + tmp1[0];
421 tmp1[1] = tmp2[1] + tmp1[1];
422 y_i[yi] = tmp1[0];
423 y_i[yi + 1] = tmp1[1];
424 if (i + 1 >= (n_i - k))
425 maxj_second--;
426 if (i >= k) {
427 astarti += (incaij + incaij2);
428 x_starti += incx;
429 } else {
430 maxj_first++;
431 astarti += incaij2;
432 }
433 }
434 }
435 }
436 }
437
438
439 break;
440 }
441 case blas_prec_indigenous:
442 case blas_prec_double:{
443
444 /* Integer Index Variables */
445 int i, j;
446 int xi, yi;
447 int aij, astarti, x_starti, y_starti;
448 int incaij, incaij2;
449 int n_i;
450 int maxj_first, maxj_second;
451
452 /* Input Matrices */
453 const float *a_i = (float *) a;
454 const float *x_i = x;
455
456 /* Output Vector */
457 float *y_i = (float *) y;
458
459 /* Input Scalars */
460 float *alpha_i = (float *) alpha;
461 float *beta_i = (float *) beta;
462
463 /* Temporary Floating-Point Variables */
464 float a_elem[2];
465 float x_elem;
466 float y_elem[2];
467 double prod[2];
468 double sum[2];
469 double tmp1[2];
470 double tmp2[2];
471
472
473 /* Test for no-op */
474 if (n <= 0) {
475 return;
476 }
477 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
478 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
479 return;
480 }
481
482 /* Check for error conditions. */
483 if (order != blas_colmajor && order != blas_rowmajor) {
484 BLAS_error(routine_name, -1, order, 0);
485 }
486 if (uplo != blas_upper && uplo != blas_lower) {
487 BLAS_error(routine_name, -2, uplo, 0);
488 }
489 if (n < 0) {
490 BLAS_error(routine_name, -3, n, 0);
491 }
492 if (k < 0 || k > n) {
493 BLAS_error(routine_name, -4, k, 0);
494 }
495 if ((lda < k + 1) || (lda < 1)) {
496 BLAS_error(routine_name, -7, lda, 0);
497 }
498 if (incx == 0) {
499 BLAS_error(routine_name, -9, incx, 0);
500 }
501 if (incy == 0) {
502 BLAS_error(routine_name, -12, incy, 0);
503 }
504
505 /* Set Index Parameters */
506 n_i = n;
507
508 if (((uplo == blas_upper) && (order == blas_colmajor)) ||
509 ((uplo == blas_lower) && (order == blas_rowmajor))) {
510 incaij = 1; /* increment in first loop */
511 incaij2 = lda - 1; /* increment in second loop */
512 astarti = k; /* does not start on zero element */
513 } else {
514 incaij = lda - 1;
515 incaij2 = 1;
516 astarti = 0; /* start on first element of array */
517 }
518 /* Adjustment to increments (if any) */
519 incy *= 2;
520 astarti *= 2;
521 incaij *= 2;
522 incaij2 *= 2;
523 if (incx < 0) {
524 x_starti = (-n_i + 1) * incx;
525 } else {
526 x_starti = 0;
527 }
528 if (incy < 0) {
529 y_starti = (-n_i + 1) * incy;
530 } else {
531 y_starti = 0;
532 }
533
534
535
536 /* alpha = 0. In this case, just return beta * y */
537 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
538 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
539 y_elem[0] = y_i[yi];
540 y_elem[1] = y_i[yi + 1];
541 {
542 tmp1[0] =
543 (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
544 tmp1[1] =
545 (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
546 }
547 y_i[yi] = tmp1[0];
548 y_i[yi + 1] = tmp1[1];
549 }
550 } else {
551 /* determine the loop interation counts */
552 /* number of elements done in first loop
553 (this will increase by one over each column up to a limit) */
554 maxj_first = 0;
555 /* number of elements done in second loop the first time */
556 maxj_second = MIN(k + 1, n_i);
557
558 /* Case alpha == 1. */
559 if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
560
561 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
562 /* Case alpha = 1, beta = 0. We compute y <--- A * x */
563 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
564 sum[0] = sum[1] = 0.0;
565 for (j = 0, aij = astarti, xi = x_starti;
566 j < maxj_first; j++, aij += incaij, xi += incx) {
567 a_elem[0] = a_i[aij];
568 a_elem[1] = a_i[aij + 1];
569 x_elem = x_i[xi];
570 {
571 prod[0] = (double) a_elem[0] * x_elem;
572 prod[1] = (double) a_elem[1] * x_elem;
573 }
574 sum[0] = sum[0] + prod[0];
575 sum[1] = sum[1] + prod[1];
576 }
577 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
578 a_elem[0] = a_i[aij];
579 a_elem[1] = a_i[aij + 1];
580 x_elem = x_i[xi];
581 {
582 prod[0] = (double) a_elem[0] * x_elem;
583 prod[1] = (double) a_elem[1] * x_elem;
584 }
585 sum[0] = sum[0] + prod[0];
586 sum[1] = sum[1] + prod[1];
587 }
588 y_i[yi] = sum[0];
589 y_i[yi + 1] = sum[1];
590 if (i + 1 >= (n_i - k))
591 maxj_second--;
592 if (i >= k) {
593 astarti += (incaij + incaij2);
594 x_starti += incx;
595 } else {
596 maxj_first++;
597 astarti += incaij2;
598 }
599 }
600 } else {
601 /* Case alpha = 1, but beta != 0.
602 We compute y <--- A * x + beta * y */
603 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
604 sum[0] = sum[1] = 0.0;
605
606 for (j = 0, aij = astarti, xi = x_starti;
607 j < maxj_first; j++, aij += incaij, xi += incx) {
608 a_elem[0] = a_i[aij];
609 a_elem[1] = a_i[aij + 1];
610 x_elem = x_i[xi];
611 {
612 prod[0] = (double) a_elem[0] * x_elem;
613 prod[1] = (double) a_elem[1] * x_elem;
614 }
615 sum[0] = sum[0] + prod[0];
616 sum[1] = sum[1] + prod[1];
617 }
618 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
619 a_elem[0] = a_i[aij];
620 a_elem[1] = a_i[aij + 1];
621 x_elem = x_i[xi];
622 {
623 prod[0] = (double) a_elem[0] * x_elem;
624 prod[1] = (double) a_elem[1] * x_elem;
625 }
626 sum[0] = sum[0] + prod[0];
627 sum[1] = sum[1] + prod[1];
628 }
629 y_elem[0] = y_i[yi];
630 y_elem[1] = y_i[yi + 1];
631 {
632 tmp2[0] =
633 (double) y_elem[0] * beta_i[0] -
634 (double) y_elem[1] * beta_i[1];
635 tmp2[1] =
636 (double) y_elem[0] * beta_i[1] +
637 (double) y_elem[1] * beta_i[0];
638 }
639 tmp1[0] = sum[0];
640 tmp1[1] = sum[1];
641 tmp1[0] = tmp2[0] + tmp1[0];
642 tmp1[1] = tmp2[1] + tmp1[1];
643 y_i[yi] = tmp1[0];
644 y_i[yi + 1] = tmp1[1];
645 if (i + 1 >= (n_i - k))
646 maxj_second--;
647 if (i >= k) {
648 astarti += (incaij + incaij2);
649 x_starti += incx;
650 } else {
651 maxj_first++;
652 astarti += incaij2;
653 }
654 }
655 }
656 } else {
657 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
658 /* Case alpha != 1, but beta == 0.
659 We compute y <--- A * x * a */
660 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
661 sum[0] = sum[1] = 0.0;
662
663 for (j = 0, aij = astarti, xi = x_starti;
664 j < maxj_first; j++, aij += incaij, xi += incx) {
665 a_elem[0] = a_i[aij];
666 a_elem[1] = a_i[aij + 1];
667 x_elem = x_i[xi];
668 {
669 prod[0] = (double) a_elem[0] * x_elem;
670 prod[1] = (double) a_elem[1] * x_elem;
671 }
672 sum[0] = sum[0] + prod[0];
673 sum[1] = sum[1] + prod[1];
674 }
675 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
676 a_elem[0] = a_i[aij];
677 a_elem[1] = a_i[aij + 1];
678 x_elem = x_i[xi];
679 {
680 prod[0] = (double) a_elem[0] * x_elem;
681 prod[1] = (double) a_elem[1] * x_elem;
682 }
683 sum[0] = sum[0] + prod[0];
684 sum[1] = sum[1] + prod[1];
685 }
686 y_elem[0] = y_i[yi];
687 y_elem[1] = y_i[yi + 1];
688 {
689 tmp1[0] =
690 (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
691 tmp1[1] =
692 (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
693 }
694 y_i[yi] = tmp1[0];
695 y_i[yi + 1] = tmp1[1];
696 if (i + 1 >= (n_i - k))
697 maxj_second--;
698 if (i >= k) {
699 astarti += (incaij + incaij2);
700 x_starti += incx;
701 } else {
702 maxj_first++;
703 astarti += incaij2;
704 }
705 }
706 } else {
707 /* The most general form, y <--- alpha * A * x + beta * y */
708 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
709 sum[0] = sum[1] = 0.0;
710
711 for (j = 0, aij = astarti, xi = x_starti;
712 j < maxj_first; j++, aij += incaij, xi += incx) {
713 a_elem[0] = a_i[aij];
714 a_elem[1] = a_i[aij + 1];
715 x_elem = x_i[xi];
716 {
717 prod[0] = (double) a_elem[0] * x_elem;
718 prod[1] = (double) a_elem[1] * x_elem;
719 }
720 sum[0] = sum[0] + prod[0];
721 sum[1] = sum[1] + prod[1];
722 }
723 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
724 a_elem[0] = a_i[aij];
725 a_elem[1] = a_i[aij + 1];
726 x_elem = x_i[xi];
727 {
728 prod[0] = (double) a_elem[0] * x_elem;
729 prod[1] = (double) a_elem[1] * x_elem;
730 }
731 sum[0] = sum[0] + prod[0];
732 sum[1] = sum[1] + prod[1];
733 }
734 y_elem[0] = y_i[yi];
735 y_elem[1] = y_i[yi + 1];
736 {
737 tmp2[0] =
738 (double) y_elem[0] * beta_i[0] -
739 (double) y_elem[1] * beta_i[1];
740 tmp2[1] =
741 (double) y_elem[0] * beta_i[1] +
742 (double) y_elem[1] * beta_i[0];
743 }
744 {
745 tmp1[0] =
746 (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
747 tmp1[1] =
748 (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
749 }
750 tmp1[0] = tmp2[0] + tmp1[0];
751 tmp1[1] = tmp2[1] + tmp1[1];
752 y_i[yi] = tmp1[0];
753 y_i[yi + 1] = tmp1[1];
754 if (i + 1 >= (n_i - k))
755 maxj_second--;
756 if (i >= k) {
757 astarti += (incaij + incaij2);
758 x_starti += incx;
759 } else {
760 maxj_first++;
761 astarti += incaij2;
762 }
763 }
764 }
765 }
766 }
767
768
769 break;
770 }
771
772 case blas_prec_extra:{
773
774 /* Integer Index Variables */
775 int i, j;
776 int xi, yi;
777 int aij, astarti, x_starti, y_starti;
778 int incaij, incaij2;
779 int n_i;
780 int maxj_first, maxj_second;
781
782 /* Input Matrices */
783 const float *a_i = (float *) a;
784 const float *x_i = x;
785
786 /* Output Vector */
787 float *y_i = (float *) y;
788
789 /* Input Scalars */
790 float *alpha_i = (float *) alpha;
791 float *beta_i = (float *) beta;
792
793 /* Temporary Floating-Point Variables */
794 float a_elem[2];
795 float x_elem;
796 float y_elem[2];
797 double head_prod[2], tail_prod[2];
798 double head_sum[2], tail_sum[2];
799 double head_tmp1[2], tail_tmp1[2];
800 double head_tmp2[2], tail_tmp2[2];
801 FPU_FIX_DECL;
802
803 /* Test for no-op */
804 if (n <= 0) {
805 return;
806 }
807 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
808 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
809 return;
810 }
811
812 /* Check for error conditions. */
813 if (order != blas_colmajor && order != blas_rowmajor) {
814 BLAS_error(routine_name, -1, order, 0);
815 }
816 if (uplo != blas_upper && uplo != blas_lower) {
817 BLAS_error(routine_name, -2, uplo, 0);
818 }
819 if (n < 0) {
820 BLAS_error(routine_name, -3, n, 0);
821 }
822 if (k < 0 || k > n) {
823 BLAS_error(routine_name, -4, k, 0);
824 }
825 if ((lda < k + 1) || (lda < 1)) {
826 BLAS_error(routine_name, -7, lda, 0);
827 }
828 if (incx == 0) {
829 BLAS_error(routine_name, -9, incx, 0);
830 }
831 if (incy == 0) {
832 BLAS_error(routine_name, -12, incy, 0);
833 }
834
835 /* Set Index Parameters */
836 n_i = n;
837
838 if (((uplo == blas_upper) && (order == blas_colmajor)) ||
839 ((uplo == blas_lower) && (order == blas_rowmajor))) {
840 incaij = 1; /* increment in first loop */
841 incaij2 = lda - 1; /* increment in second loop */
842 astarti = k; /* does not start on zero element */
843 } else {
844 incaij = lda - 1;
845 incaij2 = 1;
846 astarti = 0; /* start on first element of array */
847 }
848 /* Adjustment to increments (if any) */
849 incy *= 2;
850 astarti *= 2;
851 incaij *= 2;
852 incaij2 *= 2;
853 if (incx < 0) {
854 x_starti = (-n_i + 1) * incx;
855 } else {
856 x_starti = 0;
857 }
858 if (incy < 0) {
859 y_starti = (-n_i + 1) * incy;
860 } else {
861 y_starti = 0;
862 }
863
864 FPU_FIX_START;
865
866 /* alpha = 0. In this case, just return beta * y */
867 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
868 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
869 y_elem[0] = y_i[yi];
870 y_elem[1] = y_i[yi + 1];
871 {
872 double head_e1, tail_e1;
873 double d1;
874 double d2;
875 /* Real part */
876 d1 = (double) y_elem[0] * beta_i[0];
877 d2 = (double) -y_elem[1] * beta_i[1];
878 {
879 /* Compute double-double = double + double. */
880 double e, t1, t2;
881
882 /* Knuth trick. */
883 t1 = d1 + d2;
884 e = t1 - d1;
885 t2 = ((d2 - e) + (d1 - (t1 - e)));
886
887 /* The result is t1 + t2, after normalization. */
888 head_e1 = t1 + t2;
889 tail_e1 = t2 - (head_e1 - t1);
890 }
891 head_tmp1[0] = head_e1;
892 tail_tmp1[0] = tail_e1;
893 /* imaginary part */
894 d1 = (double) y_elem[0] * beta_i[1];
895 d2 = (double) y_elem[1] * beta_i[0];
896 {
897 /* Compute double-double = double + double. */
898 double e, t1, t2;
899
900 /* Knuth trick. */
901 t1 = d1 + d2;
902 e = t1 - d1;
903 t2 = ((d2 - e) + (d1 - (t1 - e)));
904
905 /* The result is t1 + t2, after normalization. */
906 head_e1 = t1 + t2;
907 tail_e1 = t2 - (head_e1 - t1);
908 }
909 head_tmp1[1] = head_e1;
910 tail_tmp1[1] = tail_e1;
911 }
912 y_i[yi] = head_tmp1[0];
913 y_i[yi + 1] = head_tmp1[1];
914 }
915 } else {
916 /* determine the loop interation counts */
917 /* number of elements done in first loop
918 (this will increase by one over each column up to a limit) */
919 maxj_first = 0;
920 /* number of elements done in second loop the first time */
921 maxj_second = MIN(k + 1, n_i);
922
923 /* Case alpha == 1. */
924 if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
925
926 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
927 /* Case alpha = 1, beta = 0. We compute y <--- A * x */
928 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
929 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
930 for (j = 0, aij = astarti, xi = x_starti;
931 j < maxj_first; j++, aij += incaij, xi += incx) {
932 a_elem[0] = a_i[aij];
933 a_elem[1] = a_i[aij + 1];
934 x_elem = x_i[xi];
935 {
936 head_prod[0] = (double) a_elem[0] * x_elem;
937 tail_prod[0] = 0.0;
938 head_prod[1] = (double) a_elem[1] * x_elem;
939 tail_prod[1] = 0.0;
940 }
941 {
942 double head_t, tail_t;
943 double head_a, tail_a;
944 double head_b, tail_b;
945 /* Real part */
946 head_a = head_sum[0];
947 tail_a = tail_sum[0];
948 head_b = head_prod[0];
949 tail_b = tail_prod[0];
950 {
951 /* Compute double-double = double-double + double-double. */
952 double bv;
953 double s1, s2, t1, t2;
954
955 /* Add two hi words. */
956 s1 = head_a + head_b;
957 bv = s1 - head_a;
958 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
959
960 /* Add two lo words. */
961 t1 = tail_a + tail_b;
962 bv = t1 - tail_a;
963 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
964
965 s2 += t1;
966
967 /* Renormalize (s1, s2) to (t1, s2) */
968 t1 = s1 + s2;
969 s2 = s2 - (t1 - s1);
970
971 t2 += s2;
972
973 /* Renormalize (t1, t2) */
974 head_t = t1 + t2;
975 tail_t = t2 - (head_t - t1);
976 }
977 head_sum[0] = head_t;
978 tail_sum[0] = tail_t;
979 /* Imaginary part */
980 head_a = head_sum[1];
981 tail_a = tail_sum[1];
982 head_b = head_prod[1];
983 tail_b = tail_prod[1];
984 {
985 /* Compute double-double = double-double + double-double. */
986 double bv;
987 double s1, s2, t1, t2;
988
989 /* Add two hi words. */
990 s1 = head_a + head_b;
991 bv = s1 - head_a;
992 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
993
994 /* Add two lo words. */
995 t1 = tail_a + tail_b;
996 bv = t1 - tail_a;
997 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
998
999 s2 += t1;
1000
1001 /* Renormalize (s1, s2) to (t1, s2) */
1002 t1 = s1 + s2;
1003 s2 = s2 - (t1 - s1);
1004
1005 t2 += s2;
1006
1007 /* Renormalize (t1, t2) */
1008 head_t = t1 + t2;
1009 tail_t = t2 - (head_t - t1);
1010 }
1011 head_sum[1] = head_t;
1012 tail_sum[1] = tail_t;
1013 }
1014 }
1015 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
1016 a_elem[0] = a_i[aij];
1017 a_elem[1] = a_i[aij + 1];
1018 x_elem = x_i[xi];
1019 {
1020 head_prod[0] = (double) a_elem[0] * x_elem;
1021 tail_prod[0] = 0.0;
1022 head_prod[1] = (double) a_elem[1] * x_elem;
1023 tail_prod[1] = 0.0;
1024 }
1025 {
1026 double head_t, tail_t;
1027 double head_a, tail_a;
1028 double head_b, tail_b;
1029 /* Real part */
1030 head_a = head_sum[0];
1031 tail_a = tail_sum[0];
1032 head_b = head_prod[0];
1033 tail_b = tail_prod[0];
1034 {
1035 /* Compute double-double = double-double + double-double. */
1036 double bv;
1037 double s1, s2, t1, t2;
1038
1039 /* Add two hi words. */
1040 s1 = head_a + head_b;
1041 bv = s1 - head_a;
1042 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1043
1044 /* Add two lo words. */
1045 t1 = tail_a + tail_b;
1046 bv = t1 - tail_a;
1047 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1048
1049 s2 += t1;
1050
1051 /* Renormalize (s1, s2) to (t1, s2) */
1052 t1 = s1 + s2;
1053 s2 = s2 - (t1 - s1);
1054
1055 t2 += s2;
1056
1057 /* Renormalize (t1, t2) */
1058 head_t = t1 + t2;
1059 tail_t = t2 - (head_t - t1);
1060 }
1061 head_sum[0] = head_t;
1062 tail_sum[0] = tail_t;
1063 /* Imaginary part */
1064 head_a = head_sum[1];
1065 tail_a = tail_sum[1];
1066 head_b = head_prod[1];
1067 tail_b = tail_prod[1];
1068 {
1069 /* Compute double-double = double-double + double-double. */
1070 double bv;
1071 double s1, s2, t1, t2;
1072
1073 /* Add two hi words. */
1074 s1 = head_a + head_b;
1075 bv = s1 - head_a;
1076 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1077
1078 /* Add two lo words. */
1079 t1 = tail_a + tail_b;
1080 bv = t1 - tail_a;
1081 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1082
1083 s2 += t1;
1084
1085 /* Renormalize (s1, s2) to (t1, s2) */
1086 t1 = s1 + s2;
1087 s2 = s2 - (t1 - s1);
1088
1089 t2 += s2;
1090
1091 /* Renormalize (t1, t2) */
1092 head_t = t1 + t2;
1093 tail_t = t2 - (head_t - t1);
1094 }
1095 head_sum[1] = head_t;
1096 tail_sum[1] = tail_t;
1097 }
1098 }
1099 y_i[yi] = head_sum[0];
1100 y_i[yi + 1] = head_sum[1];
1101 if (i + 1 >= (n_i - k))
1102 maxj_second--;
1103 if (i >= k) {
1104 astarti += (incaij + incaij2);
1105 x_starti += incx;
1106 } else {
1107 maxj_first++;
1108 astarti += incaij2;
1109 }
1110 }
1111 } else {
1112 /* Case alpha = 1, but beta != 0.
1113 We compute y <--- A * x + beta * y */
1114 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
1115 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
1116
1117 for (j = 0, aij = astarti, xi = x_starti;
1118 j < maxj_first; j++, aij += incaij, xi += incx) {
1119 a_elem[0] = a_i[aij];
1120 a_elem[1] = a_i[aij + 1];
1121 x_elem = x_i[xi];
1122 {
1123 head_prod[0] = (double) a_elem[0] * x_elem;
1124 tail_prod[0] = 0.0;
1125 head_prod[1] = (double) a_elem[1] * x_elem;
1126 tail_prod[1] = 0.0;
1127 }
1128 {
1129 double head_t, tail_t;
1130 double head_a, tail_a;
1131 double head_b, tail_b;
1132 /* Real part */
1133 head_a = head_sum[0];
1134 tail_a = tail_sum[0];
1135 head_b = head_prod[0];
1136 tail_b = tail_prod[0];
1137 {
1138 /* Compute double-double = double-double + double-double. */
1139 double bv;
1140 double s1, s2, t1, t2;
1141
1142 /* Add two hi words. */
1143 s1 = head_a + head_b;
1144 bv = s1 - head_a;
1145 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1146
1147 /* Add two lo words. */
1148 t1 = tail_a + tail_b;
1149 bv = t1 - tail_a;
1150 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1151
1152 s2 += t1;
1153
1154 /* Renormalize (s1, s2) to (t1, s2) */
1155 t1 = s1 + s2;
1156 s2 = s2 - (t1 - s1);
1157
1158 t2 += s2;
1159
1160 /* Renormalize (t1, t2) */
1161 head_t = t1 + t2;
1162 tail_t = t2 - (head_t - t1);
1163 }
1164 head_sum[0] = head_t;
1165 tail_sum[0] = tail_t;
1166 /* Imaginary part */
1167 head_a = head_sum[1];
1168 tail_a = tail_sum[1];
1169 head_b = head_prod[1];
1170 tail_b = tail_prod[1];
1171 {
1172 /* Compute double-double = double-double + double-double. */
1173 double bv;
1174 double s1, s2, t1, t2;
1175
1176 /* Add two hi words. */
1177 s1 = head_a + head_b;
1178 bv = s1 - head_a;
1179 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1180
1181 /* Add two lo words. */
1182 t1 = tail_a + tail_b;
1183 bv = t1 - tail_a;
1184 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1185
1186 s2 += t1;
1187
1188 /* Renormalize (s1, s2) to (t1, s2) */
1189 t1 = s1 + s2;
1190 s2 = s2 - (t1 - s1);
1191
1192 t2 += s2;
1193
1194 /* Renormalize (t1, t2) */
1195 head_t = t1 + t2;
1196 tail_t = t2 - (head_t - t1);
1197 }
1198 head_sum[1] = head_t;
1199 tail_sum[1] = tail_t;
1200 }
1201 }
1202 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
1203 a_elem[0] = a_i[aij];
1204 a_elem[1] = a_i[aij + 1];
1205 x_elem = x_i[xi];
1206 {
1207 head_prod[0] = (double) a_elem[0] * x_elem;
1208 tail_prod[0] = 0.0;
1209 head_prod[1] = (double) a_elem[1] * x_elem;
1210 tail_prod[1] = 0.0;
1211 }
1212 {
1213 double head_t, tail_t;
1214 double head_a, tail_a;
1215 double head_b, tail_b;
1216 /* Real part */
1217 head_a = head_sum[0];
1218 tail_a = tail_sum[0];
1219 head_b = head_prod[0];
1220 tail_b = tail_prod[0];
1221 {
1222 /* Compute double-double = double-double + double-double. */
1223 double bv;
1224 double s1, s2, t1, t2;
1225
1226 /* Add two hi words. */
1227 s1 = head_a + head_b;
1228 bv = s1 - head_a;
1229 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1230
1231 /* Add two lo words. */
1232 t1 = tail_a + tail_b;
1233 bv = t1 - tail_a;
1234 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1235
1236 s2 += t1;
1237
1238 /* Renormalize (s1, s2) to (t1, s2) */
1239 t1 = s1 + s2;
1240 s2 = s2 - (t1 - s1);
1241
1242 t2 += s2;
1243
1244 /* Renormalize (t1, t2) */
1245 head_t = t1 + t2;
1246 tail_t = t2 - (head_t - t1);
1247 }
1248 head_sum[0] = head_t;
1249 tail_sum[0] = tail_t;
1250 /* Imaginary part */
1251 head_a = head_sum[1];
1252 tail_a = tail_sum[1];
1253 head_b = head_prod[1];
1254 tail_b = tail_prod[1];
1255 {
1256 /* Compute double-double = double-double + double-double. */
1257 double bv;
1258 double s1, s2, t1, t2;
1259
1260 /* Add two hi words. */
1261 s1 = head_a + head_b;
1262 bv = s1 - head_a;
1263 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1264
1265 /* Add two lo words. */
1266 t1 = tail_a + tail_b;
1267 bv = t1 - tail_a;
1268 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1269
1270 s2 += t1;
1271
1272 /* Renormalize (s1, s2) to (t1, s2) */
1273 t1 = s1 + s2;
1274 s2 = s2 - (t1 - s1);
1275
1276 t2 += s2;
1277
1278 /* Renormalize (t1, t2) */
1279 head_t = t1 + t2;
1280 tail_t = t2 - (head_t - t1);
1281 }
1282 head_sum[1] = head_t;
1283 tail_sum[1] = tail_t;
1284 }
1285 }
1286 y_elem[0] = y_i[yi];
1287 y_elem[1] = y_i[yi + 1];
1288 {
1289 double head_e1, tail_e1;
1290 double d1;
1291 double d2;
1292 /* Real part */
1293 d1 = (double) y_elem[0] * beta_i[0];
1294 d2 = (double) -y_elem[1] * beta_i[1];
1295 {
1296 /* Compute double-double = double + double. */
1297 double e, t1, t2;
1298
1299 /* Knuth trick. */
1300 t1 = d1 + d2;
1301 e = t1 - d1;
1302 t2 = ((d2 - e) + (d1 - (t1 - e)));
1303
1304 /* The result is t1 + t2, after normalization. */
1305 head_e1 = t1 + t2;
1306 tail_e1 = t2 - (head_e1 - t1);
1307 }
1308 head_tmp2[0] = head_e1;
1309 tail_tmp2[0] = tail_e1;
1310 /* imaginary part */
1311 d1 = (double) y_elem[0] * beta_i[1];
1312 d2 = (double) y_elem[1] * beta_i[0];
1313 {
1314 /* Compute double-double = double + double. */
1315 double e, t1, t2;
1316
1317 /* Knuth trick. */
1318 t1 = d1 + d2;
1319 e = t1 - d1;
1320 t2 = ((d2 - e) + (d1 - (t1 - e)));
1321
1322 /* The result is t1 + t2, after normalization. */
1323 head_e1 = t1 + t2;
1324 tail_e1 = t2 - (head_e1 - t1);
1325 }
1326 head_tmp2[1] = head_e1;
1327 tail_tmp2[1] = tail_e1;
1328 }
1329 head_tmp1[0] = head_sum[0];
1330 tail_tmp1[0] = tail_sum[0];
1331 head_tmp1[1] = head_sum[1];
1332 tail_tmp1[1] = tail_sum[1];
1333 {
1334 double head_t, tail_t;
1335 double head_a, tail_a;
1336 double head_b, tail_b;
1337 /* Real part */
1338 head_a = head_tmp2[0];
1339 tail_a = tail_tmp2[0];
1340 head_b = head_tmp1[0];
1341 tail_b = tail_tmp1[0];
1342 {
1343 /* Compute double-double = double-double + double-double. */
1344 double bv;
1345 double s1, s2, t1, t2;
1346
1347 /* Add two hi words. */
1348 s1 = head_a + head_b;
1349 bv = s1 - head_a;
1350 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1351
1352 /* Add two lo words. */
1353 t1 = tail_a + tail_b;
1354 bv = t1 - tail_a;
1355 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1356
1357 s2 += t1;
1358
1359 /* Renormalize (s1, s2) to (t1, s2) */
1360 t1 = s1 + s2;
1361 s2 = s2 - (t1 - s1);
1362
1363 t2 += s2;
1364
1365 /* Renormalize (t1, t2) */
1366 head_t = t1 + t2;
1367 tail_t = t2 - (head_t - t1);
1368 }
1369 head_tmp1[0] = head_t;
1370 tail_tmp1[0] = tail_t;
1371 /* Imaginary part */
1372 head_a = head_tmp2[1];
1373 tail_a = tail_tmp2[1];
1374 head_b = head_tmp1[1];
1375 tail_b = tail_tmp1[1];
1376 {
1377 /* Compute double-double = double-double + double-double. */
1378 double bv;
1379 double s1, s2, t1, t2;
1380
1381 /* Add two hi words. */
1382 s1 = head_a + head_b;
1383 bv = s1 - head_a;
1384 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1385
1386 /* Add two lo words. */
1387 t1 = tail_a + tail_b;
1388 bv = t1 - tail_a;
1389 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1390
1391 s2 += t1;
1392
1393 /* Renormalize (s1, s2) to (t1, s2) */
1394 t1 = s1 + s2;
1395 s2 = s2 - (t1 - s1);
1396
1397 t2 += s2;
1398
1399 /* Renormalize (t1, t2) */
1400 head_t = t1 + t2;
1401 tail_t = t2 - (head_t - t1);
1402 }
1403 head_tmp1[1] = head_t;
1404 tail_tmp1[1] = tail_t;
1405 }
1406 y_i[yi] = head_tmp1[0];
1407 y_i[yi + 1] = head_tmp1[1];
1408 if (i + 1 >= (n_i - k))
1409 maxj_second--;
1410 if (i >= k) {
1411 astarti += (incaij + incaij2);
1412 x_starti += incx;
1413 } else {
1414 maxj_first++;
1415 astarti += incaij2;
1416 }
1417 }
1418 }
1419 } else {
1420 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
1421 /* Case alpha != 1, but beta == 0.
1422 We compute y <--- A * x * a */
1423 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
1424 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
1425
1426 for (j = 0, aij = astarti, xi = x_starti;
1427 j < maxj_first; j++, aij += incaij, xi += incx) {
1428 a_elem[0] = a_i[aij];
1429 a_elem[1] = a_i[aij + 1];
1430 x_elem = x_i[xi];
1431 {
1432 head_prod[0] = (double) a_elem[0] * x_elem;
1433 tail_prod[0] = 0.0;
1434 head_prod[1] = (double) a_elem[1] * x_elem;
1435 tail_prod[1] = 0.0;
1436 }
1437 {
1438 double head_t, tail_t;
1439 double head_a, tail_a;
1440 double head_b, tail_b;
1441 /* Real part */
1442 head_a = head_sum[0];
1443 tail_a = tail_sum[0];
1444 head_b = head_prod[0];
1445 tail_b = tail_prod[0];
1446 {
1447 /* Compute double-double = double-double + double-double. */
1448 double bv;
1449 double s1, s2, t1, t2;
1450
1451 /* Add two hi words. */
1452 s1 = head_a + head_b;
1453 bv = s1 - head_a;
1454 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1455
1456 /* Add two lo words. */
1457 t1 = tail_a + tail_b;
1458 bv = t1 - tail_a;
1459 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1460
1461 s2 += t1;
1462
1463 /* Renormalize (s1, s2) to (t1, s2) */
1464 t1 = s1 + s2;
1465 s2 = s2 - (t1 - s1);
1466
1467 t2 += s2;
1468
1469 /* Renormalize (t1, t2) */
1470 head_t = t1 + t2;
1471 tail_t = t2 - (head_t - t1);
1472 }
1473 head_sum[0] = head_t;
1474 tail_sum[0] = tail_t;
1475 /* Imaginary part */
1476 head_a = head_sum[1];
1477 tail_a = tail_sum[1];
1478 head_b = head_prod[1];
1479 tail_b = tail_prod[1];
1480 {
1481 /* Compute double-double = double-double + double-double. */
1482 double bv;
1483 double s1, s2, t1, t2;
1484
1485 /* Add two hi words. */
1486 s1 = head_a + head_b;
1487 bv = s1 - head_a;
1488 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1489
1490 /* Add two lo words. */
1491 t1 = tail_a + tail_b;
1492 bv = t1 - tail_a;
1493 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1494
1495 s2 += t1;
1496
1497 /* Renormalize (s1, s2) to (t1, s2) */
1498 t1 = s1 + s2;
1499 s2 = s2 - (t1 - s1);
1500
1501 t2 += s2;
1502
1503 /* Renormalize (t1, t2) */
1504 head_t = t1 + t2;
1505 tail_t = t2 - (head_t - t1);
1506 }
1507 head_sum[1] = head_t;
1508 tail_sum[1] = tail_t;
1509 }
1510 }
1511 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
1512 a_elem[0] = a_i[aij];
1513 a_elem[1] = a_i[aij + 1];
1514 x_elem = x_i[xi];
1515 {
1516 head_prod[0] = (double) a_elem[0] * x_elem;
1517 tail_prod[0] = 0.0;
1518 head_prod[1] = (double) a_elem[1] * x_elem;
1519 tail_prod[1] = 0.0;
1520 }
1521 {
1522 double head_t, tail_t;
1523 double head_a, tail_a;
1524 double head_b, tail_b;
1525 /* Real part */
1526 head_a = head_sum[0];
1527 tail_a = tail_sum[0];
1528 head_b = head_prod[0];
1529 tail_b = tail_prod[0];
1530 {
1531 /* Compute double-double = double-double + double-double. */
1532 double bv;
1533 double s1, s2, t1, t2;
1534
1535 /* Add two hi words. */
1536 s1 = head_a + head_b;
1537 bv = s1 - head_a;
1538 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1539
1540 /* Add two lo words. */
1541 t1 = tail_a + tail_b;
1542 bv = t1 - tail_a;
1543 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1544
1545 s2 += t1;
1546
1547 /* Renormalize (s1, s2) to (t1, s2) */
1548 t1 = s1 + s2;
1549 s2 = s2 - (t1 - s1);
1550
1551 t2 += s2;
1552
1553 /* Renormalize (t1, t2) */
1554 head_t = t1 + t2;
1555 tail_t = t2 - (head_t - t1);
1556 }
1557 head_sum[0] = head_t;
1558 tail_sum[0] = tail_t;
1559 /* Imaginary part */
1560 head_a = head_sum[1];
1561 tail_a = tail_sum[1];
1562 head_b = head_prod[1];
1563 tail_b = tail_prod[1];
1564 {
1565 /* Compute double-double = double-double + double-double. */
1566 double bv;
1567 double s1, s2, t1, t2;
1568
1569 /* Add two hi words. */
1570 s1 = head_a + head_b;
1571 bv = s1 - head_a;
1572 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1573
1574 /* Add two lo words. */
1575 t1 = tail_a + tail_b;
1576 bv = t1 - tail_a;
1577 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1578
1579 s2 += t1;
1580
1581 /* Renormalize (s1, s2) to (t1, s2) */
1582 t1 = s1 + s2;
1583 s2 = s2 - (t1 - s1);
1584
1585 t2 += s2;
1586
1587 /* Renormalize (t1, t2) */
1588 head_t = t1 + t2;
1589 tail_t = t2 - (head_t - t1);
1590 }
1591 head_sum[1] = head_t;
1592 tail_sum[1] = tail_t;
1593 }
1594 }
1595 y_elem[0] = y_i[yi];
1596 y_elem[1] = y_i[yi + 1];
1597 {
1598 double cd[2];
1599 cd[0] = (double) alpha_i[0];
1600 cd[1] = (double) alpha_i[1];
1601 {
1602 /* Compute complex-extra = complex-extra * complex-double. */
1603 double head_a0, tail_a0;
1604 double head_a1, tail_a1;
1605 double head_t1, tail_t1;
1606 double head_t2, tail_t2;
1607 head_a0 = head_sum[0];
1608 tail_a0 = tail_sum[0];
1609 head_a1 = head_sum[1];
1610 tail_a1 = tail_sum[1];
1611 /* real part */
1612 {
1613 /* Compute double-double = double-double * double. */
1614 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1615
1616 con = head_a0 * split;
1617 a11 = con - head_a0;
1618 a11 = con - a11;
1619 a21 = head_a0 - a11;
1620 con = cd[0] * split;
1621 b1 = con - cd[0];
1622 b1 = con - b1;
1623 b2 = cd[0] - b1;
1624
1625 c11 = head_a0 * cd[0];
1626 c21 =
1627 (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1628
1629 c2 = tail_a0 * cd[0];
1630 t1 = c11 + c2;
1631 t2 = (c2 - (t1 - c11)) + c21;
1632
1633 head_t1 = t1 + t2;
1634 tail_t1 = t2 - (head_t1 - t1);
1635 }
1636 {
1637 /* Compute double-double = double-double * double. */
1638 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1639
1640 con = head_a1 * split;
1641 a11 = con - head_a1;
1642 a11 = con - a11;
1643 a21 = head_a1 - a11;
1644 con = cd[1] * split;
1645 b1 = con - cd[1];
1646 b1 = con - b1;
1647 b2 = cd[1] - b1;
1648
1649 c11 = head_a1 * cd[1];
1650 c21 =
1651 (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1652
1653 c2 = tail_a1 * cd[1];
1654 t1 = c11 + c2;
1655 t2 = (c2 - (t1 - c11)) + c21;
1656
1657 head_t2 = t1 + t2;
1658 tail_t2 = t2 - (head_t2 - t1);
1659 }
1660 head_t2 = -head_t2;
1661 tail_t2 = -tail_t2;
1662 {
1663 /* Compute double-double = double-double + double-double. */
1664 double bv;
1665 double s1, s2, t1, t2;
1666
1667 /* Add two hi words. */
1668 s1 = head_t1 + head_t2;
1669 bv = s1 - head_t1;
1670 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1671
1672 /* Add two lo words. */
1673 t1 = tail_t1 + tail_t2;
1674 bv = t1 - tail_t1;
1675 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1676
1677 s2 += t1;
1678
1679 /* Renormalize (s1, s2) to (t1, s2) */
1680 t1 = s1 + s2;
1681 s2 = s2 - (t1 - s1);
1682
1683 t2 += s2;
1684
1685 /* Renormalize (t1, t2) */
1686 head_t1 = t1 + t2;
1687 tail_t1 = t2 - (head_t1 - t1);
1688 }
1689 head_tmp1[0] = head_t1;
1690 tail_tmp1[0] = tail_t1;
1691 /* imaginary part */
1692 {
1693 /* Compute double-double = double-double * double. */
1694 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1695
1696 con = head_a1 * split;
1697 a11 = con - head_a1;
1698 a11 = con - a11;
1699 a21 = head_a1 - a11;
1700 con = cd[0] * split;
1701 b1 = con - cd[0];
1702 b1 = con - b1;
1703 b2 = cd[0] - b1;
1704
1705 c11 = head_a1 * cd[0];
1706 c21 =
1707 (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1708
1709 c2 = tail_a1 * cd[0];
1710 t1 = c11 + c2;
1711 t2 = (c2 - (t1 - c11)) + c21;
1712
1713 head_t1 = t1 + t2;
1714 tail_t1 = t2 - (head_t1 - t1);
1715 }
1716 {
1717 /* Compute double-double = double-double * double. */
1718 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1719
1720 con = head_a0 * split;
1721 a11 = con - head_a0;
1722 a11 = con - a11;
1723 a21 = head_a0 - a11;
1724 con = cd[1] * split;
1725 b1 = con - cd[1];
1726 b1 = con - b1;
1727 b2 = cd[1] - b1;
1728
1729 c11 = head_a0 * cd[1];
1730 c21 =
1731 (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1732
1733 c2 = tail_a0 * cd[1];
1734 t1 = c11 + c2;
1735 t2 = (c2 - (t1 - c11)) + c21;
1736
1737 head_t2 = t1 + t2;
1738 tail_t2 = t2 - (head_t2 - t1);
1739 }
1740 {
1741 /* Compute double-double = double-double + double-double. */
1742 double bv;
1743 double s1, s2, t1, t2;
1744
1745 /* Add two hi words. */
1746 s1 = head_t1 + head_t2;
1747 bv = s1 - head_t1;
1748 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
1749
1750 /* Add two lo words. */
1751 t1 = tail_t1 + tail_t2;
1752 bv = t1 - tail_t1;
1753 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
1754
1755 s2 += t1;
1756
1757 /* Renormalize (s1, s2) to (t1, s2) */
1758 t1 = s1 + s2;
1759 s2 = s2 - (t1 - s1);
1760
1761 t2 += s2;
1762
1763 /* Renormalize (t1, t2) */
1764 head_t1 = t1 + t2;
1765 tail_t1 = t2 - (head_t1 - t1);
1766 }
1767 head_tmp1[1] = head_t1;
1768 tail_tmp1[1] = tail_t1;
1769 }
1770
1771 }
1772 y_i[yi] = head_tmp1[0];
1773 y_i[yi + 1] = head_tmp1[1];
1774 if (i + 1 >= (n_i - k))
1775 maxj_second--;
1776 if (i >= k) {
1777 astarti += (incaij + incaij2);
1778 x_starti += incx;
1779 } else {
1780 maxj_first++;
1781 astarti += incaij2;
1782 }
1783 }
1784 } else {
1785 /* The most general form, y <--- alpha * A * x + beta * y */
1786 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
1787 head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;
1788
1789 for (j = 0, aij = astarti, xi = x_starti;
1790 j < maxj_first; j++, aij += incaij, xi += incx) {
1791 a_elem[0] = a_i[aij];
1792 a_elem[1] = a_i[aij + 1];
1793 x_elem = x_i[xi];
1794 {
1795 head_prod[0] = (double) a_elem[0] * x_elem;
1796 tail_prod[0] = 0.0;
1797 head_prod[1] = (double) a_elem[1] * x_elem;
1798 tail_prod[1] = 0.0;
1799 }
1800 {
1801 double head_t, tail_t;
1802 double head_a, tail_a;
1803 double head_b, tail_b;
1804 /* Real part */
1805 head_a = head_sum[0];
1806 tail_a = tail_sum[0];
1807 head_b = head_prod[0];
1808 tail_b = tail_prod[0];
1809 {
1810 /* Compute double-double = double-double + double-double. */
1811 double bv;
1812 double s1, s2, t1, t2;
1813
1814 /* Add two hi words. */
1815 s1 = head_a + head_b;
1816 bv = s1 - head_a;
1817 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1818
1819 /* Add two lo words. */
1820 t1 = tail_a + tail_b;
1821 bv = t1 - tail_a;
1822 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1823
1824 s2 += t1;
1825
1826 /* Renormalize (s1, s2) to (t1, s2) */
1827 t1 = s1 + s2;
1828 s2 = s2 - (t1 - s1);
1829
1830 t2 += s2;
1831
1832 /* Renormalize (t1, t2) */
1833 head_t = t1 + t2;
1834 tail_t = t2 - (head_t - t1);
1835 }
1836 head_sum[0] = head_t;
1837 tail_sum[0] = tail_t;
1838 /* Imaginary part */
1839 head_a = head_sum[1];
1840 tail_a = tail_sum[1];
1841 head_b = head_prod[1];
1842 tail_b = tail_prod[1];
1843 {
1844 /* Compute double-double = double-double + double-double. */
1845 double bv;
1846 double s1, s2, t1, t2;
1847
1848 /* Add two hi words. */
1849 s1 = head_a + head_b;
1850 bv = s1 - head_a;
1851 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1852
1853 /* Add two lo words. */
1854 t1 = tail_a + tail_b;
1855 bv = t1 - tail_a;
1856 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1857
1858 s2 += t1;
1859
1860 /* Renormalize (s1, s2) to (t1, s2) */
1861 t1 = s1 + s2;
1862 s2 = s2 - (t1 - s1);
1863
1864 t2 += s2;
1865
1866 /* Renormalize (t1, t2) */
1867 head_t = t1 + t2;
1868 tail_t = t2 - (head_t - t1);
1869 }
1870 head_sum[1] = head_t;
1871 tail_sum[1] = tail_t;
1872 }
1873 }
1874 for (j = 0; j < maxj_second; j++, aij += incaij2, xi += incx) {
1875 a_elem[0] = a_i[aij];
1876 a_elem[1] = a_i[aij + 1];
1877 x_elem = x_i[xi];
1878 {
1879 head_prod[0] = (double) a_elem[0] * x_elem;
1880 tail_prod[0] = 0.0;
1881 head_prod[1] = (double) a_elem[1] * x_elem;
1882 tail_prod[1] = 0.0;
1883 }
1884 {
1885 double head_t, tail_t;
1886 double head_a, tail_a;
1887 double head_b, tail_b;
1888 /* Real part */
1889 head_a = head_sum[0];
1890 tail_a = tail_sum[0];
1891 head_b = head_prod[0];
1892 tail_b = tail_prod[0];
1893 {
1894 /* Compute double-double = double-double + double-double. */
1895 double bv;
1896 double s1, s2, t1, t2;
1897
1898 /* Add two hi words. */
1899 s1 = head_a + head_b;
1900 bv = s1 - head_a;
1901 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1902
1903 /* Add two lo words. */
1904 t1 = tail_a + tail_b;
1905 bv = t1 - tail_a;
1906 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1907
1908 s2 += t1;
1909
1910 /* Renormalize (s1, s2) to (t1, s2) */
1911 t1 = s1 + s2;
1912 s2 = s2 - (t1 - s1);
1913
1914 t2 += s2;
1915
1916 /* Renormalize (t1, t2) */
1917 head_t = t1 + t2;
1918 tail_t = t2 - (head_t - t1);
1919 }
1920 head_sum[0] = head_t;
1921 tail_sum[0] = tail_t;
1922 /* Imaginary part */
1923 head_a = head_sum[1];
1924 tail_a = tail_sum[1];
1925 head_b = head_prod[1];
1926 tail_b = tail_prod[1];
1927 {
1928 /* Compute double-double = double-double + double-double. */
1929 double bv;
1930 double s1, s2, t1, t2;
1931
1932 /* Add two hi words. */
1933 s1 = head_a + head_b;
1934 bv = s1 - head_a;
1935 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
1936
1937 /* Add two lo words. */
1938 t1 = tail_a + tail_b;
1939 bv = t1 - tail_a;
1940 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
1941
1942 s2 += t1;
1943
1944 /* Renormalize (s1, s2) to (t1, s2) */
1945 t1 = s1 + s2;
1946 s2 = s2 - (t1 - s1);
1947
1948 t2 += s2;
1949
1950 /* Renormalize (t1, t2) */
1951 head_t = t1 + t2;
1952 tail_t = t2 - (head_t - t1);
1953 }
1954 head_sum[1] = head_t;
1955 tail_sum[1] = tail_t;
1956 }
1957 }
1958 y_elem[0] = y_i[yi];
1959 y_elem[1] = y_i[yi + 1];
1960 {
1961 double head_e1, tail_e1;
1962 double d1;
1963 double d2;
1964 /* Real part */
1965 d1 = (double) y_elem[0] * beta_i[0];
1966 d2 = (double) -y_elem[1] * beta_i[1];
1967 {
1968 /* Compute double-double = double + double. */
1969 double e, t1, t2;
1970
1971 /* Knuth trick. */
1972 t1 = d1 + d2;
1973 e = t1 - d1;
1974 t2 = ((d2 - e) + (d1 - (t1 - e)));
1975
1976 /* The result is t1 + t2, after normalization. */
1977 head_e1 = t1 + t2;
1978 tail_e1 = t2 - (head_e1 - t1);
1979 }
1980 head_tmp2[0] = head_e1;
1981 tail_tmp2[0] = tail_e1;
1982 /* imaginary part */
1983 d1 = (double) y_elem[0] * beta_i[1];
1984 d2 = (double) y_elem[1] * beta_i[0];
1985 {
1986 /* Compute double-double = double + double. */
1987 double e, t1, t2;
1988
1989 /* Knuth trick. */
1990 t1 = d1 + d2;
1991 e = t1 - d1;
1992 t2 = ((d2 - e) + (d1 - (t1 - e)));
1993
1994 /* The result is t1 + t2, after normalization. */
1995 head_e1 = t1 + t2;
1996 tail_e1 = t2 - (head_e1 - t1);
1997 }
1998 head_tmp2[1] = head_e1;
1999 tail_tmp2[1] = tail_e1;
2000 }
2001 {
2002 double cd[2];
2003 cd[0] = (double) alpha_i[0];
2004 cd[1] = (double) alpha_i[1];
2005 {
2006 /* Compute complex-extra = complex-extra * complex-double. */
2007 double head_a0, tail_a0;
2008 double head_a1, tail_a1;
2009 double head_t1, tail_t1;
2010 double head_t2, tail_t2;
2011 head_a0 = head_sum[0];
2012 tail_a0 = tail_sum[0];
2013 head_a1 = head_sum[1];
2014 tail_a1 = tail_sum[1];
2015 /* real part */
2016 {
2017 /* Compute double-double = double-double * double. */
2018 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2019
2020 con = head_a0 * split;
2021 a11 = con - head_a0;
2022 a11 = con - a11;
2023 a21 = head_a0 - a11;
2024 con = cd[0] * split;
2025 b1 = con - cd[0];
2026 b1 = con - b1;
2027 b2 = cd[0] - b1;
2028
2029 c11 = head_a0 * cd[0];
2030 c21 =
2031 (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2032
2033 c2 = tail_a0 * cd[0];
2034 t1 = c11 + c2;
2035 t2 = (c2 - (t1 - c11)) + c21;
2036
2037 head_t1 = t1 + t2;
2038 tail_t1 = t2 - (head_t1 - t1);
2039 }
2040 {
2041 /* Compute double-double = double-double * double. */
2042 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2043
2044 con = head_a1 * split;
2045 a11 = con - head_a1;
2046 a11 = con - a11;
2047 a21 = head_a1 - a11;
2048 con = cd[1] * split;
2049 b1 = con - cd[1];
2050 b1 = con - b1;
2051 b2 = cd[1] - b1;
2052
2053 c11 = head_a1 * cd[1];
2054 c21 =
2055 (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2056
2057 c2 = tail_a1 * cd[1];
2058 t1 = c11 + c2;
2059 t2 = (c2 - (t1 - c11)) + c21;
2060
2061 head_t2 = t1 + t2;
2062 tail_t2 = t2 - (head_t2 - t1);
2063 }
2064 head_t2 = -head_t2;
2065 tail_t2 = -tail_t2;
2066 {
2067 /* Compute double-double = double-double + double-double. */
2068 double bv;
2069 double s1, s2, t1, t2;
2070
2071 /* Add two hi words. */
2072 s1 = head_t1 + head_t2;
2073 bv = s1 - head_t1;
2074 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
2075
2076 /* Add two lo words. */
2077 t1 = tail_t1 + tail_t2;
2078 bv = t1 - tail_t1;
2079 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
2080
2081 s2 += t1;
2082
2083 /* Renormalize (s1, s2) to (t1, s2) */
2084 t1 = s1 + s2;
2085 s2 = s2 - (t1 - s1);
2086
2087 t2 += s2;
2088
2089 /* Renormalize (t1, t2) */
2090 head_t1 = t1 + t2;
2091 tail_t1 = t2 - (head_t1 - t1);
2092 }
2093 head_tmp1[0] = head_t1;
2094 tail_tmp1[0] = tail_t1;
2095 /* imaginary part */
2096 {
2097 /* Compute double-double = double-double * double. */
2098 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2099
2100 con = head_a1 * split;
2101 a11 = con - head_a1;
2102 a11 = con - a11;
2103 a21 = head_a1 - a11;
2104 con = cd[0] * split;
2105 b1 = con - cd[0];
2106 b1 = con - b1;
2107 b2 = cd[0] - b1;
2108
2109 c11 = head_a1 * cd[0];
2110 c21 =
2111 (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2112
2113 c2 = tail_a1 * cd[0];
2114 t1 = c11 + c2;
2115 t2 = (c2 - (t1 - c11)) + c21;
2116
2117 head_t1 = t1 + t2;
2118 tail_t1 = t2 - (head_t1 - t1);
2119 }
2120 {
2121 /* Compute double-double = double-double * double. */
2122 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2123
2124 con = head_a0 * split;
2125 a11 = con - head_a0;
2126 a11 = con - a11;
2127 a21 = head_a0 - a11;
2128 con = cd[1] * split;
2129 b1 = con - cd[1];
2130 b1 = con - b1;
2131 b2 = cd[1] - b1;
2132
2133 c11 = head_a0 * cd[1];
2134 c21 =
2135 (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2136
2137 c2 = tail_a0 * cd[1];
2138 t1 = c11 + c2;
2139 t2 = (c2 - (t1 - c11)) + c21;
2140
2141 head_t2 = t1 + t2;
2142 tail_t2 = t2 - (head_t2 - t1);
2143 }
2144 {
2145 /* Compute double-double = double-double + double-double. */
2146 double bv;
2147 double s1, s2, t1, t2;
2148
2149 /* Add two hi words. */
2150 s1 = head_t1 + head_t2;
2151 bv = s1 - head_t1;
2152 s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
2153
2154 /* Add two lo words. */
2155 t1 = tail_t1 + tail_t2;
2156 bv = t1 - tail_t1;
2157 t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
2158
2159 s2 += t1;
2160
2161 /* Renormalize (s1, s2) to (t1, s2) */
2162 t1 = s1 + s2;
2163 s2 = s2 - (t1 - s1);
2164
2165 t2 += s2;
2166
2167 /* Renormalize (t1, t2) */
2168 head_t1 = t1 + t2;
2169 tail_t1 = t2 - (head_t1 - t1);
2170 }
2171 head_tmp1[1] = head_t1;
2172 tail_tmp1[1] = tail_t1;
2173 }
2174
2175 }
2176 {
2177 double head_t, tail_t;
2178 double head_a, tail_a;
2179 double head_b, tail_b;
2180 /* Real part */
2181 head_a = head_tmp2[0];
2182 tail_a = tail_tmp2[0];
2183 head_b = head_tmp1[0];
2184 tail_b = tail_tmp1[0];
2185 {
2186 /* Compute double-double = double-double + double-double. */
2187 double bv;
2188 double s1, s2, t1, t2;
2189
2190 /* Add two hi words. */
2191 s1 = head_a + head_b;
2192 bv = s1 - head_a;
2193 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
2194
2195 /* Add two lo words. */
2196 t1 = tail_a + tail_b;
2197 bv = t1 - tail_a;
2198 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
2199
2200 s2 += t1;
2201
2202 /* Renormalize (s1, s2) to (t1, s2) */
2203 t1 = s1 + s2;
2204 s2 = s2 - (t1 - s1);
2205
2206 t2 += s2;
2207
2208 /* Renormalize (t1, t2) */
2209 head_t = t1 + t2;
2210 tail_t = t2 - (head_t - t1);
2211 }
2212 head_tmp1[0] = head_t;
2213 tail_tmp1[0] = tail_t;
2214 /* Imaginary part */
2215 head_a = head_tmp2[1];
2216 tail_a = tail_tmp2[1];
2217 head_b = head_tmp1[1];
2218 tail_b = tail_tmp1[1];
2219 {
2220 /* Compute double-double = double-double + double-double. */
2221 double bv;
2222 double s1, s2, t1, t2;
2223
2224 /* Add two hi words. */
2225 s1 = head_a + head_b;
2226 bv = s1 - head_a;
2227 s2 = ((head_b - bv) + (head_a - (s1 - bv)));
2228
2229 /* Add two lo words. */
2230 t1 = tail_a + tail_b;
2231 bv = t1 - tail_a;
2232 t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
2233
2234 s2 += t1;
2235
2236 /* Renormalize (s1, s2) to (t1, s2) */
2237 t1 = s1 + s2;
2238 s2 = s2 - (t1 - s1);
2239
2240 t2 += s2;
2241
2242 /* Renormalize (t1, t2) */
2243 head_t = t1 + t2;
2244 tail_t = t2 - (head_t - t1);
2245 }
2246 head_tmp1[1] = head_t;
2247 tail_tmp1[1] = tail_t;
2248 }
2249 y_i[yi] = head_tmp1[0];
2250 y_i[yi + 1] = head_tmp1[1];
2251 if (i + 1 >= (n_i - k))
2252 maxj_second--;
2253 if (i >= k) {
2254 astarti += (incaij + incaij2);
2255 x_starti += incx;
2256 } else {
2257 maxj_first++;
2258 astarti += incaij2;
2259 }
2260 }
2261 }
2262 }
2263 }
2264 FPU_FIX_STOP;
2265
2266 break;
2267 }
2268
2269 default:
2270 BLAS_error(routine_name, -13, prec, 0);
2271 break;
2272 }
2273 } /* end BLAS_csbmv_c_s */
2274