1 #include <blas_extended.h>
2 #include <blas_extended_private.h>
3 #include <blas_fpu.h>
BLAS_dsymv2_d_s_x(enum blas_order_type order,enum blas_uplo_type uplo,int n,double alpha,const double * a,int lda,const float * x_head,const float * x_tail,int incx,double beta,double * y,int incy,enum blas_prec_type prec)4 void BLAS_dsymv2_d_s_x(enum blas_order_type order, enum blas_uplo_type uplo,
5 		       int n, double alpha, const double *a, int lda,
6 		       const float *x_head, const float *x_tail, int incx,
7 		       double beta, double *y, int incy,
8 		       enum blas_prec_type prec)
9 
10 /*
11  * Purpose
12  * =======
13  *
14  * This routines computes the matrix product:
15  *
16  *     y  <-  alpha * A * (x_head + x_tail) + beta * y
17  *
18  * where A is a symmetric matrix.
19  *
20  * Arguments
21  * =========
22  *
23  * order   (input) enum blas_order_type
24  *         Storage format of input symmetric matrix A.
25  *
26  * uplo    (input) enum blas_uplo_type
27  *         Determines which half of matrix A (upper or lower triangle)
28  *         is accessed.
29  *
30  * n       (input) int
31  *         Dimension of A and size of vectors x, y.
32  *
33  * alpha   (input) double
34  *
35  * a       (input) double*
36  *         Matrix A.
37  *
38  * lda     (input) int
39  *         Leading dimension of matrix A.
40  *
41  * x_head  (input) float*
42  *         Vector x_head
43  *
44  * x_tail  (input) float*
45  *         Vector x_tail
46  *
47  * incx    (input) int
48  *         Stride for vector x.
49  *
50  * beta    (input) double
51  *
52  * y       (input) float*
53  *         Vector y.
54  *
55  * incy    (input) int
56  *         Stride for vector y.
57  *
58  * prec   (input) enum blas_prec_type
59  *        Specifies the internal precision to be used.
60  *        = blas_prec_single: single precision.
61  *        = blas_prec_double: double precision.
62  *        = blas_prec_extra : anything at least 1.5 times as accurate
63  *                            than double, and wider than 80-bits.
64  *                            We use double-double in our implementation.
65  *
66  */
67 {
68   /* Routine name */
69   const char routine_name[] = "BLAS_dsymv2_d_s_x";
70   switch (prec) {
71 
72   case blas_prec_single:{
73 
74       int i, j;
75       int xi, yi, xi0, yi0;
76       int aij, ai;
77       int incai;
78       int incaij, incaij2;
79 
80       const double *a_i = a;
81       const float *x_head_i = x_head;
82       const float *x_tail_i = x_tail;
83       double *y_i = y;
84       double alpha_i = alpha;
85       double beta_i = beta;
86       double a_elem;
87       float x_elem;
88       double y_elem;
89       double prod1;
90       double prod2;
91       double sum1;
92       double sum2;
93       double tmp1;
94       double tmp2;
95       double tmp3;
96 
97 
98 
99       /* Test for no-op */
100       if (n <= 0) {
101 	return;
102       }
103       if (alpha_i == 0.0 && beta_i == 1.0) {
104 	return;
105       }
106 
107       /* Check for error conditions. */
108       if (n < 0) {
109 	BLAS_error(routine_name, -3, n, NULL);
110       }
111       if (lda < n) {
112 	BLAS_error(routine_name, -6, n, NULL);
113       }
114       if (incx == 0) {
115 	BLAS_error(routine_name, -9, incx, NULL);
116       }
117       if (incy == 0) {
118 	BLAS_error(routine_name, -12, incy, NULL);
119       }
120 
121       if ((order == blas_colmajor && uplo == blas_upper) ||
122 	  (order == blas_rowmajor && uplo == blas_lower)) {
123 	incai = lda;
124 	incaij = 1;
125 	incaij2 = lda;
126       } else {
127 	incai = 1;
128 	incaij = lda;
129 	incaij2 = 1;
130       }
131 
132 
133 
134 
135 
136 
137       xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
138       yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
139 
140 
141 
142       /* The most general form,   y <--- alpha * A * (x_head + x_tail) + beta * y   */
143       for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
144 	sum1 = 0.0;
145 	sum2 = 0.0;
146 
147 	for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
148 	  a_elem = a_i[aij];
149 	  x_elem = x_head_i[xi];
150 	  prod1 = a_elem * x_elem;
151 	  sum1 = sum1 + prod1;
152 	  x_elem = x_tail_i[xi];
153 	  prod2 = a_elem * x_elem;
154 	  sum2 = sum2 + prod2;
155 	}
156 	for (; j < n; j++, aij += incaij2, xi += incx) {
157 	  a_elem = a_i[aij];
158 	  x_elem = x_head_i[xi];
159 	  prod1 = a_elem * x_elem;
160 	  sum1 = sum1 + prod1;
161 	  x_elem = x_tail_i[xi];
162 	  prod2 = a_elem * x_elem;
163 	  sum2 = sum2 + prod2;
164 	}
165 	sum1 = sum1 + sum2;
166 	tmp1 = sum1 * alpha_i;
167 	y_elem = y_i[yi];
168 	tmp2 = y_elem * beta_i;
169 	tmp3 = tmp1 + tmp2;
170 	y_i[yi] = tmp3;
171       }
172 
173 
174 
175       break;
176     }
177 
178   case blas_prec_double:
179   case blas_prec_indigenous:{
180 
181       int i, j;
182       int xi, yi, xi0, yi0;
183       int aij, ai;
184       int incai;
185       int incaij, incaij2;
186 
187       const double *a_i = a;
188       const float *x_head_i = x_head;
189       const float *x_tail_i = x_tail;
190       double *y_i = y;
191       double alpha_i = alpha;
192       double beta_i = beta;
193       double a_elem;
194       float x_elem;
195       double y_elem;
196       double prod1;
197       double prod2;
198       double sum1;
199       double sum2;
200       double tmp1;
201       double tmp2;
202       double tmp3;
203 
204 
205 
206       /* Test for no-op */
207       if (n <= 0) {
208 	return;
209       }
210       if (alpha_i == 0.0 && beta_i == 1.0) {
211 	return;
212       }
213 
214       /* Check for error conditions. */
215       if (n < 0) {
216 	BLAS_error(routine_name, -3, n, NULL);
217       }
218       if (lda < n) {
219 	BLAS_error(routine_name, -6, n, NULL);
220       }
221       if (incx == 0) {
222 	BLAS_error(routine_name, -9, incx, NULL);
223       }
224       if (incy == 0) {
225 	BLAS_error(routine_name, -12, incy, NULL);
226       }
227 
228       if ((order == blas_colmajor && uplo == blas_upper) ||
229 	  (order == blas_rowmajor && uplo == blas_lower)) {
230 	incai = lda;
231 	incaij = 1;
232 	incaij2 = lda;
233       } else {
234 	incai = 1;
235 	incaij = lda;
236 	incaij2 = 1;
237       }
238 
239 
240 
241 
242 
243 
244       xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
245       yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
246 
247 
248 
249       /* The most general form,   y <--- alpha * A * (x_head + x_tail) + beta * y   */
250       for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
251 	sum1 = 0.0;
252 	sum2 = 0.0;
253 
254 	for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
255 	  a_elem = a_i[aij];
256 	  x_elem = x_head_i[xi];
257 	  prod1 = a_elem * x_elem;
258 	  sum1 = sum1 + prod1;
259 	  x_elem = x_tail_i[xi];
260 	  prod2 = a_elem * x_elem;
261 	  sum2 = sum2 + prod2;
262 	}
263 	for (; j < n; j++, aij += incaij2, xi += incx) {
264 	  a_elem = a_i[aij];
265 	  x_elem = x_head_i[xi];
266 	  prod1 = a_elem * x_elem;
267 	  sum1 = sum1 + prod1;
268 	  x_elem = x_tail_i[xi];
269 	  prod2 = a_elem * x_elem;
270 	  sum2 = sum2 + prod2;
271 	}
272 	sum1 = sum1 + sum2;
273 	tmp1 = sum1 * alpha_i;
274 	y_elem = y_i[yi];
275 	tmp2 = y_elem * beta_i;
276 	tmp3 = tmp1 + tmp2;
277 	y_i[yi] = tmp3;
278       }
279 
280 
281 
282       break;
283     }
284 
285   case blas_prec_extra:{
286 
287       int i, j;
288       int xi, yi, xi0, yi0;
289       int aij, ai;
290       int incai;
291       int incaij, incaij2;
292 
293       const double *a_i = a;
294       const float *x_head_i = x_head;
295       const float *x_tail_i = x_tail;
296       double *y_i = y;
297       double alpha_i = alpha;
298       double beta_i = beta;
299       double a_elem;
300       float x_elem;
301       double y_elem;
302       double head_prod1, tail_prod1;
303       double head_prod2, tail_prod2;
304       double head_sum1, tail_sum1;
305       double head_sum2, tail_sum2;
306       double head_tmp1, tail_tmp1;
307       double head_tmp2, tail_tmp2;
308       double head_tmp3, tail_tmp3;
309 
310       FPU_FIX_DECL;
311 
312       /* Test for no-op */
313       if (n <= 0) {
314 	return;
315       }
316       if (alpha_i == 0.0 && beta_i == 1.0) {
317 	return;
318       }
319 
320       /* Check for error conditions. */
321       if (n < 0) {
322 	BLAS_error(routine_name, -3, n, NULL);
323       }
324       if (lda < n) {
325 	BLAS_error(routine_name, -6, n, NULL);
326       }
327       if (incx == 0) {
328 	BLAS_error(routine_name, -9, incx, NULL);
329       }
330       if (incy == 0) {
331 	BLAS_error(routine_name, -12, incy, NULL);
332       }
333 
334       if ((order == blas_colmajor && uplo == blas_upper) ||
335 	  (order == blas_rowmajor && uplo == blas_lower)) {
336 	incai = lda;
337 	incaij = 1;
338 	incaij2 = lda;
339       } else {
340 	incai = 1;
341 	incaij = lda;
342 	incaij2 = 1;
343       }
344 
345 
346 
347 
348 
349 
350       xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
351       yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
352 
353       FPU_FIX_START;
354 
355       /* The most general form,   y <--- alpha * A * (x_head + x_tail) + beta * y   */
356       for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
357 	head_sum1 = tail_sum1 = 0.0;
358 	head_sum2 = tail_sum2 = 0.0;
359 
360 	for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
361 	  a_elem = a_i[aij];
362 	  x_elem = x_head_i[xi];
363 	  {
364 	    double dt = (double) x_elem;
365 	    {
366 	      /* Compute double_double = double * double. */
367 	      double a1, a2, b1, b2, con;
368 
369 	      con = a_elem * split;
370 	      a1 = con - a_elem;
371 	      a1 = con - a1;
372 	      a2 = a_elem - a1;
373 	      con = dt * split;
374 	      b1 = con - dt;
375 	      b1 = con - b1;
376 	      b2 = dt - b1;
377 
378 	      head_prod1 = a_elem * dt;
379 	      tail_prod1 =
380 		(((a1 * b1 - head_prod1) + a1 * b2) + a2 * b1) + a2 * b2;
381 	    }
382 	  }
383 	  {
384 	    /* Compute double-double = double-double + double-double. */
385 	    double bv;
386 	    double s1, s2, t1, t2;
387 
388 	    /* Add two hi words. */
389 	    s1 = head_sum1 + head_prod1;
390 	    bv = s1 - head_sum1;
391 	    s2 = ((head_prod1 - bv) + (head_sum1 - (s1 - bv)));
392 
393 	    /* Add two lo words. */
394 	    t1 = tail_sum1 + tail_prod1;
395 	    bv = t1 - tail_sum1;
396 	    t2 = ((tail_prod1 - bv) + (tail_sum1 - (t1 - bv)));
397 
398 	    s2 += t1;
399 
400 	    /* Renormalize (s1, s2)  to  (t1, s2) */
401 	    t1 = s1 + s2;
402 	    s2 = s2 - (t1 - s1);
403 
404 	    t2 += s2;
405 
406 	    /* Renormalize (t1, t2)  */
407 	    head_sum1 = t1 + t2;
408 	    tail_sum1 = t2 - (head_sum1 - t1);
409 	  }
410 	  x_elem = x_tail_i[xi];
411 	  {
412 	    double dt = (double) x_elem;
413 	    {
414 	      /* Compute double_double = double * double. */
415 	      double a1, a2, b1, b2, con;
416 
417 	      con = a_elem * split;
418 	      a1 = con - a_elem;
419 	      a1 = con - a1;
420 	      a2 = a_elem - a1;
421 	      con = dt * split;
422 	      b1 = con - dt;
423 	      b1 = con - b1;
424 	      b2 = dt - b1;
425 
426 	      head_prod2 = a_elem * dt;
427 	      tail_prod2 =
428 		(((a1 * b1 - head_prod2) + a1 * b2) + a2 * b1) + a2 * b2;
429 	    }
430 	  }
431 	  {
432 	    /* Compute double-double = double-double + double-double. */
433 	    double bv;
434 	    double s1, s2, t1, t2;
435 
436 	    /* Add two hi words. */
437 	    s1 = head_sum2 + head_prod2;
438 	    bv = s1 - head_sum2;
439 	    s2 = ((head_prod2 - bv) + (head_sum2 - (s1 - bv)));
440 
441 	    /* Add two lo words. */
442 	    t1 = tail_sum2 + tail_prod2;
443 	    bv = t1 - tail_sum2;
444 	    t2 = ((tail_prod2 - bv) + (tail_sum2 - (t1 - bv)));
445 
446 	    s2 += t1;
447 
448 	    /* Renormalize (s1, s2)  to  (t1, s2) */
449 	    t1 = s1 + s2;
450 	    s2 = s2 - (t1 - s1);
451 
452 	    t2 += s2;
453 
454 	    /* Renormalize (t1, t2)  */
455 	    head_sum2 = t1 + t2;
456 	    tail_sum2 = t2 - (head_sum2 - t1);
457 	  }
458 	}
459 	for (; j < n; j++, aij += incaij2, xi += incx) {
460 	  a_elem = a_i[aij];
461 	  x_elem = x_head_i[xi];
462 	  {
463 	    double dt = (double) x_elem;
464 	    {
465 	      /* Compute double_double = double * double. */
466 	      double a1, a2, b1, b2, con;
467 
468 	      con = a_elem * split;
469 	      a1 = con - a_elem;
470 	      a1 = con - a1;
471 	      a2 = a_elem - a1;
472 	      con = dt * split;
473 	      b1 = con - dt;
474 	      b1 = con - b1;
475 	      b2 = dt - b1;
476 
477 	      head_prod1 = a_elem * dt;
478 	      tail_prod1 =
479 		(((a1 * b1 - head_prod1) + a1 * b2) + a2 * b1) + a2 * b2;
480 	    }
481 	  }
482 	  {
483 	    /* Compute double-double = double-double + double-double. */
484 	    double bv;
485 	    double s1, s2, t1, t2;
486 
487 	    /* Add two hi words. */
488 	    s1 = head_sum1 + head_prod1;
489 	    bv = s1 - head_sum1;
490 	    s2 = ((head_prod1 - bv) + (head_sum1 - (s1 - bv)));
491 
492 	    /* Add two lo words. */
493 	    t1 = tail_sum1 + tail_prod1;
494 	    bv = t1 - tail_sum1;
495 	    t2 = ((tail_prod1 - bv) + (tail_sum1 - (t1 - bv)));
496 
497 	    s2 += t1;
498 
499 	    /* Renormalize (s1, s2)  to  (t1, s2) */
500 	    t1 = s1 + s2;
501 	    s2 = s2 - (t1 - s1);
502 
503 	    t2 += s2;
504 
505 	    /* Renormalize (t1, t2)  */
506 	    head_sum1 = t1 + t2;
507 	    tail_sum1 = t2 - (head_sum1 - t1);
508 	  }
509 	  x_elem = x_tail_i[xi];
510 	  {
511 	    double dt = (double) x_elem;
512 	    {
513 	      /* Compute double_double = double * double. */
514 	      double a1, a2, b1, b2, con;
515 
516 	      con = a_elem * split;
517 	      a1 = con - a_elem;
518 	      a1 = con - a1;
519 	      a2 = a_elem - a1;
520 	      con = dt * split;
521 	      b1 = con - dt;
522 	      b1 = con - b1;
523 	      b2 = dt - b1;
524 
525 	      head_prod2 = a_elem * dt;
526 	      tail_prod2 =
527 		(((a1 * b1 - head_prod2) + a1 * b2) + a2 * b1) + a2 * b2;
528 	    }
529 	  }
530 	  {
531 	    /* Compute double-double = double-double + double-double. */
532 	    double bv;
533 	    double s1, s2, t1, t2;
534 
535 	    /* Add two hi words. */
536 	    s1 = head_sum2 + head_prod2;
537 	    bv = s1 - head_sum2;
538 	    s2 = ((head_prod2 - bv) + (head_sum2 - (s1 - bv)));
539 
540 	    /* Add two lo words. */
541 	    t1 = tail_sum2 + tail_prod2;
542 	    bv = t1 - tail_sum2;
543 	    t2 = ((tail_prod2 - bv) + (tail_sum2 - (t1 - bv)));
544 
545 	    s2 += t1;
546 
547 	    /* Renormalize (s1, s2)  to  (t1, s2) */
548 	    t1 = s1 + s2;
549 	    s2 = s2 - (t1 - s1);
550 
551 	    t2 += s2;
552 
553 	    /* Renormalize (t1, t2)  */
554 	    head_sum2 = t1 + t2;
555 	    tail_sum2 = t2 - (head_sum2 - t1);
556 	  }
557 	}
558 	{
559 	  /* Compute double-double = double-double + double-double. */
560 	  double bv;
561 	  double s1, s2, t1, t2;
562 
563 	  /* Add two hi words. */
564 	  s1 = head_sum1 + head_sum2;
565 	  bv = s1 - head_sum1;
566 	  s2 = ((head_sum2 - bv) + (head_sum1 - (s1 - bv)));
567 
568 	  /* Add two lo words. */
569 	  t1 = tail_sum1 + tail_sum2;
570 	  bv = t1 - tail_sum1;
571 	  t2 = ((tail_sum2 - bv) + (tail_sum1 - (t1 - bv)));
572 
573 	  s2 += t1;
574 
575 	  /* Renormalize (s1, s2)  to  (t1, s2) */
576 	  t1 = s1 + s2;
577 	  s2 = s2 - (t1 - s1);
578 
579 	  t2 += s2;
580 
581 	  /* Renormalize (t1, t2)  */
582 	  head_sum1 = t1 + t2;
583 	  tail_sum1 = t2 - (head_sum1 - t1);
584 	}
585 	{
586 	  /* Compute double-double = double-double * double. */
587 	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
588 
589 	  con = head_sum1 * split;
590 	  a11 = con - head_sum1;
591 	  a11 = con - a11;
592 	  a21 = head_sum1 - a11;
593 	  con = alpha_i * split;
594 	  b1 = con - alpha_i;
595 	  b1 = con - b1;
596 	  b2 = alpha_i - b1;
597 
598 	  c11 = head_sum1 * alpha_i;
599 	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
600 
601 	  c2 = tail_sum1 * alpha_i;
602 	  t1 = c11 + c2;
603 	  t2 = (c2 - (t1 - c11)) + c21;
604 
605 	  head_tmp1 = t1 + t2;
606 	  tail_tmp1 = t2 - (head_tmp1 - t1);
607 	}
608 	y_elem = y_i[yi];
609 	{
610 	  /* Compute double_double = double * double. */
611 	  double a1, a2, b1, b2, con;
612 
613 	  con = y_elem * split;
614 	  a1 = con - y_elem;
615 	  a1 = con - a1;
616 	  a2 = y_elem - a1;
617 	  con = beta_i * split;
618 	  b1 = con - beta_i;
619 	  b1 = con - b1;
620 	  b2 = beta_i - b1;
621 
622 	  head_tmp2 = y_elem * beta_i;
623 	  tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
624 	}
625 	{
626 	  /* Compute double-double = double-double + double-double. */
627 	  double bv;
628 	  double s1, s2, t1, t2;
629 
630 	  /* Add two hi words. */
631 	  s1 = head_tmp1 + head_tmp2;
632 	  bv = s1 - head_tmp1;
633 	  s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
634 
635 	  /* Add two lo words. */
636 	  t1 = tail_tmp1 + tail_tmp2;
637 	  bv = t1 - tail_tmp1;
638 	  t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
639 
640 	  s2 += t1;
641 
642 	  /* Renormalize (s1, s2)  to  (t1, s2) */
643 	  t1 = s1 + s2;
644 	  s2 = s2 - (t1 - s1);
645 
646 	  t2 += s2;
647 
648 	  /* Renormalize (t1, t2)  */
649 	  head_tmp3 = t1 + t2;
650 	  tail_tmp3 = t2 - (head_tmp3 - t1);
651 	}
652 	y_i[yi] = head_tmp3;
653       }
654 
655       FPU_FIX_STOP;
656 
657       break;
658     }
659   }
660 }				/* end BLAS_dsymv2_d_s_x */
661