1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
3
BLAS_dtpmv_x(enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,double alpha,const double * tp,double * x,int incx,enum blas_prec_type prec)4 void BLAS_dtpmv_x(enum blas_order_type order, enum blas_uplo_type uplo,
5 enum blas_trans_type trans, enum blas_diag_type diag,
6 int n, double alpha, const double *tp,
7 double *x, int incx, enum blas_prec_type prec)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * Computes x = alpha * tp * x, x = alpha * tp_transpose * x,
14 * or x = alpha * tp_conjugate_transpose where tp is a triangular
15 * packed matrix.
16 *
17 * Arguments
18 * =========
19 *
20 * order (input) blas_order_type
21 * Order of tp; row or column major
22 *
23 * uplo (input) blas_uplo_type
24 * Whether tp is upper or lower
25 *
26 * trans (input) blas_trans_type
27 *
28 * diag (input) blas_diag_type
29 * Whether the diagonal entries of tp are 1
30 *
31 * n (input) int
32 * Dimension of tp and the length of vector x
33 *
34 * alpha (input) double
35 *
36 * tp (input) double*
37 *
38 * x (input) double*
39 *
40 * incx (input) int
41 * The stride for vector x.
42 *
43 * prec (input) enum blas_prec_type
44 * Specifies the internal precision to be used.
45 * = blas_prec_single: single precision.
46 * = blas_prec_double: double precision.
47 * = blas_prec_extra : anything at least 1.5 times as accurate
48 * than double, and wider than 80-bits.
49 * We use double-double in our implementation.
50 *
51 */
52 {
53 static const char routine_name[] = "BLAS_dtpmv_x";
54
55
56 switch (prec) {
57 case blas_prec_single:
58 case blas_prec_double:
59 case blas_prec_indigenous:{
60
61 {
62 int matrix_row, step, tp_index, tp_start, x_index, x_start;
63 int inctp, x_index2, stride, col_index, inctp2;
64
65 double alpha_i = alpha;
66
67 const double *tp_i = tp;
68 double *x_i = x;
69 double rowsum;
70 double rowtmp;
71 double result;
72 double matval;
73 double vecval;
74 double one;
75
76
77 one = 1.0;
78
79 inctp = 1;
80
81
82
83 if (incx < 0)
84 x_start = (-n + 1) * incx;
85 else
86 x_start = 0;
87
88 if (n < 1) {
89 return;
90 }
91
92 /* Check for error conditions. */
93 if (order != blas_colmajor && order != blas_rowmajor) {
94 BLAS_error(routine_name, -1, order, NULL);
95 }
96 if (uplo != blas_upper && uplo != blas_lower) {
97 BLAS_error(routine_name, -2, uplo, NULL);
98 }
99 if (incx == 0) {
100 BLAS_error(routine_name, -9, incx, NULL);
101 }
102
103
104
105 {
106 if ((uplo == blas_upper &&
107 trans == blas_no_trans && order == blas_rowmajor) ||
108 (uplo == blas_lower &&
109 trans != blas_no_trans && order == blas_colmajor)) {
110 tp_start = 0;
111 tp_index = tp_start;
112 for (matrix_row = 0; matrix_row < n; matrix_row++) {
113 x_index = x_start + incx * matrix_row;
114 x_index2 = x_index;
115 col_index = matrix_row;
116 rowsum = 0.0;
117 rowtmp = 0.0;
118 result = 0.0;
119 while (col_index < n) {
120 vecval = x_i[x_index];
121 if ((diag == blas_unit_diag) && (col_index == matrix_row)) {
122 rowtmp = vecval * one;
123 } else {
124 matval = tp_i[tp_index];
125 rowtmp = matval * vecval;
126 }
127 rowsum = rowsum + rowtmp;
128 x_index += incx;
129 tp_index += inctp;
130 col_index++;
131 }
132 result = rowsum * alpha_i;
133 x_i[x_index2] = result;
134 }
135 } else if ((uplo == blas_upper &&
136 trans == blas_no_trans && order == blas_colmajor) ||
137 (uplo == blas_lower &&
138 trans != blas_no_trans && order == blas_rowmajor)) {
139 tp_start = ((n - 1) * n) / 2;
140 inctp2 = n - 1;
141 x_index2 = x_start;
142 for (matrix_row = 0; matrix_row < n; matrix_row++, inctp2 = n - 1) {
143 x_index = x_start + incx * (n - 1);
144 tp_index = (tp_start + matrix_row) * inctp;
145 col_index = (n - 1) - matrix_row;
146 rowsum = 0.0;
147 rowtmp = 0.0;
148 result = 0.0;
149 while (col_index >= 0) {
150 vecval = x_i[x_index];
151 if ((diag == blas_unit_diag) && (col_index == 0)) {
152 rowtmp = vecval * one;
153 } else {
154 matval = tp_i[tp_index];
155 rowtmp = matval * vecval;
156 }
157 rowsum = rowsum + rowtmp;
158 x_index -= incx;
159 tp_index -= inctp2 * inctp;
160 inctp2--;
161 col_index--;
162 }
163 result = rowsum * alpha_i;
164 x_i[x_index2] = result;
165 x_index2 += incx;
166 }
167 } else if ((uplo == blas_lower &&
168 trans == blas_no_trans && order == blas_rowmajor) ||
169 (uplo == blas_upper &&
170 trans != blas_no_trans && order == blas_colmajor)) {
171 tp_start = (n - 1) + ((n - 1) * n) / 2;
172 tp_index = tp_start * inctp;
173 x_index = x_start + (n - 1) * incx;
174
175 for (matrix_row = n - 1; matrix_row >= 0; matrix_row--) {
176 x_index2 = x_index;
177 rowsum = 0.0;
178 rowtmp = 0.0;
179 result = 0.0;
180 for (step = 0; step <= matrix_row; step++) {
181 vecval = x_i[x_index2];
182 if ((diag == blas_unit_diag) && (step == 0)) {
183 rowtmp = vecval * one;
184 } else {
185 matval = tp_i[tp_index];
186 rowtmp = matval * vecval;
187 }
188 rowsum = rowsum + rowtmp;
189 x_index2 -= incx;
190 tp_index -= inctp;
191 }
192 result = rowsum * alpha_i;
193 x_i[x_index] = result;
194 x_index -= incx;
195 }
196 } else {
197 tp_start = 0;
198 x_index = x_start + (n - 1) * incx;
199 for (matrix_row = n - 1; matrix_row >= 0; matrix_row--) {
200 tp_index = matrix_row * inctp;
201 x_index2 = x_start;
202 rowsum = 0.0;
203 rowtmp = 0.0;
204 result = 0.0;
205 stride = n;
206 for (step = 0; step <= matrix_row; step++) {
207 vecval = x_i[x_index2];
208 if ((diag == blas_unit_diag) && (step == matrix_row)) {
209 rowtmp = vecval * one;
210 } else {
211 matval = tp_i[tp_index];
212 rowtmp = matval * vecval;
213 }
214 rowsum = rowsum + rowtmp;
215 stride--;
216 tp_index += stride * inctp;
217 x_index2 += incx;
218 }
219 result = rowsum * alpha_i;
220 x_i[x_index] = result;
221 x_index -= incx;
222 }
223 }
224 }
225
226
227 }
228 break;
229 }
230
231 case blas_prec_extra:{
232
233 {
234 int matrix_row, step, tp_index, tp_start, x_index, x_start;
235 int inctp, x_index2, stride, col_index, inctp2;
236
237 double alpha_i = alpha;
238
239 const double *tp_i = tp;
240 double *x_i = x;
241 double head_rowsum, tail_rowsum;
242 double head_rowtmp, tail_rowtmp;
243 double head_result, tail_result;
244 double matval;
245 double vecval;
246 double one;
247
248 FPU_FIX_DECL;
249 one = 1.0;
250
251 inctp = 1;
252
253
254
255 if (incx < 0)
256 x_start = (-n + 1) * incx;
257 else
258 x_start = 0;
259
260 if (n < 1) {
261 return;
262 }
263
264 /* Check for error conditions. */
265 if (order != blas_colmajor && order != blas_rowmajor) {
266 BLAS_error(routine_name, -1, order, NULL);
267 }
268 if (uplo != blas_upper && uplo != blas_lower) {
269 BLAS_error(routine_name, -2, uplo, NULL);
270 }
271 if (incx == 0) {
272 BLAS_error(routine_name, -9, incx, NULL);
273 }
274 FPU_FIX_START;
275
276
277 {
278 if ((uplo == blas_upper &&
279 trans == blas_no_trans && order == blas_rowmajor) ||
280 (uplo == blas_lower &&
281 trans != blas_no_trans && order == blas_colmajor)) {
282 tp_start = 0;
283 tp_index = tp_start;
284 for (matrix_row = 0; matrix_row < n; matrix_row++) {
285 x_index = x_start + incx * matrix_row;
286 x_index2 = x_index;
287 col_index = matrix_row;
288 head_rowsum = tail_rowsum = 0.0;
289 head_rowtmp = tail_rowtmp = 0.0;
290 head_result = tail_result = 0.0;
291 while (col_index < n) {
292 vecval = x_i[x_index];
293 if ((diag == blas_unit_diag) && (col_index == matrix_row)) {
294 {
295 /* Compute double_double = double * double. */
296 double a1, a2, b1, b2, con;
297
298 con = vecval * split;
299 a1 = con - vecval;
300 a1 = con - a1;
301 a2 = vecval - a1;
302 con = one * split;
303 b1 = con - one;
304 b1 = con - b1;
305 b2 = one - b1;
306
307 head_rowtmp = vecval * one;
308 tail_rowtmp =
309 (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) +
310 a2 * b2;
311 }
312 } else {
313 matval = tp_i[tp_index];
314 {
315 /* Compute double_double = double * double. */
316 double a1, a2, b1, b2, con;
317
318 con = matval * split;
319 a1 = con - matval;
320 a1 = con - a1;
321 a2 = matval - a1;
322 con = vecval * split;
323 b1 = con - vecval;
324 b1 = con - b1;
325 b2 = vecval - b1;
326
327 head_rowtmp = matval * vecval;
328 tail_rowtmp =
329 (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) +
330 a2 * b2;
331 }
332 }
333 {
334 /* Compute double-double = double-double + double-double. */
335 double bv;
336 double s1, s2, t1, t2;
337
338 /* Add two hi words. */
339 s1 = head_rowsum + head_rowtmp;
340 bv = s1 - head_rowsum;
341 s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));
342
343 /* Add two lo words. */
344 t1 = tail_rowsum + tail_rowtmp;
345 bv = t1 - tail_rowsum;
346 t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));
347
348 s2 += t1;
349
350 /* Renormalize (s1, s2) to (t1, s2) */
351 t1 = s1 + s2;
352 s2 = s2 - (t1 - s1);
353
354 t2 += s2;
355
356 /* Renormalize (t1, t2) */
357 head_rowsum = t1 + t2;
358 tail_rowsum = t2 - (head_rowsum - t1);
359 }
360 x_index += incx;
361 tp_index += inctp;
362 col_index++;
363 }
364 {
365 /* Compute double-double = double-double * double. */
366 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
367
368 con = head_rowsum * split;
369 a11 = con - head_rowsum;
370 a11 = con - a11;
371 a21 = head_rowsum - a11;
372 con = alpha_i * split;
373 b1 = con - alpha_i;
374 b1 = con - b1;
375 b2 = alpha_i - b1;
376
377 c11 = head_rowsum * alpha_i;
378 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
379
380 c2 = tail_rowsum * alpha_i;
381 t1 = c11 + c2;
382 t2 = (c2 - (t1 - c11)) + c21;
383
384 head_result = t1 + t2;
385 tail_result = t2 - (head_result - t1);
386 }
387 x_i[x_index2] = head_result;
388 }
389 } else if ((uplo == blas_upper &&
390 trans == blas_no_trans && order == blas_colmajor) ||
391 (uplo == blas_lower &&
392 trans != blas_no_trans && order == blas_rowmajor)) {
393 tp_start = ((n - 1) * n) / 2;
394 inctp2 = n - 1;
395 x_index2 = x_start;
396 for (matrix_row = 0; matrix_row < n; matrix_row++, inctp2 = n - 1) {
397 x_index = x_start + incx * (n - 1);
398 tp_index = (tp_start + matrix_row) * inctp;
399 col_index = (n - 1) - matrix_row;
400 head_rowsum = tail_rowsum = 0.0;
401 head_rowtmp = tail_rowtmp = 0.0;
402 head_result = tail_result = 0.0;
403 while (col_index >= 0) {
404 vecval = x_i[x_index];
405 if ((diag == blas_unit_diag) && (col_index == 0)) {
406 {
407 /* Compute double_double = double * double. */
408 double a1, a2, b1, b2, con;
409
410 con = vecval * split;
411 a1 = con - vecval;
412 a1 = con - a1;
413 a2 = vecval - a1;
414 con = one * split;
415 b1 = con - one;
416 b1 = con - b1;
417 b2 = one - b1;
418
419 head_rowtmp = vecval * one;
420 tail_rowtmp =
421 (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) +
422 a2 * b2;
423 }
424 } else {
425 matval = tp_i[tp_index];
426 {
427 /* Compute double_double = double * double. */
428 double a1, a2, b1, b2, con;
429
430 con = matval * split;
431 a1 = con - matval;
432 a1 = con - a1;
433 a2 = matval - a1;
434 con = vecval * split;
435 b1 = con - vecval;
436 b1 = con - b1;
437 b2 = vecval - b1;
438
439 head_rowtmp = matval * vecval;
440 tail_rowtmp =
441 (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) +
442 a2 * b2;
443 }
444 }
445 {
446 /* Compute double-double = double-double + double-double. */
447 double bv;
448 double s1, s2, t1, t2;
449
450 /* Add two hi words. */
451 s1 = head_rowsum + head_rowtmp;
452 bv = s1 - head_rowsum;
453 s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));
454
455 /* Add two lo words. */
456 t1 = tail_rowsum + tail_rowtmp;
457 bv = t1 - tail_rowsum;
458 t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));
459
460 s2 += t1;
461
462 /* Renormalize (s1, s2) to (t1, s2) */
463 t1 = s1 + s2;
464 s2 = s2 - (t1 - s1);
465
466 t2 += s2;
467
468 /* Renormalize (t1, t2) */
469 head_rowsum = t1 + t2;
470 tail_rowsum = t2 - (head_rowsum - t1);
471 }
472 x_index -= incx;
473 tp_index -= inctp2 * inctp;
474 inctp2--;
475 col_index--;
476 }
477 {
478 /* Compute double-double = double-double * double. */
479 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
480
481 con = head_rowsum * split;
482 a11 = con - head_rowsum;
483 a11 = con - a11;
484 a21 = head_rowsum - a11;
485 con = alpha_i * split;
486 b1 = con - alpha_i;
487 b1 = con - b1;
488 b2 = alpha_i - b1;
489
490 c11 = head_rowsum * alpha_i;
491 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
492
493 c2 = tail_rowsum * alpha_i;
494 t1 = c11 + c2;
495 t2 = (c2 - (t1 - c11)) + c21;
496
497 head_result = t1 + t2;
498 tail_result = t2 - (head_result - t1);
499 }
500 x_i[x_index2] = head_result;
501 x_index2 += incx;
502 }
503 } else if ((uplo == blas_lower &&
504 trans == blas_no_trans && order == blas_rowmajor) ||
505 (uplo == blas_upper &&
506 trans != blas_no_trans && order == blas_colmajor)) {
507 tp_start = (n - 1) + ((n - 1) * n) / 2;
508 tp_index = tp_start * inctp;
509 x_index = x_start + (n - 1) * incx;
510
511 for (matrix_row = n - 1; matrix_row >= 0; matrix_row--) {
512 x_index2 = x_index;
513 head_rowsum = tail_rowsum = 0.0;
514 head_rowtmp = tail_rowtmp = 0.0;
515 head_result = tail_result = 0.0;
516 for (step = 0; step <= matrix_row; step++) {
517 vecval = x_i[x_index2];
518 if ((diag == blas_unit_diag) && (step == 0)) {
519 {
520 /* Compute double_double = double * double. */
521 double a1, a2, b1, b2, con;
522
523 con = vecval * split;
524 a1 = con - vecval;
525 a1 = con - a1;
526 a2 = vecval - a1;
527 con = one * split;
528 b1 = con - one;
529 b1 = con - b1;
530 b2 = one - b1;
531
532 head_rowtmp = vecval * one;
533 tail_rowtmp =
534 (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) +
535 a2 * b2;
536 }
537 } else {
538 matval = tp_i[tp_index];
539 {
540 /* Compute double_double = double * double. */
541 double a1, a2, b1, b2, con;
542
543 con = matval * split;
544 a1 = con - matval;
545 a1 = con - a1;
546 a2 = matval - a1;
547 con = vecval * split;
548 b1 = con - vecval;
549 b1 = con - b1;
550 b2 = vecval - b1;
551
552 head_rowtmp = matval * vecval;
553 tail_rowtmp =
554 (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) +
555 a2 * b2;
556 }
557 }
558 {
559 /* Compute double-double = double-double + double-double. */
560 double bv;
561 double s1, s2, t1, t2;
562
563 /* Add two hi words. */
564 s1 = head_rowsum + head_rowtmp;
565 bv = s1 - head_rowsum;
566 s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));
567
568 /* Add two lo words. */
569 t1 = tail_rowsum + tail_rowtmp;
570 bv = t1 - tail_rowsum;
571 t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));
572
573 s2 += t1;
574
575 /* Renormalize (s1, s2) to (t1, s2) */
576 t1 = s1 + s2;
577 s2 = s2 - (t1 - s1);
578
579 t2 += s2;
580
581 /* Renormalize (t1, t2) */
582 head_rowsum = t1 + t2;
583 tail_rowsum = t2 - (head_rowsum - t1);
584 }
585 x_index2 -= incx;
586 tp_index -= inctp;
587 }
588 {
589 /* Compute double-double = double-double * double. */
590 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
591
592 con = head_rowsum * split;
593 a11 = con - head_rowsum;
594 a11 = con - a11;
595 a21 = head_rowsum - a11;
596 con = alpha_i * split;
597 b1 = con - alpha_i;
598 b1 = con - b1;
599 b2 = alpha_i - b1;
600
601 c11 = head_rowsum * alpha_i;
602 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
603
604 c2 = tail_rowsum * alpha_i;
605 t1 = c11 + c2;
606 t2 = (c2 - (t1 - c11)) + c21;
607
608 head_result = t1 + t2;
609 tail_result = t2 - (head_result - t1);
610 }
611 x_i[x_index] = head_result;
612 x_index -= incx;
613 }
614 } else {
615 tp_start = 0;
616 x_index = x_start + (n - 1) * incx;
617 for (matrix_row = n - 1; matrix_row >= 0; matrix_row--) {
618 tp_index = matrix_row * inctp;
619 x_index2 = x_start;
620 head_rowsum = tail_rowsum = 0.0;
621 head_rowtmp = tail_rowtmp = 0.0;
622 head_result = tail_result = 0.0;
623 stride = n;
624 for (step = 0; step <= matrix_row; step++) {
625 vecval = x_i[x_index2];
626 if ((diag == blas_unit_diag) && (step == matrix_row)) {
627 {
628 /* Compute double_double = double * double. */
629 double a1, a2, b1, b2, con;
630
631 con = vecval * split;
632 a1 = con - vecval;
633 a1 = con - a1;
634 a2 = vecval - a1;
635 con = one * split;
636 b1 = con - one;
637 b1 = con - b1;
638 b2 = one - b1;
639
640 head_rowtmp = vecval * one;
641 tail_rowtmp =
642 (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) +
643 a2 * b2;
644 }
645 } else {
646 matval = tp_i[tp_index];
647 {
648 /* Compute double_double = double * double. */
649 double a1, a2, b1, b2, con;
650
651 con = matval * split;
652 a1 = con - matval;
653 a1 = con - a1;
654 a2 = matval - a1;
655 con = vecval * split;
656 b1 = con - vecval;
657 b1 = con - b1;
658 b2 = vecval - b1;
659
660 head_rowtmp = matval * vecval;
661 tail_rowtmp =
662 (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) +
663 a2 * b2;
664 }
665 }
666 {
667 /* Compute double-double = double-double + double-double. */
668 double bv;
669 double s1, s2, t1, t2;
670
671 /* Add two hi words. */
672 s1 = head_rowsum + head_rowtmp;
673 bv = s1 - head_rowsum;
674 s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));
675
676 /* Add two lo words. */
677 t1 = tail_rowsum + tail_rowtmp;
678 bv = t1 - tail_rowsum;
679 t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));
680
681 s2 += t1;
682
683 /* Renormalize (s1, s2) to (t1, s2) */
684 t1 = s1 + s2;
685 s2 = s2 - (t1 - s1);
686
687 t2 += s2;
688
689 /* Renormalize (t1, t2) */
690 head_rowsum = t1 + t2;
691 tail_rowsum = t2 - (head_rowsum - t1);
692 }
693 stride--;
694 tp_index += stride * inctp;
695 x_index2 += incx;
696 }
697 {
698 /* Compute double-double = double-double * double. */
699 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
700
701 con = head_rowsum * split;
702 a11 = con - head_rowsum;
703 a11 = con - a11;
704 a21 = head_rowsum - a11;
705 con = alpha_i * split;
706 b1 = con - alpha_i;
707 b1 = con - b1;
708 b2 = alpha_i - b1;
709
710 c11 = head_rowsum * alpha_i;
711 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
712
713 c2 = tail_rowsum * alpha_i;
714 t1 = c11 + c2;
715 t2 = (c2 - (t1 - c11)) + c21;
716
717 head_result = t1 + t2;
718 tail_result = t2 - (head_result - t1);
719 }
720 x_i[x_index] = head_result;
721 x_index -= incx;
722 }
723 }
724 }
725
726
727 }
728 break;
729 }
730 }
731
732 }
733