1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_dsum_x(int n,const double * x,int incx,double * sum,enum blas_prec_type prec)3 void BLAS_dsum_x(int n, const double *x, int incx,
4 double *sum, enum blas_prec_type prec)
5
6 /*
7 * Purpose
8 * =======
9 *
10 * This routine computes the summation:
11 *
12 * sum <- SUM_{i=0, n-1} x[i].
13 *
14 * Arguments
15 * =========
16 *
17 * n (input) int
18 * The length of vector x.
19 *
20 * x (input) const double*
21 * Array of length n.
22 *
23 * incx (input) int
24 * The stride used to access components x[i].
25 *
26 * sum (output) double*
27 *
28 * prec (input) enum blas_prec_type
29 * Specifies the internal precision to be used.
30 * = blas_prec_single: single precision.
31 * = blas_prec_double: double precision.
32 * = blas_prec_extra : anything at least 1.5 times as accurate
33 * than double, and wider than 80-bits.
34 * We use double-double in our implementation.
35 *
36 */
37 {
38 static const char routine_name[] = "BLAS_dsum_x";
39 switch (prec) {
40 case blas_prec_single:
41 case blas_prec_double:
42 case blas_prec_indigenous:{
43
44 int i, xi;
45 double *sum_i = sum;
46 const double *x_i = x;
47 double x_elem;
48 double tmp;
49
50
51 /* Test the input parameters. */
52 if (n < 0)
53 BLAS_error(routine_name, -1, n, NULL);
54 if (incx == 0)
55 BLAS_error(routine_name, -3, incx, NULL);
56
57 /* Immediate return. */
58 if (n <= 0) {
59 *sum_i = 0.0;
60 return;
61 }
62
63
64
65 tmp = 0.0;
66
67
68 if (incx < 0)
69 xi = -(n - 1) * incx;
70 else
71 xi = 0;
72
73 for (i = 0; i < n; i++, xi += incx) {
74 x_elem = x_i[xi];
75 tmp = tmp + x_elem;
76 }
77 *sum = tmp;
78
79
80
81 break;
82 }
83
84 case blas_prec_extra:
85 {
86 int i, xi;
87 double *sum_i = sum;
88 const double *x_i = x;
89 double x_elem;
90 double head_tmp, tail_tmp;
91 FPU_FIX_DECL;
92
93 /* Test the input parameters. */
94 if (n < 0)
95 BLAS_error(routine_name, -1, n, NULL);
96 if (incx == 0)
97 BLAS_error(routine_name, -3, incx, NULL);
98
99 /* Immediate return. */
100 if (n <= 0) {
101 *sum_i = 0.0;
102 return;
103 }
104
105 FPU_FIX_START;
106
107 head_tmp = tail_tmp = 0.0;
108
109
110 if (incx < 0)
111 xi = -(n - 1) * incx;
112 else
113 xi = 0;
114
115 for (i = 0; i < n; i++, xi += incx) {
116 x_elem = x_i[xi];
117 {
118 /* Compute double-double = double-double + double. */
119 double e, t1, t2;
120
121 /* Knuth trick. */
122 t1 = head_tmp + x_elem;
123 e = t1 - head_tmp;
124 t2 = ((x_elem - e) + (head_tmp - (t1 - e))) + tail_tmp;
125
126 /* The result is t1 + t2, after normalization. */
127 head_tmp = t1 + t2;
128 tail_tmp = t2 - (head_tmp - t1);
129 }
130 }
131 *sum = head_tmp;
132
133 FPU_FIX_STOP;
134 }
135 break;
136 }
137 }
138