1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_zsum_x(int n,const void * x,int incx,void * sum,enum blas_prec_type prec)3 void BLAS_zsum_x(int n, const void *x, int incx,
4 void *sum, enum blas_prec_type prec)
5
6 /*
7 * Purpose
8 * =======
9 *
10 * This routine computes the summation:
11 *
12 * sum <- SUM_{i=0, n-1} x[i].
13 *
14 * Arguments
15 * =========
16 *
17 * n (input) int
18 * The length of vector x.
19 *
20 * x (input) const void*
21 * Array of length n.
22 *
23 * incx (input) int
24 * The stride used to access components x[i].
25 *
26 * sum (output) void*
27 *
28 * prec (input) enum blas_prec_type
29 * Specifies the internal precision to be used.
30 * = blas_prec_single: single precision.
31 * = blas_prec_double: double precision.
32 * = blas_prec_extra : anything at least 1.5 times as accurate
33 * than double, and wider than 80-bits.
34 * We use double-double in our implementation.
35 *
36 */
37 {
38 static const char routine_name[] = "BLAS_zsum_x";
39 switch (prec) {
40 case blas_prec_single:
41 case blas_prec_double:
42 case blas_prec_indigenous:{
43
44 int i, xi;
45 double *sum_i = (double *) sum;
46 const double *x_i = (double *) x;
47 double x_elem[2];
48 double tmp[2];
49
50
51 /* Test the input parameters. */
52 if (n < 0)
53 BLAS_error(routine_name, -1, n, NULL);
54 if (incx == 0)
55 BLAS_error(routine_name, -3, incx, NULL);
56
57 /* Immediate return. */
58 if (n <= 0) {
59 sum_i[0] = sum_i[1] = 0.0;
60 return;
61 }
62
63
64
65 tmp[0] = tmp[1] = 0.0;
66
67 incx *= 2;
68 if (incx < 0)
69 xi = -(n - 1) * incx;
70 else
71 xi = 0;
72
73 for (i = 0; i < n; i++, xi += incx) {
74 x_elem[0] = x_i[xi];
75 x_elem[1] = x_i[xi + 1];
76 tmp[0] = tmp[0] + x_elem[0];
77 tmp[1] = tmp[1] + x_elem[1];
78 }
79 ((double *) sum)[0] = tmp[0];
80 ((double *) sum)[1] = tmp[1];
81
82
83
84 break;
85 }
86
87 case blas_prec_extra:
88 {
89 int i, xi;
90 double *sum_i = (double *) sum;
91 const double *x_i = (double *) x;
92 double x_elem[2];
93 double head_tmp[2], tail_tmp[2];
94 FPU_FIX_DECL;
95
96 /* Test the input parameters. */
97 if (n < 0)
98 BLAS_error(routine_name, -1, n, NULL);
99 if (incx == 0)
100 BLAS_error(routine_name, -3, incx, NULL);
101
102 /* Immediate return. */
103 if (n <= 0) {
104 sum_i[0] = sum_i[1] = 0.0;
105 return;
106 }
107
108 FPU_FIX_START;
109
110 head_tmp[0] = head_tmp[1] = tail_tmp[0] = tail_tmp[1] = 0.0;
111
112 incx *= 2;
113 if (incx < 0)
114 xi = -(n - 1) * incx;
115 else
116 xi = 0;
117
118 for (i = 0; i < n; i++, xi += incx) {
119 x_elem[0] = x_i[xi];
120 x_elem[1] = x_i[xi + 1];
121 {
122 double head_t, tail_t;
123 double head_a, tail_a;
124 head_a = head_tmp[0];
125 tail_a = tail_tmp[0];
126 {
127 /* Compute double-double = double-double + double. */
128 double e, t1, t2;
129
130 /* Knuth trick. */
131 t1 = head_a + x_elem[0];
132 e = t1 - head_a;
133 t2 = ((x_elem[0] - e) + (head_a - (t1 - e))) + tail_a;
134
135 /* The result is t1 + t2, after normalization. */
136 head_t = t1 + t2;
137 tail_t = t2 - (head_t - t1);
138 }
139 head_tmp[0] = head_t;
140 tail_tmp[0] = tail_t;
141 head_a = head_tmp[1];
142 tail_a = tail_tmp[1];
143 {
144 /* Compute double-double = double-double + double. */
145 double e, t1, t2;
146
147 /* Knuth trick. */
148 t1 = head_a + x_elem[1];
149 e = t1 - head_a;
150 t2 = ((x_elem[1] - e) + (head_a - (t1 - e))) + tail_a;
151
152 /* The result is t1 + t2, after normalization. */
153 head_t = t1 + t2;
154 tail_t = t2 - (head_t - t1);
155 }
156 head_tmp[1] = head_t;
157 tail_tmp[1] = tail_t;
158 }
159 }
160 ((double *) sum)[0] = head_tmp[0];
161 ((double *) sum)[1] = head_tmp[1];
162
163 FPU_FIX_STOP;
164 }
165 break;
166 }
167
168 }
169