1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_zsum_x(int n,const void * x,int incx,void * sum,enum blas_prec_type prec)3 void BLAS_zsum_x(int n, const void *x, int incx,
4 		 void *sum, enum blas_prec_type prec)
5 
6 /*
7  * Purpose
8  * =======
9  *
10  * This routine computes the summation:
11  *
12  *     sum <- SUM_{i=0, n-1} x[i].
13  *
14  * Arguments
15  * =========
16  *
17  * n      (input) int
18  *        The length of vector x.
19  *
20  * x      (input) const void*
21  *        Array of length n.
22  *
23  * incx   (input) int
24  *        The stride used to access components x[i].
25  *
26  * sum    (output) void*
27  *
28  * prec   (input) enum blas_prec_type
29  *        Specifies the internal precision to be used.
30  *        = blas_prec_single: single precision.
31  *        = blas_prec_double: double precision.
32  *        = blas_prec_extra : anything at least 1.5 times as accurate
33  *                            than double, and wider than 80-bits.
34  *                            We use double-double in our implementation.
35  *
36  */
37 {
38   static const char routine_name[] = "BLAS_zsum_x";
39   switch (prec) {
40   case blas_prec_single:
41   case blas_prec_double:
42   case blas_prec_indigenous:{
43 
44       int i, xi;
45       double *sum_i = (double *) sum;
46       const double *x_i = (double *) x;
47       double x_elem[2];
48       double tmp[2];
49 
50 
51       /* Test the input parameters. */
52       if (n < 0)
53 	BLAS_error(routine_name, -1, n, NULL);
54       if (incx == 0)
55 	BLAS_error(routine_name, -3, incx, NULL);
56 
57       /* Immediate return. */
58       if (n <= 0) {
59 	sum_i[0] = sum_i[1] = 0.0;
60 	return;
61       }
62 
63 
64 
65       tmp[0] = tmp[1] = 0.0;
66 
67       incx *= 2;
68       if (incx < 0)
69 	xi = -(n - 1) * incx;
70       else
71 	xi = 0;
72 
73       for (i = 0; i < n; i++, xi += incx) {
74 	x_elem[0] = x_i[xi];
75 	x_elem[1] = x_i[xi + 1];
76 	tmp[0] = tmp[0] + x_elem[0];
77 	tmp[1] = tmp[1] + x_elem[1];
78       }
79       ((double *) sum)[0] = tmp[0];
80       ((double *) sum)[1] = tmp[1];
81 
82 
83 
84       break;
85     }
86 
87   case blas_prec_extra:
88     {
89       int i, xi;
90       double *sum_i = (double *) sum;
91       const double *x_i = (double *) x;
92       double x_elem[2];
93       double head_tmp[2], tail_tmp[2];
94       FPU_FIX_DECL;
95 
96       /* Test the input parameters. */
97       if (n < 0)
98 	BLAS_error(routine_name, -1, n, NULL);
99       if (incx == 0)
100 	BLAS_error(routine_name, -3, incx, NULL);
101 
102       /* Immediate return. */
103       if (n <= 0) {
104 	sum_i[0] = sum_i[1] = 0.0;
105 	return;
106       }
107 
108       FPU_FIX_START;
109 
110       head_tmp[0] = head_tmp[1] = tail_tmp[0] = tail_tmp[1] = 0.0;
111 
112       incx *= 2;
113       if (incx < 0)
114 	xi = -(n - 1) * incx;
115       else
116 	xi = 0;
117 
118       for (i = 0; i < n; i++, xi += incx) {
119 	x_elem[0] = x_i[xi];
120 	x_elem[1] = x_i[xi + 1];
121 	{
122 	  double head_t, tail_t;
123 	  double head_a, tail_a;
124 	  head_a = head_tmp[0];
125 	  tail_a = tail_tmp[0];
126 	  {
127 	    /* Compute double-double = double-double + double. */
128 	    double e, t1, t2;
129 
130 	    /* Knuth trick. */
131 	    t1 = head_a + x_elem[0];
132 	    e = t1 - head_a;
133 	    t2 = ((x_elem[0] - e) + (head_a - (t1 - e))) + tail_a;
134 
135 	    /* The result is t1 + t2, after normalization. */
136 	    head_t = t1 + t2;
137 	    tail_t = t2 - (head_t - t1);
138 	  }
139 	  head_tmp[0] = head_t;
140 	  tail_tmp[0] = tail_t;
141 	  head_a = head_tmp[1];
142 	  tail_a = tail_tmp[1];
143 	  {
144 	    /* Compute double-double = double-double + double. */
145 	    double e, t1, t2;
146 
147 	    /* Knuth trick. */
148 	    t1 = head_a + x_elem[1];
149 	    e = t1 - head_a;
150 	    t2 = ((x_elem[1] - e) + (head_a - (t1 - e))) + tail_a;
151 
152 	    /* The result is t1 + t2, after normalization. */
153 	    head_t = t1 + t2;
154 	    tail_t = t2 - (head_t - t1);
155 	  }
156 	  head_tmp[1] = head_t;
157 	  tail_tmp[1] = tail_t;
158 	}
159       }
160       ((double *) sum)[0] = head_tmp[0];
161       ((double *) sum)[1] = head_tmp[1];
162 
163       FPU_FIX_STOP;
164     }
165     break;
166   }
167 
168 }
169