1 #include <blas_extended.h>
2 #include <blas_extended_private.h>
3 #include <blas_fpu.h>
BLAS_chemv2_c_s(enum blas_order_type order,enum blas_uplo_type uplo,int n,const void * alpha,const void * a,int lda,const float * x_head,const float * x_tail,int incx,const void * beta,const float * y,int incy)4 void BLAS_chemv2_c_s(enum blas_order_type order, enum blas_uplo_type uplo,
5 		     int n, const void *alpha, const void *a, int lda,
6 		     const float *x_head, const float *x_tail, int incx,
7 		     const void *beta, const float *y, int incy)
8 
9 /*
10  * Purpose
11  * =======
12  *
13  * This routines computes the matrix product:
14  *
15  *     y  <-  alpha * A * (x_head + x_tail) + beta * y
16  *
17  * where A is a complex Hermitian matrix.
18  *
19  * Arguments
20  * =========
21  *
22  * order   (input) enum blas_order_type
23  *         Storage format of input symmetric matrix A.
24  *
25  * uplo    (input) enum blas_uplo_type
26  *         Determines which half of matrix A (upper or lower triangle)
27  *         is accessed.
28  *
29  * n       (input) int
30  *         Dimension of A and size of vectors x, y.
31  *
32  * alpha   (input) const void*
33  *
34  * a       (input) void*
35  *         Matrix A.
36  *
37  * lda     (input) int
38  *         Leading dimension of matrix A.
39  *
40  * x_head  (input) float*
41  *         Vector x_head
42  *
43  * x_tail  (input) float*
44  *         Vector x_tail
45  *
46  * incx    (input) int
47  *         Stride for vector x.
48  *
49  * beta    (input) const void*
50  *
51  * y       (input) float*
52  *         Vector y.
53  *
54  * incy    (input) int
55  *         Stride for vector y.
56  *
57  */
58 {
59   /* Routine name */
60   const char routine_name[] = "BLAS_chemv2_c_s";
61 
62   int i, j;
63   int xi, yi, xi0, yi0;
64   int aij, ai;
65   int incai;
66   int incaij, incaij2;
67 
68   const float *a_i = (float *) a;
69   const float *x_head_i = x_head;
70   const float *x_tail_i = x_tail;
71   float *y_i = (float *) y;
72   float *alpha_i = (float *) alpha;
73   float *beta_i = (float *) beta;
74   float a_elem[2];
75   float x_elem;
76   float y_elem[2];
77   float diag_elem;
78   float prod1[2];
79   float prod2[2];
80   float sum1[2];
81   float sum2[2];
82   float tmp1[2];
83   float tmp2[2];
84   float tmp3[2];
85 
86 
87 
88   /* Test for no-op */
89   if (n <= 0) {
90     return;
91   }
92   if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
93       && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
94     return;
95   }
96 
97   /* Check for error conditions. */
98   if (n < 0) {
99     BLAS_error(routine_name, -3, n, NULL);
100   }
101   if (lda < n) {
102     BLAS_error(routine_name, -6, n, NULL);
103   }
104   if (incx == 0) {
105     BLAS_error(routine_name, -9, incx, NULL);
106   }
107   if (incy == 0) {
108     BLAS_error(routine_name, -12, incy, NULL);
109   }
110 
111   if ((order == blas_colmajor && uplo == blas_upper) ||
112       (order == blas_rowmajor && uplo == blas_lower)) {
113     incai = lda;
114     incaij = 1;
115     incaij2 = lda;
116   } else {
117     incai = 1;
118     incaij = lda;
119     incaij2 = 1;
120   }
121 
122 
123   incy *= 2;
124   incai *= 2;
125   incaij *= 2;
126   incaij2 *= 2;
127   xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
128   yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
129 
130 
131 
132   /* The most general form,   y <--- alpha * A * (x_head + x_tail) + beta * y   */
133   if (uplo == blas_lower) {
134     for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
135       sum1[0] = sum1[1] = 0.0;
136       sum2[0] = sum2[1] = 0.0;
137       for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
138 	a_elem[0] = a_i[aij];
139 	a_elem[1] = a_i[aij + 1];
140 	x_elem = x_head_i[xi];
141 	{
142 	  prod1[0] = a_elem[0] * x_elem;
143 	  prod1[1] = a_elem[1] * x_elem;
144 	}
145 	sum1[0] = sum1[0] + prod1[0];
146 	sum1[1] = sum1[1] + prod1[1];
147 	x_elem = x_tail_i[xi];
148 	{
149 	  prod2[0] = a_elem[0] * x_elem;
150 	  prod2[1] = a_elem[1] * x_elem;
151 	}
152 	sum2[0] = sum2[0] + prod2[0];
153 	sum2[1] = sum2[1] + prod2[1];
154       }
155 
156       diag_elem = a_i[aij];
157       x_elem = x_head_i[xi];
158       prod1[0] = x_elem * diag_elem;
159       prod1[1] = 0.0;
160       sum1[0] = sum1[0] + prod1[0];
161       sum1[1] = sum1[1] + prod1[1];
162       x_elem = x_tail_i[xi];
163       prod2[0] = x_elem * diag_elem;
164       prod2[1] = 0.0;
165       sum2[0] = sum2[0] + prod2[0];
166       sum2[1] = sum2[1] + prod2[1];
167       j++;
168       aij += incaij2;
169       xi += incx;
170 
171       for (; j < n; j++, aij += incaij2, xi += incx) {
172 	a_elem[0] = a_i[aij];
173 	a_elem[1] = a_i[aij + 1];
174 	a_elem[1] = -a_elem[1];
175 	x_elem = x_head_i[xi];
176 	{
177 	  prod1[0] = a_elem[0] * x_elem;
178 	  prod1[1] = a_elem[1] * x_elem;
179 	}
180 	sum1[0] = sum1[0] + prod1[0];
181 	sum1[1] = sum1[1] + prod1[1];
182 	x_elem = x_tail_i[xi];
183 	{
184 	  prod2[0] = a_elem[0] * x_elem;
185 	  prod2[1] = a_elem[1] * x_elem;
186 	}
187 	sum2[0] = sum2[0] + prod2[0];
188 	sum2[1] = sum2[1] + prod2[1];
189       }
190       sum1[0] = sum1[0] + sum2[0];
191       sum1[1] = sum1[1] + sum2[1];
192       {
193 	tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
194 	tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
195       }
196 
197       y_elem[0] = y_i[yi];
198       y_elem[1] = y_i[yi + 1];
199       {
200 	tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
201 	tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
202       }
203 
204       tmp3[0] = tmp1[0] + tmp2[0];
205       tmp3[1] = tmp1[1] + tmp2[1];
206       y_i[yi] = tmp3[0];
207       y_i[yi + 1] = tmp3[1];
208     }
209   } else {
210     /* uplo == blas_upper */
211     for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
212       sum1[0] = sum1[1] = 0.0;
213       sum2[0] = sum2[1] = 0.0;
214 
215       for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
216 	a_elem[0] = a_i[aij];
217 	a_elem[1] = a_i[aij + 1];
218 	a_elem[1] = -a_elem[1];
219 	x_elem = x_head_i[xi];
220 	{
221 	  prod1[0] = a_elem[0] * x_elem;
222 	  prod1[1] = a_elem[1] * x_elem;
223 	}
224 	sum1[0] = sum1[0] + prod1[0];
225 	sum1[1] = sum1[1] + prod1[1];
226 	x_elem = x_tail_i[xi];
227 	{
228 	  prod2[0] = a_elem[0] * x_elem;
229 	  prod2[1] = a_elem[1] * x_elem;
230 	}
231 	sum2[0] = sum2[0] + prod2[0];
232 	sum2[1] = sum2[1] + prod2[1];
233       }
234 
235       diag_elem = a_i[aij];
236       x_elem = x_head_i[xi];
237       prod1[0] = x_elem * diag_elem;
238       prod1[1] = 0.0;
239       sum1[0] = sum1[0] + prod1[0];
240       sum1[1] = sum1[1] + prod1[1];
241       x_elem = x_tail_i[xi];
242       prod2[0] = x_elem * diag_elem;
243       prod2[1] = 0.0;
244       sum2[0] = sum2[0] + prod2[0];
245       sum2[1] = sum2[1] + prod2[1];
246       j++;
247       aij += incaij2;
248       xi += incx;
249 
250       for (; j < n; j++, aij += incaij2, xi += incx) {
251 	a_elem[0] = a_i[aij];
252 	a_elem[1] = a_i[aij + 1];
253 	x_elem = x_head_i[xi];
254 	{
255 	  prod1[0] = a_elem[0] * x_elem;
256 	  prod1[1] = a_elem[1] * x_elem;
257 	}
258 	sum1[0] = sum1[0] + prod1[0];
259 	sum1[1] = sum1[1] + prod1[1];
260 	x_elem = x_tail_i[xi];
261 	{
262 	  prod2[0] = a_elem[0] * x_elem;
263 	  prod2[1] = a_elem[1] * x_elem;
264 	}
265 	sum2[0] = sum2[0] + prod2[0];
266 	sum2[1] = sum2[1] + prod2[1];
267       }
268       sum1[0] = sum1[0] + sum2[0];
269       sum1[1] = sum1[1] + sum2[1];
270       {
271 	tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
272 	tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
273       }
274 
275       y_elem[0] = y_i[yi];
276       y_elem[1] = y_i[yi + 1];
277       {
278 	tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
279 	tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
280       }
281 
282       tmp3[0] = tmp1[0] + tmp2[0];
283       tmp3[1] = tmp1[1] + tmp2[1];
284       y_i[yi] = tmp3[0];
285       y_i[yi + 1] = tmp3[1];
286     }
287   }
288 
289 
290 
291 }				/* end BLAS_chemv2_c_s */
292