1 #include <blas_extended.h>
2 #include <blas_extended_private.h>
3 #include <blas_fpu.h>
BLAS_chemv2_c_s(enum blas_order_type order,enum blas_uplo_type uplo,int n,const void * alpha,const void * a,int lda,const float * x_head,const float * x_tail,int incx,const void * beta,const float * y,int incy)4 void BLAS_chemv2_c_s(enum blas_order_type order, enum blas_uplo_type uplo,
5 int n, const void *alpha, const void *a, int lda,
6 const float *x_head, const float *x_tail, int incx,
7 const void *beta, const float *y, int incy)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * This routines computes the matrix product:
14 *
15 * y <- alpha * A * (x_head + x_tail) + beta * y
16 *
17 * where A is a complex Hermitian matrix.
18 *
19 * Arguments
20 * =========
21 *
22 * order (input) enum blas_order_type
23 * Storage format of input symmetric matrix A.
24 *
25 * uplo (input) enum blas_uplo_type
26 * Determines which half of matrix A (upper or lower triangle)
27 * is accessed.
28 *
29 * n (input) int
30 * Dimension of A and size of vectors x, y.
31 *
32 * alpha (input) const void*
33 *
34 * a (input) void*
35 * Matrix A.
36 *
37 * lda (input) int
38 * Leading dimension of matrix A.
39 *
40 * x_head (input) float*
41 * Vector x_head
42 *
43 * x_tail (input) float*
44 * Vector x_tail
45 *
46 * incx (input) int
47 * Stride for vector x.
48 *
49 * beta (input) const void*
50 *
51 * y (input) float*
52 * Vector y.
53 *
54 * incy (input) int
55 * Stride for vector y.
56 *
57 */
58 {
59 /* Routine name */
60 const char routine_name[] = "BLAS_chemv2_c_s";
61
62 int i, j;
63 int xi, yi, xi0, yi0;
64 int aij, ai;
65 int incai;
66 int incaij, incaij2;
67
68 const float *a_i = (float *) a;
69 const float *x_head_i = x_head;
70 const float *x_tail_i = x_tail;
71 float *y_i = (float *) y;
72 float *alpha_i = (float *) alpha;
73 float *beta_i = (float *) beta;
74 float a_elem[2];
75 float x_elem;
76 float y_elem[2];
77 float diag_elem;
78 float prod1[2];
79 float prod2[2];
80 float sum1[2];
81 float sum2[2];
82 float tmp1[2];
83 float tmp2[2];
84 float tmp3[2];
85
86
87
88 /* Test for no-op */
89 if (n <= 0) {
90 return;
91 }
92 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
93 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
94 return;
95 }
96
97 /* Check for error conditions. */
98 if (n < 0) {
99 BLAS_error(routine_name, -3, n, NULL);
100 }
101 if (lda < n) {
102 BLAS_error(routine_name, -6, n, NULL);
103 }
104 if (incx == 0) {
105 BLAS_error(routine_name, -9, incx, NULL);
106 }
107 if (incy == 0) {
108 BLAS_error(routine_name, -12, incy, NULL);
109 }
110
111 if ((order == blas_colmajor && uplo == blas_upper) ||
112 (order == blas_rowmajor && uplo == blas_lower)) {
113 incai = lda;
114 incaij = 1;
115 incaij2 = lda;
116 } else {
117 incai = 1;
118 incaij = lda;
119 incaij2 = 1;
120 }
121
122
123 incy *= 2;
124 incai *= 2;
125 incaij *= 2;
126 incaij2 *= 2;
127 xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
128 yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
129
130
131
132 /* The most general form, y <--- alpha * A * (x_head + x_tail) + beta * y */
133 if (uplo == blas_lower) {
134 for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
135 sum1[0] = sum1[1] = 0.0;
136 sum2[0] = sum2[1] = 0.0;
137 for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
138 a_elem[0] = a_i[aij];
139 a_elem[1] = a_i[aij + 1];
140 x_elem = x_head_i[xi];
141 {
142 prod1[0] = a_elem[0] * x_elem;
143 prod1[1] = a_elem[1] * x_elem;
144 }
145 sum1[0] = sum1[0] + prod1[0];
146 sum1[1] = sum1[1] + prod1[1];
147 x_elem = x_tail_i[xi];
148 {
149 prod2[0] = a_elem[0] * x_elem;
150 prod2[1] = a_elem[1] * x_elem;
151 }
152 sum2[0] = sum2[0] + prod2[0];
153 sum2[1] = sum2[1] + prod2[1];
154 }
155
156 diag_elem = a_i[aij];
157 x_elem = x_head_i[xi];
158 prod1[0] = x_elem * diag_elem;
159 prod1[1] = 0.0;
160 sum1[0] = sum1[0] + prod1[0];
161 sum1[1] = sum1[1] + prod1[1];
162 x_elem = x_tail_i[xi];
163 prod2[0] = x_elem * diag_elem;
164 prod2[1] = 0.0;
165 sum2[0] = sum2[0] + prod2[0];
166 sum2[1] = sum2[1] + prod2[1];
167 j++;
168 aij += incaij2;
169 xi += incx;
170
171 for (; j < n; j++, aij += incaij2, xi += incx) {
172 a_elem[0] = a_i[aij];
173 a_elem[1] = a_i[aij + 1];
174 a_elem[1] = -a_elem[1];
175 x_elem = x_head_i[xi];
176 {
177 prod1[0] = a_elem[0] * x_elem;
178 prod1[1] = a_elem[1] * x_elem;
179 }
180 sum1[0] = sum1[0] + prod1[0];
181 sum1[1] = sum1[1] + prod1[1];
182 x_elem = x_tail_i[xi];
183 {
184 prod2[0] = a_elem[0] * x_elem;
185 prod2[1] = a_elem[1] * x_elem;
186 }
187 sum2[0] = sum2[0] + prod2[0];
188 sum2[1] = sum2[1] + prod2[1];
189 }
190 sum1[0] = sum1[0] + sum2[0];
191 sum1[1] = sum1[1] + sum2[1];
192 {
193 tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
194 tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
195 }
196
197 y_elem[0] = y_i[yi];
198 y_elem[1] = y_i[yi + 1];
199 {
200 tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
201 tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
202 }
203
204 tmp3[0] = tmp1[0] + tmp2[0];
205 tmp3[1] = tmp1[1] + tmp2[1];
206 y_i[yi] = tmp3[0];
207 y_i[yi + 1] = tmp3[1];
208 }
209 } else {
210 /* uplo == blas_upper */
211 for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
212 sum1[0] = sum1[1] = 0.0;
213 sum2[0] = sum2[1] = 0.0;
214
215 for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
216 a_elem[0] = a_i[aij];
217 a_elem[1] = a_i[aij + 1];
218 a_elem[1] = -a_elem[1];
219 x_elem = x_head_i[xi];
220 {
221 prod1[0] = a_elem[0] * x_elem;
222 prod1[1] = a_elem[1] * x_elem;
223 }
224 sum1[0] = sum1[0] + prod1[0];
225 sum1[1] = sum1[1] + prod1[1];
226 x_elem = x_tail_i[xi];
227 {
228 prod2[0] = a_elem[0] * x_elem;
229 prod2[1] = a_elem[1] * x_elem;
230 }
231 sum2[0] = sum2[0] + prod2[0];
232 sum2[1] = sum2[1] + prod2[1];
233 }
234
235 diag_elem = a_i[aij];
236 x_elem = x_head_i[xi];
237 prod1[0] = x_elem * diag_elem;
238 prod1[1] = 0.0;
239 sum1[0] = sum1[0] + prod1[0];
240 sum1[1] = sum1[1] + prod1[1];
241 x_elem = x_tail_i[xi];
242 prod2[0] = x_elem * diag_elem;
243 prod2[1] = 0.0;
244 sum2[0] = sum2[0] + prod2[0];
245 sum2[1] = sum2[1] + prod2[1];
246 j++;
247 aij += incaij2;
248 xi += incx;
249
250 for (; j < n; j++, aij += incaij2, xi += incx) {
251 a_elem[0] = a_i[aij];
252 a_elem[1] = a_i[aij + 1];
253 x_elem = x_head_i[xi];
254 {
255 prod1[0] = a_elem[0] * x_elem;
256 prod1[1] = a_elem[1] * x_elem;
257 }
258 sum1[0] = sum1[0] + prod1[0];
259 sum1[1] = sum1[1] + prod1[1];
260 x_elem = x_tail_i[xi];
261 {
262 prod2[0] = a_elem[0] * x_elem;
263 prod2[1] = a_elem[1] * x_elem;
264 }
265 sum2[0] = sum2[0] + prod2[0];
266 sum2[1] = sum2[1] + prod2[1];
267 }
268 sum1[0] = sum1[0] + sum2[0];
269 sum1[1] = sum1[1] + sum2[1];
270 {
271 tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
272 tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
273 }
274
275 y_elem[0] = y_i[yi];
276 y_elem[1] = y_i[yi + 1];
277 {
278 tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
279 tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
280 }
281
282 tmp3[0] = tmp1[0] + tmp2[0];
283 tmp3[1] = tmp1[1] + tmp2[1];
284 y_i[yi] = tmp3[0];
285 y_i[yi + 1] = tmp3[1];
286 }
287 }
288
289
290
291 } /* end BLAS_chemv2_c_s */
292