1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
3
4 /*
5 * Purpose
6 * =======
7 *
8 * Computes y = alpha * ap * x + beta * y, where ap is a symmetric
9 * packed matrix.
10 *
11 * Arguments
12 * =========
13 *
14 * order (input) blas_order_type
15 * Order of ap; row or column major
16 *
17 * uplo (input) blas_uplo_type
18 * Whether ap is upper or lower
19 *
20 * n (input) int
21 * Dimension of ap and the length of vector x
22 *
23 * alpha (input) const void*
24 *
25 * ap (input) float*
26 *
27 * x (input) void*
28 *
29 * incx (input) int
30 * The stride for vector x.
31 *
32 * beta (input) const void*
33 *
34 * y (input/output) void*
35 *
36 * incy (input) int
37 * The stride for vector y.
38 *
39 */
BLAS_cspmv_s_c(enum blas_order_type order,enum blas_uplo_type uplo,int n,const void * alpha,const float * ap,const void * x,int incx,const void * beta,void * y,int incy)40 void BLAS_cspmv_s_c(enum blas_order_type order, enum blas_uplo_type uplo,
41 int n, const void *alpha, const float *ap,
42 const void *x, int incx, const void *beta,
43 void *y, int incy)
44 {
45 static const char routine_name[] = "BLAS_cspmv_s_c";
46
47 {
48 int matrix_row, step, ap_index, ap_start, x_index, x_start;
49 int y_start, y_index, incap;
50 float *alpha_i = (float *) alpha;
51 float *beta_i = (float *) beta;
52
53 const float *ap_i = ap;
54 const float *x_i = (float *) x;
55 float *y_i = (float *) y;
56 float rowsum[2];
57 float rowtmp[2];
58 float matval;
59 float vecval[2];
60 float resval[2];
61 float tmp1[2];
62 float tmp2[2];
63
64
65 incap = 1;
66
67 incx *= 2;
68 incy *= 2;
69
70 if (incx < 0)
71 x_start = (-n + 1) * incx;
72 else
73 x_start = 0;
74 if (incy < 0)
75 y_start = (-n + 1) * incy;
76 else
77 y_start = 0;
78
79 if (n < 1) {
80 return;
81 }
82 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
83 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
84 return;
85 }
86
87 /* Check for error conditions. */
88 if (order != blas_colmajor && order != blas_rowmajor) {
89 BLAS_error(routine_name, -1, order, NULL);
90 }
91 if (uplo != blas_upper && uplo != blas_lower) {
92 BLAS_error(routine_name, -2, uplo, NULL);
93 }
94 if (incx == 0) {
95 BLAS_error(routine_name, -7, incx, NULL);
96 }
97 if (incy == 0) {
98 BLAS_error(routine_name, -10, incy, NULL);
99 }
100
101
102
103 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
104 {
105 y_index = y_start;
106 for (matrix_row = 0; matrix_row < n; matrix_row++) {
107 resval[0] = y_i[y_index];
108 resval[1] = y_i[y_index + 1];
109
110 {
111 tmp2[0] = beta_i[0] * resval[0] - beta_i[1] * resval[1];
112 tmp2[1] = beta_i[0] * resval[1] + beta_i[1] * resval[0];
113 }
114
115
116 y_i[y_index] = tmp2[0];
117 y_i[y_index + 1] = tmp2[1];
118
119 y_index += incy;
120 }
121 }
122 } else {
123 if (uplo == blas_lower)
124 order = (order == blas_rowmajor) ? blas_colmajor : blas_rowmajor;
125 if (order == blas_rowmajor) {
126 if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
127 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
128 {
129 y_index = y_start;
130 ap_start = 0;
131 for (matrix_row = 0; matrix_row < n; matrix_row++) {
132 x_index = x_start;
133 ap_index = ap_start;
134 rowsum[0] = rowsum[1] = 0.0;
135 rowtmp[0] = rowtmp[1] = 0.0;
136 for (step = 0; step < matrix_row; step++) {
137 matval = ap_i[ap_index];
138 vecval[0] = x_i[x_index];
139 vecval[1] = x_i[x_index + 1];
140 {
141 rowtmp[0] = vecval[0] * matval;
142 rowtmp[1] = vecval[1] * matval;
143 }
144 rowsum[0] = rowsum[0] + rowtmp[0];
145 rowsum[1] = rowsum[1] + rowtmp[1];
146 ap_index += (n - step - 1) * incap;
147 x_index += incx;
148 }
149 for (step = matrix_row; step < n; step++) {
150 matval = ap_i[ap_index];
151 vecval[0] = x_i[x_index];
152 vecval[1] = x_i[x_index + 1];
153 {
154 rowtmp[0] = vecval[0] * matval;
155 rowtmp[1] = vecval[1] * matval;
156 }
157 rowsum[0] = rowsum[0] + rowtmp[0];
158 rowsum[1] = rowsum[1] + rowtmp[1];
159 ap_index += incap;
160 x_index += incx;
161 }
162 tmp1[0] = rowsum[0];
163 tmp1[1] = rowsum[1];
164 y_i[y_index] = tmp1[0];
165 y_i[y_index + 1] = tmp1[1];
166
167 y_index += incy;
168 ap_start += incap;
169 }
170 }
171 } else {
172 {
173 y_index = y_start;
174 ap_start = 0;
175 for (matrix_row = 0; matrix_row < n; matrix_row++) {
176 x_index = x_start;
177 ap_index = ap_start;
178 rowsum[0] = rowsum[1] = 0.0;
179 rowtmp[0] = rowtmp[1] = 0.0;
180 for (step = 0; step < matrix_row; step++) {
181 matval = ap_i[ap_index];
182 vecval[0] = x_i[x_index];
183 vecval[1] = x_i[x_index + 1];
184 {
185 rowtmp[0] = vecval[0] * matval;
186 rowtmp[1] = vecval[1] * matval;
187 }
188 rowsum[0] = rowsum[0] + rowtmp[0];
189 rowsum[1] = rowsum[1] + rowtmp[1];
190 ap_index += (n - step - 1) * incap;
191 x_index += incx;
192 }
193 for (step = matrix_row; step < n; step++) {
194 matval = ap_i[ap_index];
195 vecval[0] = x_i[x_index];
196 vecval[1] = x_i[x_index + 1];
197 {
198 rowtmp[0] = vecval[0] * matval;
199 rowtmp[1] = vecval[1] * matval;
200 }
201 rowsum[0] = rowsum[0] + rowtmp[0];
202 rowsum[1] = rowsum[1] + rowtmp[1];
203 ap_index += incap;
204 x_index += incx;
205 }
206 resval[0] = y_i[y_index];
207 resval[1] = y_i[y_index + 1];
208 tmp1[0] = rowsum[0];
209 tmp1[1] = rowsum[1];
210 {
211 tmp2[0] = beta_i[0] * resval[0] - beta_i[1] * resval[1];
212 tmp2[1] = beta_i[0] * resval[1] + beta_i[1] * resval[0];
213 }
214
215 tmp2[0] = tmp1[0] + tmp2[0];
216 tmp2[1] = tmp1[1] + tmp2[1];
217 y_i[y_index] = tmp2[0];
218 y_i[y_index + 1] = tmp2[1];
219
220 y_index += incy;
221 ap_start += incap;
222 }
223 }
224 }
225 } else {
226 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
227 {
228 y_index = y_start;
229 ap_start = 0;
230 for (matrix_row = 0; matrix_row < n; matrix_row++) {
231 x_index = x_start;
232 ap_index = ap_start;
233 rowsum[0] = rowsum[1] = 0.0;
234 rowtmp[0] = rowtmp[1] = 0.0;
235 for (step = 0; step < matrix_row; step++) {
236 matval = ap_i[ap_index];
237 vecval[0] = x_i[x_index];
238 vecval[1] = x_i[x_index + 1];
239 {
240 rowtmp[0] = vecval[0] * matval;
241 rowtmp[1] = vecval[1] * matval;
242 }
243 rowsum[0] = rowsum[0] + rowtmp[0];
244 rowsum[1] = rowsum[1] + rowtmp[1];
245 ap_index += (n - step - 1) * incap;
246 x_index += incx;
247 }
248 for (step = matrix_row; step < n; step++) {
249 matval = ap_i[ap_index];
250 vecval[0] = x_i[x_index];
251 vecval[1] = x_i[x_index + 1];
252 {
253 rowtmp[0] = vecval[0] * matval;
254 rowtmp[1] = vecval[1] * matval;
255 }
256 rowsum[0] = rowsum[0] + rowtmp[0];
257 rowsum[1] = rowsum[1] + rowtmp[1];
258 ap_index += incap;
259 x_index += incx;
260 }
261 {
262 tmp1[0] = rowsum[0] * alpha_i[0] - rowsum[1] * alpha_i[1];
263 tmp1[1] = rowsum[0] * alpha_i[1] + rowsum[1] * alpha_i[0];
264 }
265
266 y_i[y_index] = tmp1[0];
267 y_i[y_index + 1] = tmp1[1];
268
269 y_index += incy;
270 ap_start += incap;
271 }
272 }
273 } else {
274 {
275 y_index = y_start;
276 ap_start = 0;
277 for (matrix_row = 0; matrix_row < n; matrix_row++) {
278 x_index = x_start;
279 ap_index = ap_start;
280 rowsum[0] = rowsum[1] = 0.0;
281 rowtmp[0] = rowtmp[1] = 0.0;
282 for (step = 0; step < matrix_row; step++) {
283 matval = ap_i[ap_index];
284 vecval[0] = x_i[x_index];
285 vecval[1] = x_i[x_index + 1];
286 {
287 rowtmp[0] = vecval[0] * matval;
288 rowtmp[1] = vecval[1] * matval;
289 }
290 rowsum[0] = rowsum[0] + rowtmp[0];
291 rowsum[1] = rowsum[1] + rowtmp[1];
292 ap_index += (n - step - 1) * incap;
293 x_index += incx;
294 }
295 for (step = matrix_row; step < n; step++) {
296 matval = ap_i[ap_index];
297 vecval[0] = x_i[x_index];
298 vecval[1] = x_i[x_index + 1];
299 {
300 rowtmp[0] = vecval[0] * matval;
301 rowtmp[1] = vecval[1] * matval;
302 }
303 rowsum[0] = rowsum[0] + rowtmp[0];
304 rowsum[1] = rowsum[1] + rowtmp[1];
305 ap_index += incap;
306 x_index += incx;
307 }
308 resval[0] = y_i[y_index];
309 resval[1] = y_i[y_index + 1];
310 {
311 tmp1[0] = rowsum[0] * alpha_i[0] - rowsum[1] * alpha_i[1];
312 tmp1[1] = rowsum[0] * alpha_i[1] + rowsum[1] * alpha_i[0];
313 }
314
315 {
316 tmp2[0] = beta_i[0] * resval[0] - beta_i[1] * resval[1];
317 tmp2[1] = beta_i[0] * resval[1] + beta_i[1] * resval[0];
318 }
319
320 tmp2[0] = tmp1[0] + tmp2[0];
321 tmp2[1] = tmp1[1] + tmp2[1];
322 y_i[y_index] = tmp2[0];
323 y_i[y_index + 1] = tmp2[1];
324
325 y_index += incy;
326 ap_start += incap;
327 }
328 }
329 }
330 }
331 } else {
332 if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
333 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
334 {
335 y_index = y_start;
336 ap_start = 0;
337 for (matrix_row = 0; matrix_row < n; matrix_row++) {
338 x_index = x_start;
339 ap_index = ap_start;
340 rowsum[0] = rowsum[1] = 0.0;
341 rowtmp[0] = rowtmp[1] = 0.0;
342 for (step = 0; step < matrix_row; step++) {
343 matval = ap_i[ap_index];
344 vecval[0] = x_i[x_index];
345 vecval[1] = x_i[x_index + 1];
346 {
347 rowtmp[0] = vecval[0] * matval;
348 rowtmp[1] = vecval[1] * matval;
349 }
350 rowsum[0] = rowsum[0] + rowtmp[0];
351 rowsum[1] = rowsum[1] + rowtmp[1];
352 ap_index += incap;
353 x_index += incx;
354 }
355 for (step = matrix_row; step < n; step++) {
356 matval = ap_i[ap_index];
357 vecval[0] = x_i[x_index];
358 vecval[1] = x_i[x_index + 1];
359 {
360 rowtmp[0] = vecval[0] * matval;
361 rowtmp[1] = vecval[1] * matval;
362 }
363 rowsum[0] = rowsum[0] + rowtmp[0];
364 rowsum[1] = rowsum[1] + rowtmp[1];
365 ap_index += (step + 1) * incap;
366 x_index += incx;
367 }
368 tmp1[0] = rowsum[0];
369 tmp1[1] = rowsum[1];
370 y_i[y_index] = tmp1[0];
371 y_i[y_index + 1] = tmp1[1];
372
373 y_index += incy;
374 ap_start += (matrix_row + 1) * incap;
375 }
376 }
377 } else {
378 {
379 y_index = y_start;
380 ap_start = 0;
381 for (matrix_row = 0; matrix_row < n; matrix_row++) {
382 x_index = x_start;
383 ap_index = ap_start;
384 rowsum[0] = rowsum[1] = 0.0;
385 rowtmp[0] = rowtmp[1] = 0.0;
386 for (step = 0; step < matrix_row; step++) {
387 matval = ap_i[ap_index];
388 vecval[0] = x_i[x_index];
389 vecval[1] = x_i[x_index + 1];
390 {
391 rowtmp[0] = vecval[0] * matval;
392 rowtmp[1] = vecval[1] * matval;
393 }
394 rowsum[0] = rowsum[0] + rowtmp[0];
395 rowsum[1] = rowsum[1] + rowtmp[1];
396 ap_index += incap;
397 x_index += incx;
398 }
399 for (step = matrix_row; step < n; step++) {
400 matval = ap_i[ap_index];
401 vecval[0] = x_i[x_index];
402 vecval[1] = x_i[x_index + 1];
403 {
404 rowtmp[0] = vecval[0] * matval;
405 rowtmp[1] = vecval[1] * matval;
406 }
407 rowsum[0] = rowsum[0] + rowtmp[0];
408 rowsum[1] = rowsum[1] + rowtmp[1];
409 ap_index += (step + 1) * incap;
410 x_index += incx;
411 }
412 resval[0] = y_i[y_index];
413 resval[1] = y_i[y_index + 1];
414 tmp1[0] = rowsum[0];
415 tmp1[1] = rowsum[1];
416 {
417 tmp2[0] = beta_i[0] * resval[0] - beta_i[1] * resval[1];
418 tmp2[1] = beta_i[0] * resval[1] + beta_i[1] * resval[0];
419 }
420
421 tmp2[0] = tmp1[0] + tmp2[0];
422 tmp2[1] = tmp1[1] + tmp2[1];
423 y_i[y_index] = tmp2[0];
424 y_i[y_index + 1] = tmp2[1];
425
426 y_index += incy;
427 ap_start += (matrix_row + 1) * incap;
428 }
429 }
430 }
431 } else {
432 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
433 {
434 y_index = y_start;
435 ap_start = 0;
436 for (matrix_row = 0; matrix_row < n; matrix_row++) {
437 x_index = x_start;
438 ap_index = ap_start;
439 rowsum[0] = rowsum[1] = 0.0;
440 rowtmp[0] = rowtmp[1] = 0.0;
441 for (step = 0; step < matrix_row; step++) {
442 matval = ap_i[ap_index];
443 vecval[0] = x_i[x_index];
444 vecval[1] = x_i[x_index + 1];
445 {
446 rowtmp[0] = vecval[0] * matval;
447 rowtmp[1] = vecval[1] * matval;
448 }
449 rowsum[0] = rowsum[0] + rowtmp[0];
450 rowsum[1] = rowsum[1] + rowtmp[1];
451 ap_index += incap;
452 x_index += incx;
453 }
454 for (step = matrix_row; step < n; step++) {
455 matval = ap_i[ap_index];
456 vecval[0] = x_i[x_index];
457 vecval[1] = x_i[x_index + 1];
458 {
459 rowtmp[0] = vecval[0] * matval;
460 rowtmp[1] = vecval[1] * matval;
461 }
462 rowsum[0] = rowsum[0] + rowtmp[0];
463 rowsum[1] = rowsum[1] + rowtmp[1];
464 ap_index += (step + 1) * incap;
465 x_index += incx;
466 }
467 {
468 tmp1[0] = rowsum[0] * alpha_i[0] - rowsum[1] * alpha_i[1];
469 tmp1[1] = rowsum[0] * alpha_i[1] + rowsum[1] * alpha_i[0];
470 }
471
472 y_i[y_index] = tmp1[0];
473 y_i[y_index + 1] = tmp1[1];
474
475 y_index += incy;
476 ap_start += (matrix_row + 1) * incap;
477 }
478 }
479 } else {
480 {
481 y_index = y_start;
482 ap_start = 0;
483 for (matrix_row = 0; matrix_row < n; matrix_row++) {
484 x_index = x_start;
485 ap_index = ap_start;
486 rowsum[0] = rowsum[1] = 0.0;
487 rowtmp[0] = rowtmp[1] = 0.0;
488 for (step = 0; step < matrix_row; step++) {
489 matval = ap_i[ap_index];
490 vecval[0] = x_i[x_index];
491 vecval[1] = x_i[x_index + 1];
492 {
493 rowtmp[0] = vecval[0] * matval;
494 rowtmp[1] = vecval[1] * matval;
495 }
496 rowsum[0] = rowsum[0] + rowtmp[0];
497 rowsum[1] = rowsum[1] + rowtmp[1];
498 ap_index += incap;
499 x_index += incx;
500 }
501 for (step = matrix_row; step < n; step++) {
502 matval = ap_i[ap_index];
503 vecval[0] = x_i[x_index];
504 vecval[1] = x_i[x_index + 1];
505 {
506 rowtmp[0] = vecval[0] * matval;
507 rowtmp[1] = vecval[1] * matval;
508 }
509 rowsum[0] = rowsum[0] + rowtmp[0];
510 rowsum[1] = rowsum[1] + rowtmp[1];
511 ap_index += (step + 1) * incap;
512 x_index += incx;
513 }
514 resval[0] = y_i[y_index];
515 resval[1] = y_i[y_index + 1];
516 {
517 tmp1[0] = rowsum[0] * alpha_i[0] - rowsum[1] * alpha_i[1];
518 tmp1[1] = rowsum[0] * alpha_i[1] + rowsum[1] * alpha_i[0];
519 }
520
521 {
522 tmp2[0] = beta_i[0] * resval[0] - beta_i[1] * resval[1];
523 tmp2[1] = beta_i[0] * resval[1] + beta_i[1] * resval[0];
524 }
525
526 tmp2[0] = tmp1[0] + tmp2[0];
527 tmp2[1] = tmp1[1] + tmp2[1];
528 y_i[y_index] = tmp2[0];
529 y_i[y_index + 1] = tmp2[1];
530
531 y_index += incy;
532 ap_start += (matrix_row + 1) * incap;
533 }
534 }
535 }
536 }
537 } /* if order == ... */
538 } /* alpha != 0 */
539
540
541 }
542 }
543