1 #include <blas_extended.h>
2 #include <blas_extended_private.h>
3 #include <blas_fpu.h>
BLAS_zsymv2_z_c(enum blas_order_type order,enum blas_uplo_type uplo,int n,const void * alpha,const void * a,int lda,const void * x_head,const void * x_tail,int incx,const void * beta,void * y,int incy)4 void BLAS_zsymv2_z_c(enum blas_order_type order, enum blas_uplo_type uplo,
5 int n, const void *alpha, const void *a, int lda,
6 const void *x_head, const void *x_tail, int incx,
7 const void *beta, void *y, int incy)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * This routines computes the matrix product:
14 *
15 * y <- alpha * A * (x_head + x_tail) + beta * y
16 *
17 * where A is a symmetric matrix.
18 *
19 * Arguments
20 * =========
21 *
22 * order (input) enum blas_order_type
23 * Storage format of input symmetric matrix A.
24 *
25 * uplo (input) enum blas_uplo_type
26 * Determines which half of matrix A (upper or lower triangle)
27 * is accessed.
28 *
29 * n (input) int
30 * Dimension of A and size of vectors x, y.
31 *
32 * alpha (input) const void*
33 *
34 * a (input) void*
35 * Matrix A.
36 *
37 * lda (input) int
38 * Leading dimension of matrix A.
39 *
40 * x_head (input) void*
41 * Vector x_head
42 *
43 * x_tail (input) void*
44 * Vector x_tail
45 *
46 * incx (input) int
47 * Stride for vector x.
48 *
49 * beta (input) const void*
50 *
51 * y (input) void*
52 * Vector y.
53 *
54 * incy (input) int
55 * Stride for vector y.
56 *
57 */
58 {
59 /* Routine name */
60 const char routine_name[] = "BLAS_zsymv2_z_c";
61
62 int i, j;
63 int xi, yi, xi0, yi0;
64 int aij, ai;
65 int incai;
66 int incaij, incaij2;
67
68 const double *a_i = (double *) a;
69 const float *x_head_i = (float *) x_head;
70 const float *x_tail_i = (float *) x_tail;
71 double *y_i = (double *) y;
72 double *alpha_i = (double *) alpha;
73 double *beta_i = (double *) beta;
74 double a_elem[2];
75 float x_elem[2];
76 double y_elem[2];
77 double prod1[2];
78 double prod2[2];
79 double sum1[2];
80 double sum2[2];
81 double tmp1[2];
82 double tmp2[2];
83 double tmp3[2];
84
85
86
87 /* Test for no-op */
88 if (n <= 0) {
89 return;
90 }
91 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
92 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
93 return;
94 }
95
96 /* Check for error conditions. */
97 if (n < 0) {
98 BLAS_error(routine_name, -3, n, NULL);
99 }
100 if (lda < n) {
101 BLAS_error(routine_name, -6, n, NULL);
102 }
103 if (incx == 0) {
104 BLAS_error(routine_name, -9, incx, NULL);
105 }
106 if (incy == 0) {
107 BLAS_error(routine_name, -12, incy, NULL);
108 }
109
110 if ((order == blas_colmajor && uplo == blas_upper) ||
111 (order == blas_rowmajor && uplo == blas_lower)) {
112 incai = lda;
113 incaij = 1;
114 incaij2 = lda;
115 } else {
116 incai = 1;
117 incaij = lda;
118 incaij2 = 1;
119 }
120
121 incx *= 2;
122 incy *= 2;
123 incai *= 2;
124 incaij *= 2;
125 incaij2 *= 2;
126 xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
127 yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
128
129
130
131 /* The most general form, y <--- alpha * A * (x_head + x_tail) + beta * y */
132 for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
133 sum1[0] = sum1[1] = 0.0;
134 sum2[0] = sum2[1] = 0.0;
135
136 for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
137 a_elem[0] = a_i[aij];
138 a_elem[1] = a_i[aij + 1];
139 x_elem[0] = x_head_i[xi];
140 x_elem[1] = x_head_i[xi + 1];
141 {
142 prod1[0] =
143 (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1];
144 prod1[1] =
145 (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0];
146 }
147 sum1[0] = sum1[0] + prod1[0];
148 sum1[1] = sum1[1] + prod1[1];
149 x_elem[0] = x_tail_i[xi];
150 x_elem[1] = x_tail_i[xi + 1];
151 {
152 prod2[0] =
153 (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1];
154 prod2[1] =
155 (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0];
156 }
157 sum2[0] = sum2[0] + prod2[0];
158 sum2[1] = sum2[1] + prod2[1];
159 }
160 for (; j < n; j++, aij += incaij2, xi += incx) {
161 a_elem[0] = a_i[aij];
162 a_elem[1] = a_i[aij + 1];
163 x_elem[0] = x_head_i[xi];
164 x_elem[1] = x_head_i[xi + 1];
165 {
166 prod1[0] =
167 (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1];
168 prod1[1] =
169 (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0];
170 }
171 sum1[0] = sum1[0] + prod1[0];
172 sum1[1] = sum1[1] + prod1[1];
173 x_elem[0] = x_tail_i[xi];
174 x_elem[1] = x_tail_i[xi + 1];
175 {
176 prod2[0] =
177 (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1];
178 prod2[1] =
179 (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0];
180 }
181 sum2[0] = sum2[0] + prod2[0];
182 sum2[1] = sum2[1] + prod2[1];
183 }
184 sum1[0] = sum1[0] + sum2[0];
185 sum1[1] = sum1[1] + sum2[1];
186 {
187 tmp1[0] = (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
188 tmp1[1] = (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
189 }
190 y_elem[0] = y_i[yi];
191 y_elem[1] = y_i[yi + 1];
192 {
193 tmp2[0] =
194 (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
195 tmp2[1] =
196 (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
197 }
198 tmp3[0] = tmp1[0] + tmp2[0];
199 tmp3[1] = tmp1[1] + tmp2[1];
200 y_i[yi] = tmp3[0];
201 y_i[yi + 1] = tmp3[1];
202 }
203
204
205
206 } /* end BLAS_zsymv2_z_c */
207