1 #include <blas_extended.h>
2 #include <blas_extended_private.h>
3 #include <blas_fpu.h>
BLAS_zsymv2_z_c(enum blas_order_type order,enum blas_uplo_type uplo,int n,const void * alpha,const void * a,int lda,const void * x_head,const void * x_tail,int incx,const void * beta,void * y,int incy)4 void BLAS_zsymv2_z_c(enum blas_order_type order, enum blas_uplo_type uplo,
5 		     int n, const void *alpha, const void *a, int lda,
6 		     const void *x_head, const void *x_tail, int incx,
7 		     const void *beta, void *y, int incy)
8 
9 /*
10  * Purpose
11  * =======
12  *
13  * This routines computes the matrix product:
14  *
15  *     y  <-  alpha * A * (x_head + x_tail) + beta * y
16  *
17  * where A is a symmetric matrix.
18  *
19  * Arguments
20  * =========
21  *
22  * order   (input) enum blas_order_type
23  *         Storage format of input symmetric matrix A.
24  *
25  * uplo    (input) enum blas_uplo_type
26  *         Determines which half of matrix A (upper or lower triangle)
27  *         is accessed.
28  *
29  * n       (input) int
30  *         Dimension of A and size of vectors x, y.
31  *
32  * alpha   (input) const void*
33  *
34  * a       (input) void*
35  *         Matrix A.
36  *
37  * lda     (input) int
38  *         Leading dimension of matrix A.
39  *
40  * x_head  (input) void*
41  *         Vector x_head
42  *
43  * x_tail  (input) void*
44  *         Vector x_tail
45  *
46  * incx    (input) int
47  *         Stride for vector x.
48  *
49  * beta    (input) const void*
50  *
51  * y       (input) void*
52  *         Vector y.
53  *
54  * incy    (input) int
55  *         Stride for vector y.
56  *
57  */
58 {
59   /* Routine name */
60   const char routine_name[] = "BLAS_zsymv2_z_c";
61 
62   int i, j;
63   int xi, yi, xi0, yi0;
64   int aij, ai;
65   int incai;
66   int incaij, incaij2;
67 
68   const double *a_i = (double *) a;
69   const float *x_head_i = (float *) x_head;
70   const float *x_tail_i = (float *) x_tail;
71   double *y_i = (double *) y;
72   double *alpha_i = (double *) alpha;
73   double *beta_i = (double *) beta;
74   double a_elem[2];
75   float x_elem[2];
76   double y_elem[2];
77   double prod1[2];
78   double prod2[2];
79   double sum1[2];
80   double sum2[2];
81   double tmp1[2];
82   double tmp2[2];
83   double tmp3[2];
84 
85 
86 
87   /* Test for no-op */
88   if (n <= 0) {
89     return;
90   }
91   if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
92       && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
93     return;
94   }
95 
96   /* Check for error conditions. */
97   if (n < 0) {
98     BLAS_error(routine_name, -3, n, NULL);
99   }
100   if (lda < n) {
101     BLAS_error(routine_name, -6, n, NULL);
102   }
103   if (incx == 0) {
104     BLAS_error(routine_name, -9, incx, NULL);
105   }
106   if (incy == 0) {
107     BLAS_error(routine_name, -12, incy, NULL);
108   }
109 
110   if ((order == blas_colmajor && uplo == blas_upper) ||
111       (order == blas_rowmajor && uplo == blas_lower)) {
112     incai = lda;
113     incaij = 1;
114     incaij2 = lda;
115   } else {
116     incai = 1;
117     incaij = lda;
118     incaij2 = 1;
119   }
120 
121   incx *= 2;
122   incy *= 2;
123   incai *= 2;
124   incaij *= 2;
125   incaij2 *= 2;
126   xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
127   yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
128 
129 
130 
131   /* The most general form,   y <--- alpha * A * (x_head + x_tail) + beta * y   */
132   for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
133     sum1[0] = sum1[1] = 0.0;
134     sum2[0] = sum2[1] = 0.0;
135 
136     for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
137       a_elem[0] = a_i[aij];
138       a_elem[1] = a_i[aij + 1];
139       x_elem[0] = x_head_i[xi];
140       x_elem[1] = x_head_i[xi + 1];
141       {
142 	prod1[0] =
143 	  (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1];
144 	prod1[1] =
145 	  (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0];
146       }
147       sum1[0] = sum1[0] + prod1[0];
148       sum1[1] = sum1[1] + prod1[1];
149       x_elem[0] = x_tail_i[xi];
150       x_elem[1] = x_tail_i[xi + 1];
151       {
152 	prod2[0] =
153 	  (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1];
154 	prod2[1] =
155 	  (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0];
156       }
157       sum2[0] = sum2[0] + prod2[0];
158       sum2[1] = sum2[1] + prod2[1];
159     }
160     for (; j < n; j++, aij += incaij2, xi += incx) {
161       a_elem[0] = a_i[aij];
162       a_elem[1] = a_i[aij + 1];
163       x_elem[0] = x_head_i[xi];
164       x_elem[1] = x_head_i[xi + 1];
165       {
166 	prod1[0] =
167 	  (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1];
168 	prod1[1] =
169 	  (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0];
170       }
171       sum1[0] = sum1[0] + prod1[0];
172       sum1[1] = sum1[1] + prod1[1];
173       x_elem[0] = x_tail_i[xi];
174       x_elem[1] = x_tail_i[xi + 1];
175       {
176 	prod2[0] =
177 	  (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1];
178 	prod2[1] =
179 	  (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0];
180       }
181       sum2[0] = sum2[0] + prod2[0];
182       sum2[1] = sum2[1] + prod2[1];
183     }
184     sum1[0] = sum1[0] + sum2[0];
185     sum1[1] = sum1[1] + sum2[1];
186     {
187       tmp1[0] = (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
188       tmp1[1] = (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
189     }
190     y_elem[0] = y_i[yi];
191     y_elem[1] = y_i[yi + 1];
192     {
193       tmp2[0] =
194 	(double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
195       tmp2[1] =
196 	(double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
197     }
198     tmp3[0] = tmp1[0] + tmp2[0];
199     tmp3[1] = tmp1[1] + tmp2[1];
200     y_i[yi] = tmp3[0];
201     y_i[yi + 1] = tmp3[1];
202   }
203 
204 
205 
206 }				/* end BLAS_zsymv2_z_c */
207