1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_zsymm_d_z(enum blas_order_type order,enum blas_side_type side,enum blas_uplo_type uplo,int m,int n,const void * alpha,const double * a,int lda,const void * b,int ldb,const void * beta,void * c,int ldc)3 void BLAS_zsymm_d_z(enum blas_order_type order, enum blas_side_type side,
4 		    enum blas_uplo_type uplo, int m, int n,
5 		    const void *alpha, const double *a, int lda,
6 		    const void *b, int ldb, const void *beta,
7 		    void *c, int ldc)
8 
9 /*
10  * Purpose
11  * =======
12  *
13  * This routines computes one of the matrix product:
14  *
15  *     C  <-  alpha * A * B  +  beta * C
16  *     C  <-  alpha * B * A  +  beta * C
17  *
18  * where A is a symmetric matrix.
19  *
20  * Arguments
21  * =========
22  *
23  * order   (input) enum blas_order_type
24  *         Storage format of input matrices A, B, and C.
25  *
26  * side    (input) enum blas_side_type
27  *         Determines which side of matrix B is matrix A is multiplied.
28  *
29  * uplo    (input) enum blas_uplo_type
30  *         Determines which half of matrix A (upper or lower triangle)
31  *         is accessed.
32  *
33  * m n     (input) int
34  *         Size of matrices A, B, and C.
35  *         Matrix A is m-by-m if it is multiplied on the left,
36  *                     n-by-n otherwise.
37  *         Matrices B and C are m-by-n.
38  *
39  * alpha   (input) const void*
40  *
41  * a       (input) const double*
42  *         Matrix A.
43  *
44  * lda     (input) int
45  *         Leading dimension of matrix A.
46  *
47  * b       (input) const void*
48  *         Matrix B.
49  *
50  * ldb     (input) int
51  *         Leading dimension of matrix B.
52  *
53  * beta    (input) const void*
54  *
55  * c       (input/output) void*
56  *         Matrix C.
57  *
58  * ldc     (input) int
59  *         Leading dimension of matrix C.
60  *
61  */
62 {
63 
64   /* Integer Index Variables */
65   int i, j, k;
66 
67   int ai, bj, ci;
68   int aik, bkj, cij;
69 
70   int incai, incbj, incci;
71   int incaik1, incaik2, incbkj, inccij;
72 
73   int m_i, n_i;
74 
75   /* Input Matrices */
76   const double *a_i = a;
77   const double *b_i = (double *) b;
78 
79   /* Output Matrix */
80   double *c_i = (double *) c;
81 
82   /* Input Scalars */
83   double *alpha_i = (double *) alpha;
84   double *beta_i = (double *) beta;
85 
86   /* Temporary Floating-Point Variables */
87   double a_elem;
88   double b_elem[2];
89   double c_elem[2];
90   double prod[2];
91   double sum[2];
92   double tmp1[2];
93   double tmp2[2];
94 
95 
96 
97   /* Check for error conditions. */
98   if (m <= 0 || n <= 0) {
99     return;
100   }
101 
102   if (order == blas_colmajor && (ldb < m || ldc < m)) {
103     return;
104   }
105   if (order == blas_rowmajor && (ldb < n || ldc < n)) {
106     return;
107   }
108   if (side == blas_left_side && lda < m) {
109     return;
110   }
111   if (side == blas_right_side && lda < n) {
112     return;
113   }
114 
115   /* Test for no-op */
116   if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
117       && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
118     return;
119   }
120 
121   /* Set Index Parameters */
122   if (side == blas_left_side) {
123     m_i = m;
124     n_i = n;
125   } else {
126     m_i = n;
127     n_i = m;
128   }
129 
130   if ((order == blas_colmajor && side == blas_left_side) ||
131       (order == blas_rowmajor && side == blas_right_side)) {
132     incbj = ldb;
133     incbkj = 1;
134     incci = 1;
135     inccij = ldc;
136   } else {
137     incbj = 1;
138     incbkj = ldb;
139     incci = ldc;
140     inccij = 1;
141   }
142 
143   if ((order == blas_colmajor && uplo == blas_upper) ||
144       (order == blas_rowmajor && uplo == blas_lower)) {
145     incai = lda;
146     incaik1 = 1;
147     incaik2 = lda;
148   } else {
149     incai = 1;
150     incaik1 = lda;
151     incaik2 = 1;
152   }
153 
154 
155 
156   /* Adjustment to increments (if any) */
157   incci *= 2;
158   inccij *= 2;
159 
160 
161 
162   incbj *= 2;
163   incbkj *= 2;
164 
165   /* alpha = 0.  In this case, just return beta * C */
166   if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
167     for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
168       for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
169 	c_elem[0] = c_i[cij];
170 	c_elem[1] = c_i[cij + 1];
171 	{
172 	  tmp1[0] =
173 	    (double) c_elem[0] * beta_i[0] - (double) c_elem[1] * beta_i[1];
174 	  tmp1[1] =
175 	    (double) c_elem[0] * beta_i[1] + (double) c_elem[1] * beta_i[0];
176 	}
177 	c_i[cij] = tmp1[0];
178 	c_i[cij + 1] = tmp1[1];
179       }
180     }
181   } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
182 
183     /* Case alpha == 1. */
184 
185     if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
186       /* Case alpha = 1, beta = 0.  We compute  C <--- A * B   or  B * A */
187       for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
188 	for (j = 0, cij = ci, bj = 0; j < n_i;
189 	     j++, cij += inccij, bj += incbj) {
190 
191 	  sum[0] = sum[1] = 0.0;
192 
193 	  for (k = 0, aik = ai, bkj = bj; k < i;
194 	       k++, aik += incaik1, bkj += incbkj) {
195 	    a_elem = a_i[aik];
196 	    b_elem[0] = b_i[bkj];
197 	    b_elem[1] = b_i[bkj + 1];
198 	    {
199 	      prod[0] = b_elem[0] * a_elem;
200 	      prod[1] = b_elem[1] * a_elem;
201 	    }
202 	    sum[0] = sum[0] + prod[0];
203 	    sum[1] = sum[1] + prod[1];
204 	  }
205 	  for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
206 	    a_elem = a_i[aik];
207 	    b_elem[0] = b_i[bkj];
208 	    b_elem[1] = b_i[bkj + 1];
209 	    {
210 	      prod[0] = b_elem[0] * a_elem;
211 	      prod[1] = b_elem[1] * a_elem;
212 	    }
213 	    sum[0] = sum[0] + prod[0];
214 	    sum[1] = sum[1] + prod[1];
215 	  }
216 	  c_i[cij] = sum[0];
217 	  c_i[cij + 1] = sum[1];
218 	}
219       }
220     } else {
221       /* Case alpha = 1, but beta != 0.
222          We compute  C  <--- A * B + beta * C
223          or  C  <--- B * A + beta * C  */
224 
225       for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
226 	for (j = 0, cij = ci, bj = 0; j < n_i;
227 	     j++, cij += inccij, bj += incbj) {
228 
229 	  sum[0] = sum[1] = 0.0;
230 
231 	  for (k = 0, aik = ai, bkj = bj; k < i;
232 	       k++, aik += incaik1, bkj += incbkj) {
233 	    a_elem = a_i[aik];
234 	    b_elem[0] = b_i[bkj];
235 	    b_elem[1] = b_i[bkj + 1];
236 	    {
237 	      prod[0] = b_elem[0] * a_elem;
238 	      prod[1] = b_elem[1] * a_elem;
239 	    }
240 	    sum[0] = sum[0] + prod[0];
241 	    sum[1] = sum[1] + prod[1];
242 	  }
243 	  for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
244 	    a_elem = a_i[aik];
245 	    b_elem[0] = b_i[bkj];
246 	    b_elem[1] = b_i[bkj + 1];
247 	    {
248 	      prod[0] = b_elem[0] * a_elem;
249 	      prod[1] = b_elem[1] * a_elem;
250 	    }
251 	    sum[0] = sum[0] + prod[0];
252 	    sum[1] = sum[1] + prod[1];
253 	  }
254 	  c_elem[0] = c_i[cij];
255 	  c_elem[1] = c_i[cij + 1];
256 	  {
257 	    tmp2[0] =
258 	      (double) c_elem[0] * beta_i[0] - (double) c_elem[1] * beta_i[1];
259 	    tmp2[1] =
260 	      (double) c_elem[0] * beta_i[1] + (double) c_elem[1] * beta_i[0];
261 	  }
262 	  tmp1[0] = sum[0];
263 	  tmp1[1] = sum[1];
264 	  tmp1[0] = tmp2[0] + tmp1[0];
265 	  tmp1[1] = tmp2[1] + tmp1[1];
266 	  c_i[cij] = tmp1[0];
267 	  c_i[cij + 1] = tmp1[1];
268 	}
269       }
270     }
271 
272   } else {
273     /* The most general form,   C <--- alpha * A * B + beta * C
274        or   C <--- alpha * B * A + beta * C  */
275 
276     for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
277       for (j = 0, cij = ci, bj = 0; j < n_i; j++, cij += inccij, bj += incbj) {
278 
279 	sum[0] = sum[1] = 0.0;
280 
281 	for (k = 0, aik = ai, bkj = bj; k < i;
282 	     k++, aik += incaik1, bkj += incbkj) {
283 	  a_elem = a_i[aik];
284 	  b_elem[0] = b_i[bkj];
285 	  b_elem[1] = b_i[bkj + 1];
286 	  {
287 	    prod[0] = b_elem[0] * a_elem;
288 	    prod[1] = b_elem[1] * a_elem;
289 	  }
290 	  sum[0] = sum[0] + prod[0];
291 	  sum[1] = sum[1] + prod[1];
292 	}
293 	for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
294 	  a_elem = a_i[aik];
295 	  b_elem[0] = b_i[bkj];
296 	  b_elem[1] = b_i[bkj + 1];
297 	  {
298 	    prod[0] = b_elem[0] * a_elem;
299 	    prod[1] = b_elem[1] * a_elem;
300 	  }
301 	  sum[0] = sum[0] + prod[0];
302 	  sum[1] = sum[1] + prod[1];
303 	}
304 	{
305 	  tmp1[0] =
306 	    (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
307 	  tmp1[1] =
308 	    (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
309 	}
310 	c_elem[0] = c_i[cij];
311 	c_elem[1] = c_i[cij + 1];
312 	{
313 	  tmp2[0] =
314 	    (double) c_elem[0] * beta_i[0] - (double) c_elem[1] * beta_i[1];
315 	  tmp2[1] =
316 	    (double) c_elem[0] * beta_i[1] + (double) c_elem[1] * beta_i[0];
317 	}
318 	tmp1[0] = tmp1[0] + tmp2[0];
319 	tmp1[1] = tmp1[1] + tmp2[1];
320 	c_i[cij] = tmp1[0];
321 	c_i[cij + 1] = tmp1[1];
322       }
323     }
324   }
325 
326 
327 
328 }
329