1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_zsymm_d_z(enum blas_order_type order,enum blas_side_type side,enum blas_uplo_type uplo,int m,int n,const void * alpha,const double * a,int lda,const void * b,int ldb,const void * beta,void * c,int ldc)3 void BLAS_zsymm_d_z(enum blas_order_type order, enum blas_side_type side,
4 enum blas_uplo_type uplo, int m, int n,
5 const void *alpha, const double *a, int lda,
6 const void *b, int ldb, const void *beta,
7 void *c, int ldc)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * This routines computes one of the matrix product:
14 *
15 * C <- alpha * A * B + beta * C
16 * C <- alpha * B * A + beta * C
17 *
18 * where A is a symmetric matrix.
19 *
20 * Arguments
21 * =========
22 *
23 * order (input) enum blas_order_type
24 * Storage format of input matrices A, B, and C.
25 *
26 * side (input) enum blas_side_type
27 * Determines which side of matrix B is matrix A is multiplied.
28 *
29 * uplo (input) enum blas_uplo_type
30 * Determines which half of matrix A (upper or lower triangle)
31 * is accessed.
32 *
33 * m n (input) int
34 * Size of matrices A, B, and C.
35 * Matrix A is m-by-m if it is multiplied on the left,
36 * n-by-n otherwise.
37 * Matrices B and C are m-by-n.
38 *
39 * alpha (input) const void*
40 *
41 * a (input) const double*
42 * Matrix A.
43 *
44 * lda (input) int
45 * Leading dimension of matrix A.
46 *
47 * b (input) const void*
48 * Matrix B.
49 *
50 * ldb (input) int
51 * Leading dimension of matrix B.
52 *
53 * beta (input) const void*
54 *
55 * c (input/output) void*
56 * Matrix C.
57 *
58 * ldc (input) int
59 * Leading dimension of matrix C.
60 *
61 */
62 {
63
64 /* Integer Index Variables */
65 int i, j, k;
66
67 int ai, bj, ci;
68 int aik, bkj, cij;
69
70 int incai, incbj, incci;
71 int incaik1, incaik2, incbkj, inccij;
72
73 int m_i, n_i;
74
75 /* Input Matrices */
76 const double *a_i = a;
77 const double *b_i = (double *) b;
78
79 /* Output Matrix */
80 double *c_i = (double *) c;
81
82 /* Input Scalars */
83 double *alpha_i = (double *) alpha;
84 double *beta_i = (double *) beta;
85
86 /* Temporary Floating-Point Variables */
87 double a_elem;
88 double b_elem[2];
89 double c_elem[2];
90 double prod[2];
91 double sum[2];
92 double tmp1[2];
93 double tmp2[2];
94
95
96
97 /* Check for error conditions. */
98 if (m <= 0 || n <= 0) {
99 return;
100 }
101
102 if (order == blas_colmajor && (ldb < m || ldc < m)) {
103 return;
104 }
105 if (order == blas_rowmajor && (ldb < n || ldc < n)) {
106 return;
107 }
108 if (side == blas_left_side && lda < m) {
109 return;
110 }
111 if (side == blas_right_side && lda < n) {
112 return;
113 }
114
115 /* Test for no-op */
116 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
117 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
118 return;
119 }
120
121 /* Set Index Parameters */
122 if (side == blas_left_side) {
123 m_i = m;
124 n_i = n;
125 } else {
126 m_i = n;
127 n_i = m;
128 }
129
130 if ((order == blas_colmajor && side == blas_left_side) ||
131 (order == blas_rowmajor && side == blas_right_side)) {
132 incbj = ldb;
133 incbkj = 1;
134 incci = 1;
135 inccij = ldc;
136 } else {
137 incbj = 1;
138 incbkj = ldb;
139 incci = ldc;
140 inccij = 1;
141 }
142
143 if ((order == blas_colmajor && uplo == blas_upper) ||
144 (order == blas_rowmajor && uplo == blas_lower)) {
145 incai = lda;
146 incaik1 = 1;
147 incaik2 = lda;
148 } else {
149 incai = 1;
150 incaik1 = lda;
151 incaik2 = 1;
152 }
153
154
155
156 /* Adjustment to increments (if any) */
157 incci *= 2;
158 inccij *= 2;
159
160
161
162 incbj *= 2;
163 incbkj *= 2;
164
165 /* alpha = 0. In this case, just return beta * C */
166 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
167 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
168 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
169 c_elem[0] = c_i[cij];
170 c_elem[1] = c_i[cij + 1];
171 {
172 tmp1[0] =
173 (double) c_elem[0] * beta_i[0] - (double) c_elem[1] * beta_i[1];
174 tmp1[1] =
175 (double) c_elem[0] * beta_i[1] + (double) c_elem[1] * beta_i[0];
176 }
177 c_i[cij] = tmp1[0];
178 c_i[cij + 1] = tmp1[1];
179 }
180 }
181 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
182
183 /* Case alpha == 1. */
184
185 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
186 /* Case alpha = 1, beta = 0. We compute C <--- A * B or B * A */
187 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
188 for (j = 0, cij = ci, bj = 0; j < n_i;
189 j++, cij += inccij, bj += incbj) {
190
191 sum[0] = sum[1] = 0.0;
192
193 for (k = 0, aik = ai, bkj = bj; k < i;
194 k++, aik += incaik1, bkj += incbkj) {
195 a_elem = a_i[aik];
196 b_elem[0] = b_i[bkj];
197 b_elem[1] = b_i[bkj + 1];
198 {
199 prod[0] = b_elem[0] * a_elem;
200 prod[1] = b_elem[1] * a_elem;
201 }
202 sum[0] = sum[0] + prod[0];
203 sum[1] = sum[1] + prod[1];
204 }
205 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
206 a_elem = a_i[aik];
207 b_elem[0] = b_i[bkj];
208 b_elem[1] = b_i[bkj + 1];
209 {
210 prod[0] = b_elem[0] * a_elem;
211 prod[1] = b_elem[1] * a_elem;
212 }
213 sum[0] = sum[0] + prod[0];
214 sum[1] = sum[1] + prod[1];
215 }
216 c_i[cij] = sum[0];
217 c_i[cij + 1] = sum[1];
218 }
219 }
220 } else {
221 /* Case alpha = 1, but beta != 0.
222 We compute C <--- A * B + beta * C
223 or C <--- B * A + beta * C */
224
225 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
226 for (j = 0, cij = ci, bj = 0; j < n_i;
227 j++, cij += inccij, bj += incbj) {
228
229 sum[0] = sum[1] = 0.0;
230
231 for (k = 0, aik = ai, bkj = bj; k < i;
232 k++, aik += incaik1, bkj += incbkj) {
233 a_elem = a_i[aik];
234 b_elem[0] = b_i[bkj];
235 b_elem[1] = b_i[bkj + 1];
236 {
237 prod[0] = b_elem[0] * a_elem;
238 prod[1] = b_elem[1] * a_elem;
239 }
240 sum[0] = sum[0] + prod[0];
241 sum[1] = sum[1] + prod[1];
242 }
243 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
244 a_elem = a_i[aik];
245 b_elem[0] = b_i[bkj];
246 b_elem[1] = b_i[bkj + 1];
247 {
248 prod[0] = b_elem[0] * a_elem;
249 prod[1] = b_elem[1] * a_elem;
250 }
251 sum[0] = sum[0] + prod[0];
252 sum[1] = sum[1] + prod[1];
253 }
254 c_elem[0] = c_i[cij];
255 c_elem[1] = c_i[cij + 1];
256 {
257 tmp2[0] =
258 (double) c_elem[0] * beta_i[0] - (double) c_elem[1] * beta_i[1];
259 tmp2[1] =
260 (double) c_elem[0] * beta_i[1] + (double) c_elem[1] * beta_i[0];
261 }
262 tmp1[0] = sum[0];
263 tmp1[1] = sum[1];
264 tmp1[0] = tmp2[0] + tmp1[0];
265 tmp1[1] = tmp2[1] + tmp1[1];
266 c_i[cij] = tmp1[0];
267 c_i[cij + 1] = tmp1[1];
268 }
269 }
270 }
271
272 } else {
273 /* The most general form, C <--- alpha * A * B + beta * C
274 or C <--- alpha * B * A + beta * C */
275
276 for (i = 0, ci = 0, ai = 0; i < m_i; i++, ci += incci, ai += incai) {
277 for (j = 0, cij = ci, bj = 0; j < n_i; j++, cij += inccij, bj += incbj) {
278
279 sum[0] = sum[1] = 0.0;
280
281 for (k = 0, aik = ai, bkj = bj; k < i;
282 k++, aik += incaik1, bkj += incbkj) {
283 a_elem = a_i[aik];
284 b_elem[0] = b_i[bkj];
285 b_elem[1] = b_i[bkj + 1];
286 {
287 prod[0] = b_elem[0] * a_elem;
288 prod[1] = b_elem[1] * a_elem;
289 }
290 sum[0] = sum[0] + prod[0];
291 sum[1] = sum[1] + prod[1];
292 }
293 for (; k < m_i; k++, aik += incaik2, bkj += incbkj) {
294 a_elem = a_i[aik];
295 b_elem[0] = b_i[bkj];
296 b_elem[1] = b_i[bkj + 1];
297 {
298 prod[0] = b_elem[0] * a_elem;
299 prod[1] = b_elem[1] * a_elem;
300 }
301 sum[0] = sum[0] + prod[0];
302 sum[1] = sum[1] + prod[1];
303 }
304 {
305 tmp1[0] =
306 (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
307 tmp1[1] =
308 (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
309 }
310 c_elem[0] = c_i[cij];
311 c_elem[1] = c_i[cij + 1];
312 {
313 tmp2[0] =
314 (double) c_elem[0] * beta_i[0] - (double) c_elem[1] * beta_i[1];
315 tmp2[1] =
316 (double) c_elem[0] * beta_i[1] + (double) c_elem[1] * beta_i[0];
317 }
318 tmp1[0] = tmp1[0] + tmp2[0];
319 tmp1[1] = tmp1[1] + tmp2[1];
320 c_i[cij] = tmp1[0];
321 c_i[cij + 1] = tmp1[1];
322 }
323 }
324 }
325
326
327
328 }
329