1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_zgemm_z_c(enum blas_order_type order,enum blas_trans_type transa,enum blas_trans_type transb,int m,int n,int k,const void * alpha,const void * a,int lda,const void * b,int ldb,const void * beta,void * c,int ldc)3 void BLAS_zgemm_z_c(enum blas_order_type order, enum blas_trans_type transa,
4 enum blas_trans_type transb, int m, int n, int k,
5 const void *alpha, const void *a, int lda, const void *b,
6 int ldb, const void *beta, void *c, int ldc)
7
8 /*
9 * Purpose
10 * =======
11 *
12 * This routine computes the matrix product:
13 *
14 * C <- alpha * op(A) * op(B) + beta * C .
15 *
16 * where op(M) represents either M, M transpose,
17 * or M conjugate transpose.
18 *
19 * Arguments
20 * =========
21 *
22 * order (input) enum blas_order_type
23 * Storage format of input matrices A, B, and C.
24 *
25 * transa (input) enum blas_trans_type
26 * Operation to be done on matrix A before multiplication.
27 * Can be no operation, transposition, or conjugate transposition.
28 *
29 * transb (input) enum blas_trans_type
30 * Operation to be done on matrix B before multiplication.
31 * Can be no operation, transposition, or conjugate transposition.
32 *
33 * m n k (input) int
34 * The dimensions of matrices A, B, and C.
35 * Matrix C is m-by-n matrix.
36 * Matrix A is m-by-k if A is not transposed,
37 * k-by-m otherwise.
38 * Matrix B is k-by-n if B is not transposed,
39 * n-by-k otherwise.
40 *
41 * alpha (input) const void*
42 *
43 * a (input) const void*
44 * matrix A.
45 *
46 * lda (input) int
47 * leading dimension of A.
48 *
49 * b (input) const void*
50 * matrix B
51 *
52 * ldb (input) int
53 * leading dimension of B.
54 *
55 * beta (input) const void*
56 *
57 * c (input/output) void*
58 * matrix C
59 *
60 * ldc (input) int
61 * leading dimension of C.
62 *
63 */
64 {
65 static const char routine_name[] = "BLAS_zgemm_z_c";
66
67
68 /* Integer Index Variables */
69 int i, j, h;
70
71 int ai, bj, ci;
72 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
73
74 int incai, incaih; /* Index increments for matrix a */
75 int incbj, incbhj; /* Index increments for matrix b */
76 int incci, inccij; /* Index increments for matrix c */
77
78 /* Input Matrices */
79 const double *a_i = (double *) a;
80 const float *b_i = (float *) b;
81
82 /* Output Matrix */
83 double *c_i = (double *) c;
84
85 /* Input Scalars */
86 double *alpha_i = (double *) alpha;
87 double *beta_i = (double *) beta;
88
89 /* Temporary Floating-Point Variables */
90 double a_elem[2];
91 float b_elem[2];
92 double c_elem[2];
93 double prod[2];
94 double sum[2];
95 double tmp1[2];
96 double tmp2[2];
97
98
99
100 /* Test for error conditions */
101 if (m < 0)
102 BLAS_error(routine_name, -4, m, NULL);
103 if (n < 0)
104 BLAS_error(routine_name, -5, n, NULL);
105 if (k < 0)
106 BLAS_error(routine_name, -6, k, NULL);
107
108 if (order == blas_colmajor) {
109
110 if (ldc < m)
111 BLAS_error(routine_name, -14, ldc, NULL);
112
113 if (transa == blas_no_trans) {
114 if (lda < m)
115 BLAS_error(routine_name, -9, lda, NULL);
116 } else {
117 if (lda < k)
118 BLAS_error(routine_name, -9, lda, NULL);
119 }
120
121 if (transb == blas_no_trans) {
122 if (ldb < k)
123 BLAS_error(routine_name, -11, ldb, NULL);
124 } else {
125 if (ldb < n)
126 BLAS_error(routine_name, -11, ldb, NULL);
127 }
128
129 } else {
130 /* row major */
131 if (ldc < n)
132 BLAS_error(routine_name, -14, ldc, NULL);
133
134 if (transa == blas_no_trans) {
135 if (lda < k)
136 BLAS_error(routine_name, -9, lda, NULL);
137 } else {
138 if (lda < m)
139 BLAS_error(routine_name, -9, lda, NULL);
140 }
141
142 if (transb == blas_no_trans) {
143 if (ldb < n)
144 BLAS_error(routine_name, -11, ldb, NULL);
145 } else {
146 if (ldb < k)
147 BLAS_error(routine_name, -11, ldb, NULL);
148 }
149 }
150
151 /* Test for no-op */
152 if (n == 0 || m == 0 || k == 0)
153 return;
154 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
155 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
156 return;
157 }
158
159 /* Set Index Parameters */
160 if (order == blas_colmajor) {
161 incci = 1;
162 inccij = ldc;
163
164 if (transa == blas_no_trans) {
165 incai = 1;
166 incaih = lda;
167 } else {
168 incai = lda;
169 incaih = 1;
170 }
171
172 if (transb == blas_no_trans) {
173 incbj = ldb;
174 incbhj = 1;
175 } else {
176 incbj = 1;
177 incbhj = ldb;
178 }
179
180 } else {
181 /* row major */
182 incci = ldc;
183 inccij = 1;
184
185 if (transa == blas_no_trans) {
186 incai = lda;
187 incaih = 1;
188 } else {
189 incai = 1;
190 incaih = lda;
191 }
192
193 if (transb == blas_no_trans) {
194 incbj = 1;
195 incbhj = ldb;
196 } else {
197 incbj = ldb;
198 incbhj = 1;
199 }
200
201 }
202
203
204
205 /* Ajustment to increments */
206 incci *= 2;
207 inccij *= 2;
208 incai *= 2;
209 incaih *= 2;
210 incbj *= 2;
211 incbhj *= 2;
212
213 /* alpha = 0. In this case, just return beta * C */
214 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
215
216 ci = 0;
217 for (i = 0; i < m; i++, ci += incci) {
218 cij = ci;
219 for (j = 0; j < n; j++, cij += inccij) {
220 c_elem[0] = c_i[cij];
221 c_elem[1] = c_i[cij + 1];
222 {
223 tmp1[0] =
224 (double) c_elem[0] * beta_i[0] - (double) c_elem[1] * beta_i[1];
225 tmp1[1] =
226 (double) c_elem[0] * beta_i[1] + (double) c_elem[1] * beta_i[0];
227 }
228 c_i[cij] = tmp1[0];
229 c_i[cij + 1] = tmp1[1];
230 }
231 }
232
233 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
234
235 /* Case alpha == 1. */
236
237 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
238 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
239
240 ci = 0;
241 ai = 0;
242 for (i = 0; i < m; i++, ci += incci, ai += incai) {
243
244 cij = ci;
245 bj = 0;
246
247 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
248
249 aih = ai;
250 bhj = bj;
251
252 sum[0] = sum[1] = 0.0;
253
254 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
255 a_elem[0] = a_i[aih];
256 a_elem[1] = a_i[aih + 1];
257 b_elem[0] = b_i[bhj];
258 b_elem[1] = b_i[bhj + 1];
259 if (transa == blas_conj_trans) {
260 a_elem[1] = -a_elem[1];
261 }
262 if (transb == blas_conj_trans) {
263 b_elem[1] = -b_elem[1];
264 }
265 {
266 prod[0] =
267 (double) a_elem[0] * b_elem[0] -
268 (double) a_elem[1] * b_elem[1];
269 prod[1] =
270 (double) a_elem[0] * b_elem[1] +
271 (double) a_elem[1] * b_elem[0];
272 }
273 sum[0] = sum[0] + prod[0];
274 sum[1] = sum[1] + prod[1];
275 }
276 c_i[cij] = sum[0];
277 c_i[cij + 1] = sum[1];
278 }
279 }
280
281 } else {
282 /* Case alpha == 1, but beta != 0.
283 We compute C <--- A * B + beta * C */
284
285 ci = 0;
286 ai = 0;
287 for (i = 0; i < m; i++, ci += incci, ai += incai) {
288
289 cij = ci;
290 bj = 0;
291
292 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
293
294 aih = ai;
295 bhj = bj;
296
297 sum[0] = sum[1] = 0.0;
298
299 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
300 a_elem[0] = a_i[aih];
301 a_elem[1] = a_i[aih + 1];
302 b_elem[0] = b_i[bhj];
303 b_elem[1] = b_i[bhj + 1];
304 if (transa == blas_conj_trans) {
305 a_elem[1] = -a_elem[1];
306 }
307 if (transb == blas_conj_trans) {
308 b_elem[1] = -b_elem[1];
309 }
310 {
311 prod[0] =
312 (double) a_elem[0] * b_elem[0] -
313 (double) a_elem[1] * b_elem[1];
314 prod[1] =
315 (double) a_elem[0] * b_elem[1] +
316 (double) a_elem[1] * b_elem[0];
317 }
318 sum[0] = sum[0] + prod[0];
319 sum[1] = sum[1] + prod[1];
320 }
321
322 c_elem[0] = c_i[cij];
323 c_elem[1] = c_i[cij + 1];
324 {
325 tmp2[0] =
326 (double) c_elem[0] * beta_i[0] - (double) c_elem[1] * beta_i[1];
327 tmp2[1] =
328 (double) c_elem[0] * beta_i[1] + (double) c_elem[1] * beta_i[0];
329 }
330 tmp1[0] = sum[0];
331 tmp1[1] = sum[1];
332 tmp1[0] = tmp2[0] + tmp1[0];
333 tmp1[1] = tmp2[1] + tmp1[1];
334 c_i[cij] = tmp1[0];
335 c_i[cij + 1] = tmp1[1];
336 }
337 }
338 }
339
340 } else {
341
342 /* The most general form, C <-- alpha * A * B + beta * C */
343 ci = 0;
344 ai = 0;
345 for (i = 0; i < m; i++, ci += incci, ai += incai) {
346
347 cij = ci;
348 bj = 0;
349
350 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
351
352 aih = ai;
353 bhj = bj;
354
355 sum[0] = sum[1] = 0.0;
356
357 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
358 a_elem[0] = a_i[aih];
359 a_elem[1] = a_i[aih + 1];
360 b_elem[0] = b_i[bhj];
361 b_elem[1] = b_i[bhj + 1];
362 if (transa == blas_conj_trans) {
363 a_elem[1] = -a_elem[1];
364 }
365 if (transb == blas_conj_trans) {
366 b_elem[1] = -b_elem[1];
367 }
368 {
369 prod[0] =
370 (double) a_elem[0] * b_elem[0] - (double) a_elem[1] * b_elem[1];
371 prod[1] =
372 (double) a_elem[0] * b_elem[1] + (double) a_elem[1] * b_elem[0];
373 }
374 sum[0] = sum[0] + prod[0];
375 sum[1] = sum[1] + prod[1];
376 }
377
378 {
379 tmp1[0] =
380 (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
381 tmp1[1] =
382 (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
383 }
384 c_elem[0] = c_i[cij];
385 c_elem[1] = c_i[cij + 1];
386 {
387 tmp2[0] =
388 (double) c_elem[0] * beta_i[0] - (double) c_elem[1] * beta_i[1];
389 tmp2[1] =
390 (double) c_elem[0] * beta_i[1] + (double) c_elem[1] * beta_i[0];
391 }
392 tmp1[0] = tmp1[0] + tmp2[0];
393 tmp1[1] = tmp1[1] + tmp2[1];
394 c_i[cij] = tmp1[0];
395 c_i[cij + 1] = tmp1[1];
396 }
397 }
398
399 }
400
401
402
403 }
404