1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_cgemm_s_c(enum blas_order_type order,enum blas_trans_type transa,enum blas_trans_type transb,int m,int n,int k,const void * alpha,const float * a,int lda,const void * b,int ldb,const void * beta,void * c,int ldc)3 void BLAS_cgemm_s_c(enum blas_order_type order, enum blas_trans_type transa,
4 enum blas_trans_type transb, int m, int n, int k,
5 const void *alpha, const float *a, int lda, const void *b,
6 int ldb, const void *beta, void *c, int ldc)
7
8 /*
9 * Purpose
10 * =======
11 *
12 * This routine computes the matrix product:
13 *
14 * C <- alpha * op(A) * op(B) + beta * C .
15 *
16 * where op(M) represents either M, M transpose,
17 * or M conjugate transpose.
18 *
19 * Arguments
20 * =========
21 *
22 * order (input) enum blas_order_type
23 * Storage format of input matrices A, B, and C.
24 *
25 * transa (input) enum blas_trans_type
26 * Operation to be done on matrix A before multiplication.
27 * Can be no operation, transposition, or conjugate transposition.
28 *
29 * transb (input) enum blas_trans_type
30 * Operation to be done on matrix B before multiplication.
31 * Can be no operation, transposition, or conjugate transposition.
32 *
33 * m n k (input) int
34 * The dimensions of matrices A, B, and C.
35 * Matrix C is m-by-n matrix.
36 * Matrix A is m-by-k if A is not transposed,
37 * k-by-m otherwise.
38 * Matrix B is k-by-n if B is not transposed,
39 * n-by-k otherwise.
40 *
41 * alpha (input) const void*
42 *
43 * a (input) const float*
44 * matrix A.
45 *
46 * lda (input) int
47 * leading dimension of A.
48 *
49 * b (input) const void*
50 * matrix B
51 *
52 * ldb (input) int
53 * leading dimension of B.
54 *
55 * beta (input) const void*
56 *
57 * c (input/output) void*
58 * matrix C
59 *
60 * ldc (input) int
61 * leading dimension of C.
62 *
63 */
64 {
65 static const char routine_name[] = "BLAS_cgemm_s_c";
66
67
68 /* Integer Index Variables */
69 int i, j, h;
70
71 int ai, bj, ci;
72 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
73
74 int incai, incaih; /* Index increments for matrix a */
75 int incbj, incbhj; /* Index increments for matrix b */
76 int incci, inccij; /* Index increments for matrix c */
77
78 /* Input Matrices */
79 const float *a_i = a;
80 const float *b_i = (float *) b;
81
82 /* Output Matrix */
83 float *c_i = (float *) c;
84
85 /* Input Scalars */
86 float *alpha_i = (float *) alpha;
87 float *beta_i = (float *) beta;
88
89 /* Temporary Floating-Point Variables */
90 float a_elem;
91 float b_elem[2];
92 float c_elem[2];
93 float prod[2];
94 float sum[2];
95 float tmp1[2];
96 float tmp2[2];
97
98
99
100 /* Test for error conditions */
101 if (m < 0)
102 BLAS_error(routine_name, -4, m, NULL);
103 if (n < 0)
104 BLAS_error(routine_name, -5, n, NULL);
105 if (k < 0)
106 BLAS_error(routine_name, -6, k, NULL);
107
108 if (order == blas_colmajor) {
109
110 if (ldc < m)
111 BLAS_error(routine_name, -14, ldc, NULL);
112
113 if (transa == blas_no_trans) {
114 if (lda < m)
115 BLAS_error(routine_name, -9, lda, NULL);
116 } else {
117 if (lda < k)
118 BLAS_error(routine_name, -9, lda, NULL);
119 }
120
121 if (transb == blas_no_trans) {
122 if (ldb < k)
123 BLAS_error(routine_name, -11, ldb, NULL);
124 } else {
125 if (ldb < n)
126 BLAS_error(routine_name, -11, ldb, NULL);
127 }
128
129 } else {
130 /* row major */
131 if (ldc < n)
132 BLAS_error(routine_name, -14, ldc, NULL);
133
134 if (transa == blas_no_trans) {
135 if (lda < k)
136 BLAS_error(routine_name, -9, lda, NULL);
137 } else {
138 if (lda < m)
139 BLAS_error(routine_name, -9, lda, NULL);
140 }
141
142 if (transb == blas_no_trans) {
143 if (ldb < n)
144 BLAS_error(routine_name, -11, ldb, NULL);
145 } else {
146 if (ldb < k)
147 BLAS_error(routine_name, -11, ldb, NULL);
148 }
149 }
150
151 /* Test for no-op */
152 if (n == 0 || m == 0 || k == 0)
153 return;
154 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
155 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
156 return;
157 }
158
159 /* Set Index Parameters */
160 if (order == blas_colmajor) {
161 incci = 1;
162 inccij = ldc;
163
164 if (transa == blas_no_trans) {
165 incai = 1;
166 incaih = lda;
167 } else {
168 incai = lda;
169 incaih = 1;
170 }
171
172 if (transb == blas_no_trans) {
173 incbj = ldb;
174 incbhj = 1;
175 } else {
176 incbj = 1;
177 incbhj = ldb;
178 }
179
180 } else {
181 /* row major */
182 incci = ldc;
183 inccij = 1;
184
185 if (transa == blas_no_trans) {
186 incai = lda;
187 incaih = 1;
188 } else {
189 incai = 1;
190 incaih = lda;
191 }
192
193 if (transb == blas_no_trans) {
194 incbj = 1;
195 incbhj = ldb;
196 } else {
197 incbj = ldb;
198 incbhj = 1;
199 }
200
201 }
202
203
204
205 /* Ajustment to increments */
206 incci *= 2;
207 inccij *= 2;
208
209
210 incbj *= 2;
211 incbhj *= 2;
212
213 /* alpha = 0. In this case, just return beta * C */
214 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
215
216 ci = 0;
217 for (i = 0; i < m; i++, ci += incci) {
218 cij = ci;
219 for (j = 0; j < n; j++, cij += inccij) {
220 c_elem[0] = c_i[cij];
221 c_elem[1] = c_i[cij + 1];
222 {
223 tmp1[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
224 tmp1[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
225 }
226
227 c_i[cij] = tmp1[0];
228 c_i[cij + 1] = tmp1[1];
229 }
230 }
231
232 } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
233
234 /* Case alpha == 1. */
235
236 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
237 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
238
239 ci = 0;
240 ai = 0;
241 for (i = 0; i < m; i++, ci += incci, ai += incai) {
242
243 cij = ci;
244 bj = 0;
245
246 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
247
248 aih = ai;
249 bhj = bj;
250
251 sum[0] = sum[1] = 0.0;
252
253 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
254 a_elem = a_i[aih];
255 b_elem[0] = b_i[bhj];
256 b_elem[1] = b_i[bhj + 1];
257 if (transa == blas_conj_trans) {
258
259 }
260 if (transb == blas_conj_trans) {
261 b_elem[1] = -b_elem[1];
262 }
263 {
264 prod[0] = b_elem[0] * a_elem;
265 prod[1] = b_elem[1] * a_elem;
266 }
267 sum[0] = sum[0] + prod[0];
268 sum[1] = sum[1] + prod[1];
269 }
270 c_i[cij] = sum[0];
271 c_i[cij + 1] = sum[1];
272 }
273 }
274
275 } else {
276 /* Case alpha == 1, but beta != 0.
277 We compute C <--- A * B + beta * C */
278
279 ci = 0;
280 ai = 0;
281 for (i = 0; i < m; i++, ci += incci, ai += incai) {
282
283 cij = ci;
284 bj = 0;
285
286 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
287
288 aih = ai;
289 bhj = bj;
290
291 sum[0] = sum[1] = 0.0;
292
293 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
294 a_elem = a_i[aih];
295 b_elem[0] = b_i[bhj];
296 b_elem[1] = b_i[bhj + 1];
297 if (transa == blas_conj_trans) {
298
299 }
300 if (transb == blas_conj_trans) {
301 b_elem[1] = -b_elem[1];
302 }
303 {
304 prod[0] = b_elem[0] * a_elem;
305 prod[1] = b_elem[1] * a_elem;
306 }
307 sum[0] = sum[0] + prod[0];
308 sum[1] = sum[1] + prod[1];
309 }
310
311 c_elem[0] = c_i[cij];
312 c_elem[1] = c_i[cij + 1];
313 {
314 tmp2[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
315 tmp2[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
316 }
317
318 tmp1[0] = sum[0];
319 tmp1[1] = sum[1];
320 tmp1[0] = tmp2[0] + tmp1[0];
321 tmp1[1] = tmp2[1] + tmp1[1];
322 c_i[cij] = tmp1[0];
323 c_i[cij + 1] = tmp1[1];
324 }
325 }
326 }
327
328 } else {
329
330 /* The most general form, C <-- alpha * A * B + beta * C */
331 ci = 0;
332 ai = 0;
333 for (i = 0; i < m; i++, ci += incci, ai += incai) {
334
335 cij = ci;
336 bj = 0;
337
338 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
339
340 aih = ai;
341 bhj = bj;
342
343 sum[0] = sum[1] = 0.0;
344
345 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
346 a_elem = a_i[aih];
347 b_elem[0] = b_i[bhj];
348 b_elem[1] = b_i[bhj + 1];
349 if (transa == blas_conj_trans) {
350
351 }
352 if (transb == blas_conj_trans) {
353 b_elem[1] = -b_elem[1];
354 }
355 {
356 prod[0] = b_elem[0] * a_elem;
357 prod[1] = b_elem[1] * a_elem;
358 }
359 sum[0] = sum[0] + prod[0];
360 sum[1] = sum[1] + prod[1];
361 }
362
363 {
364 tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
365 tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
366 }
367
368 c_elem[0] = c_i[cij];
369 c_elem[1] = c_i[cij + 1];
370 {
371 tmp2[0] = c_elem[0] * beta_i[0] - c_elem[1] * beta_i[1];
372 tmp2[1] = c_elem[0] * beta_i[1] + c_elem[1] * beta_i[0];
373 }
374
375 tmp1[0] = tmp1[0] + tmp2[0];
376 tmp1[1] = tmp1[1] + tmp2[1];
377 c_i[cij] = tmp1[0];
378 c_i[cij + 1] = tmp1[1];
379 }
380 }
381
382 }
383
384
385
386 }
387