1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_dgemm_s_s(enum blas_order_type order,enum blas_trans_type transa,enum blas_trans_type transb,int m,int n,int k,double alpha,const float * a,int lda,const float * b,int ldb,double beta,double * c,int ldc)3 void BLAS_dgemm_s_s(enum blas_order_type order, enum blas_trans_type transa,
4 enum blas_trans_type transb, int m, int n, int k,
5 double alpha, const float *a, int lda, const float *b,
6 int ldb, double beta, double *c, int ldc)
7
8 /*
9 * Purpose
10 * =======
11 *
12 * This routine computes the matrix product:
13 *
14 * C <- alpha * op(A) * op(B) + beta * C .
15 *
16 * where op(M) represents either M, M transpose,
17 * or M conjugate transpose.
18 *
19 * Arguments
20 * =========
21 *
22 * order (input) enum blas_order_type
23 * Storage format of input matrices A, B, and C.
24 *
25 * transa (input) enum blas_trans_type
26 * Operation to be done on matrix A before multiplication.
27 * Can be no operation, transposition, or conjugate transposition.
28 *
29 * transb (input) enum blas_trans_type
30 * Operation to be done on matrix B before multiplication.
31 * Can be no operation, transposition, or conjugate transposition.
32 *
33 * m n k (input) int
34 * The dimensions of matrices A, B, and C.
35 * Matrix C is m-by-n matrix.
36 * Matrix A is m-by-k if A is not transposed,
37 * k-by-m otherwise.
38 * Matrix B is k-by-n if B is not transposed,
39 * n-by-k otherwise.
40 *
41 * alpha (input) double
42 *
43 * a (input) const float*
44 * matrix A.
45 *
46 * lda (input) int
47 * leading dimension of A.
48 *
49 * b (input) const float*
50 * matrix B
51 *
52 * ldb (input) int
53 * leading dimension of B.
54 *
55 * beta (input) double
56 *
57 * c (input/output) double*
58 * matrix C
59 *
60 * ldc (input) int
61 * leading dimension of C.
62 *
63 */
64 {
65 static const char routine_name[] = "BLAS_dgemm_s_s";
66
67
68 /* Integer Index Variables */
69 int i, j, h;
70
71 int ai, bj, ci;
72 int aih, bhj, cij; /* Index into matrices a, b, c during multiply */
73
74 int incai, incaih; /* Index increments for matrix a */
75 int incbj, incbhj; /* Index increments for matrix b */
76 int incci, inccij; /* Index increments for matrix c */
77
78 /* Input Matrices */
79 const float *a_i = a;
80 const float *b_i = b;
81
82 /* Output Matrix */
83 double *c_i = c;
84
85 /* Input Scalars */
86 double alpha_i = alpha;
87 double beta_i = beta;
88
89 /* Temporary Floating-Point Variables */
90 float a_elem;
91 float b_elem;
92 double c_elem;
93 double prod;
94 double sum;
95 double tmp1;
96 double tmp2;
97
98
99
100 /* Test for error conditions */
101 if (m < 0)
102 BLAS_error(routine_name, -4, m, NULL);
103 if (n < 0)
104 BLAS_error(routine_name, -5, n, NULL);
105 if (k < 0)
106 BLAS_error(routine_name, -6, k, NULL);
107
108 if (order == blas_colmajor) {
109
110 if (ldc < m)
111 BLAS_error(routine_name, -14, ldc, NULL);
112
113 if (transa == blas_no_trans) {
114 if (lda < m)
115 BLAS_error(routine_name, -9, lda, NULL);
116 } else {
117 if (lda < k)
118 BLAS_error(routine_name, -9, lda, NULL);
119 }
120
121 if (transb == blas_no_trans) {
122 if (ldb < k)
123 BLAS_error(routine_name, -11, ldb, NULL);
124 } else {
125 if (ldb < n)
126 BLAS_error(routine_name, -11, ldb, NULL);
127 }
128
129 } else {
130 /* row major */
131 if (ldc < n)
132 BLAS_error(routine_name, -14, ldc, NULL);
133
134 if (transa == blas_no_trans) {
135 if (lda < k)
136 BLAS_error(routine_name, -9, lda, NULL);
137 } else {
138 if (lda < m)
139 BLAS_error(routine_name, -9, lda, NULL);
140 }
141
142 if (transb == blas_no_trans) {
143 if (ldb < n)
144 BLAS_error(routine_name, -11, ldb, NULL);
145 } else {
146 if (ldb < k)
147 BLAS_error(routine_name, -11, ldb, NULL);
148 }
149 }
150
151 /* Test for no-op */
152 if (n == 0 || m == 0 || k == 0)
153 return;
154 if (alpha_i == 0.0 && beta_i == 1.0) {
155 return;
156 }
157
158 /* Set Index Parameters */
159 if (order == blas_colmajor) {
160 incci = 1;
161 inccij = ldc;
162
163 if (transa == blas_no_trans) {
164 incai = 1;
165 incaih = lda;
166 } else {
167 incai = lda;
168 incaih = 1;
169 }
170
171 if (transb == blas_no_trans) {
172 incbj = ldb;
173 incbhj = 1;
174 } else {
175 incbj = 1;
176 incbhj = ldb;
177 }
178
179 } else {
180 /* row major */
181 incci = ldc;
182 inccij = 1;
183
184 if (transa == blas_no_trans) {
185 incai = lda;
186 incaih = 1;
187 } else {
188 incai = 1;
189 incaih = lda;
190 }
191
192 if (transb == blas_no_trans) {
193 incbj = 1;
194 incbhj = ldb;
195 } else {
196 incbj = ldb;
197 incbhj = 1;
198 }
199
200 }
201
202
203
204 /* Ajustment to increments */
205
206
207
208
209
210
211
212 /* alpha = 0. In this case, just return beta * C */
213 if (alpha_i == 0.0) {
214
215 ci = 0;
216 for (i = 0; i < m; i++, ci += incci) {
217 cij = ci;
218 for (j = 0; j < n; j++, cij += inccij) {
219 c_elem = c_i[cij];
220 tmp1 = c_elem * beta_i;
221 c_i[cij] = tmp1;
222 }
223 }
224
225 } else if (alpha_i == 1.0) {
226
227 /* Case alpha == 1. */
228
229 if (beta_i == 0.0) {
230 /* Case alpha == 1, beta == 0. We compute C <--- A * B */
231
232 ci = 0;
233 ai = 0;
234 for (i = 0; i < m; i++, ci += incci, ai += incai) {
235
236 cij = ci;
237 bj = 0;
238
239 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
240
241 aih = ai;
242 bhj = bj;
243
244 sum = 0.0;
245
246 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
247 a_elem = a_i[aih];
248 b_elem = b_i[bhj];
249 if (transa == blas_conj_trans) {
250
251 }
252 if (transb == blas_conj_trans) {
253
254 }
255 prod = (double) a_elem *b_elem;
256 sum = sum + prod;
257 }
258 c_i[cij] = sum;
259 }
260 }
261
262 } else {
263 /* Case alpha == 1, but beta != 0.
264 We compute C <--- A * B + beta * C */
265
266 ci = 0;
267 ai = 0;
268 for (i = 0; i < m; i++, ci += incci, ai += incai) {
269
270 cij = ci;
271 bj = 0;
272
273 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
274
275 aih = ai;
276 bhj = bj;
277
278 sum = 0.0;
279
280 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
281 a_elem = a_i[aih];
282 b_elem = b_i[bhj];
283 if (transa == blas_conj_trans) {
284
285 }
286 if (transb == blas_conj_trans) {
287
288 }
289 prod = (double) a_elem *b_elem;
290 sum = sum + prod;
291 }
292
293 c_elem = c_i[cij];
294 tmp2 = c_elem * beta_i;
295 tmp1 = sum;
296 tmp1 = tmp2 + tmp1;
297 c_i[cij] = tmp1;
298 }
299 }
300 }
301
302 } else {
303
304 /* The most general form, C <-- alpha * A * B + beta * C */
305 ci = 0;
306 ai = 0;
307 for (i = 0; i < m; i++, ci += incci, ai += incai) {
308
309 cij = ci;
310 bj = 0;
311
312 for (j = 0; j < n; j++, cij += inccij, bj += incbj) {
313
314 aih = ai;
315 bhj = bj;
316
317 sum = 0.0;
318
319 for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
320 a_elem = a_i[aih];
321 b_elem = b_i[bhj];
322 if (transa == blas_conj_trans) {
323
324 }
325 if (transb == blas_conj_trans) {
326
327 }
328 prod = (double) a_elem *b_elem;
329 sum = sum + prod;
330 }
331
332 tmp1 = sum * alpha_i;
333 c_elem = c_i[cij];
334 tmp2 = c_elem * beta_i;
335 tmp1 = tmp1 + tmp2;
336 c_i[cij] = tmp1;
337 }
338 }
339
340 }
341
342
343
344 }
345