1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "blis1.h"
12
/*
 * bl1_strmmsx
 *
 * Single-precision real variant. Runs bl1_strmm() on a scratch copy of B
 * and folds the outcome into C, so that B itself is left untouched.
 *
 * Sequence of work:
 *   1. A, B and C are handed to the bl1_screate_contig* helpers, which
 *      substitute temporary contiguous matrices if necessary.
 *   2. An m-by-n scratch buffer is allocated and B is copied into it.
 *   3. bl1_strmm() is applied to the scratch buffer.
 *   4. C is scaled by beta, then the scratch buffer is added into C.
 *   5. The scratch buffer and any temporaries are freed, with the result
 *      copied back to the original C.
 *
 * NOTE(review): the net effect is presumably
 * C := beta * C + alpha * op(A) * B (or B * op(A), depending on side);
 * confirm against the bl1_strmm() contract.
 *
 * Parameters:
 *   side, uplo, trans, diag  forwarded to bl1_strmm(); side also selects
 *                            the order of A and uplo is passed to
 *                            bl1_screate_contigmr()
 *   m, n                     dimensions of B and C
 *   alpha                    scalar forwarded to bl1_strmm()
 *   a, a_rs, a_cs            matrix A and its row/column strides
 *   b, b_rs, b_cs            matrix B and its row/column strides
 *   beta                     scalar applied to C before accumulation
 *   c, c_rs, c_cs            matrix C (updated) and its row/column strides
 */
void bl1_strmmsx( side1_t  side,
                  uplo1_t  uplo,
                  trans1_t trans,
                  diag1_t  diag,
                  int      m,
                  int      n,
                  float*   alpha,
                  float*   a, int a_rs, int a_cs,
                  float*   b, int b_rs, int b_cs,
                  float*   beta,
                  float*   c, int c_rs, int c_cs )
{
    // Snapshot of the caller's arguments; needed again during clean-up
    // because a/b/c and their strides may be redirected below.
    int    m_orig    = m;
    int    n_orig    = n;
    float* a_orig    = a;
    float* b_orig    = b;
    float* c_orig    = c;
    int    a_rs_orig = a_rs;
    int    a_cs_orig = a_cs;
    int    b_rs_orig = b_rs;
    int    b_cs_orig = b_cs;
    int    c_rs_orig = c_rs;
    int    c_cs_orig = c_cs;
    float  unit      = bl1_s1();
    float* b_work;
    int    b_work_rs;
    int    b_work_cs;
    int    a_order;
    int    b_by_cols;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    // The order of A depends on which side it is applied from.
    bl1_set_dim_with_side( side, m, n, &a_order );

    // Swap in temporary contiguous matrices where required.
    bl1_screate_contigmr( uplo, a_order, a_order,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bl1_screate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bl1_screate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // Scratch buffer that takes the place of B in the multiply.
    b_work = bl1_sallocm( m, n );

    // Give the scratch buffer the same storage order as B: unit row stride
    // for column storage, unit column stride otherwise.
    b_by_cols = bl1_is_col_storage( b_rs, b_cs );
    b_work_rs = b_by_cols ? 1 : n;
    b_work_cs = b_by_cols ? m : 1;

    // Scratch := B.
    bl1_scopymt( BLIS1_NO_TRANSPOSE, m, n,
                 b, b_rs, b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Apply the trmm operation to the scratch buffer.
    bl1_strmm( side, uplo, trans, diag, m, n,
               alpha,
               a, a_rs, a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C.
    bl1_sscalm( BLIS1_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + scratch.
    bl1_saxpymt( BLIS1_NO_TRANSPOSE, m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c, c_rs, c_cs );

    // The scratch buffer is no longer needed.
    bl1_sfree( b_work );

    // Release the temporaries for A and B, then release the one for C,
    // copying the result back to the original matrix.
    bl1_sfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bl1_sfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bl1_sfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
118
/*
 * bl1_dtrmmsx
 *
 * Double-precision real variant. Runs bl1_dtrmm() on a scratch copy of B
 * and folds the outcome into C, so that B itself is left untouched.
 *
 * Sequence of work:
 *   1. A, B and C are handed to the bl1_dcreate_contig* helpers, which
 *      substitute temporary contiguous matrices if necessary.
 *   2. An m-by-n scratch buffer is allocated and B is copied into it.
 *   3. bl1_dtrmm() is applied to the scratch buffer.
 *   4. C is scaled by beta, then the scratch buffer is added into C.
 *   5. The scratch buffer and any temporaries are freed, with the result
 *      copied back to the original C.
 *
 * NOTE(review): the net effect is presumably
 * C := beta * C + alpha * op(A) * B (or B * op(A), depending on side);
 * confirm against the bl1_dtrmm() contract.
 *
 * Parameters:
 *   side, uplo, trans, diag  forwarded to bl1_dtrmm(); side also selects
 *                            the order of A and uplo is passed to
 *                            bl1_dcreate_contigmr()
 *   m, n                     dimensions of B and C
 *   alpha                    scalar forwarded to bl1_dtrmm()
 *   a, a_rs, a_cs            matrix A and its row/column strides
 *   b, b_rs, b_cs            matrix B and its row/column strides
 *   beta                     scalar applied to C before accumulation
 *   c, c_rs, c_cs            matrix C (updated) and its row/column strides
 */
void bl1_dtrmmsx( side1_t  side,
                  uplo1_t  uplo,
                  trans1_t trans,
                  diag1_t  diag,
                  int      m,
                  int      n,
                  double*  alpha,
                  double*  a, int a_rs, int a_cs,
                  double*  b, int b_rs, int b_cs,
                  double*  beta,
                  double*  c, int c_rs, int c_cs )
{
    // Snapshot of the caller's arguments; needed again during clean-up
    // because a/b/c and their strides may be redirected below.
    int     m_orig    = m;
    int     n_orig    = n;
    double* a_orig    = a;
    double* b_orig    = b;
    double* c_orig    = c;
    int     a_rs_orig = a_rs;
    int     a_cs_orig = a_cs;
    int     b_rs_orig = b_rs;
    int     b_cs_orig = b_cs;
    int     c_rs_orig = c_rs;
    int     c_cs_orig = c_cs;
    double  unit      = bl1_d1();
    double* b_work;
    int     b_work_rs;
    int     b_work_cs;
    int     a_order;
    int     b_by_cols;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    // The order of A depends on which side it is applied from.
    bl1_set_dim_with_side( side, m, n, &a_order );

    // Swap in temporary contiguous matrices where required.
    bl1_dcreate_contigmr( uplo, a_order, a_order,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bl1_dcreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bl1_dcreate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // Scratch buffer that takes the place of B in the multiply.
    b_work = bl1_dallocm( m, n );

    // Give the scratch buffer the same storage order as B: unit row stride
    // for column storage, unit column stride otherwise.
    b_by_cols = bl1_is_col_storage( b_rs, b_cs );
    b_work_rs = b_by_cols ? 1 : n;
    b_work_cs = b_by_cols ? m : 1;

    // Scratch := B.
    bl1_dcopymt( BLIS1_NO_TRANSPOSE, m, n,
                 b, b_rs, b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Apply the trmm operation to the scratch buffer.
    bl1_dtrmm( side, uplo, trans, diag, m, n,
               alpha,
               a, a_rs, a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C.
    bl1_dscalm( BLIS1_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + scratch.
    bl1_daxpymt( BLIS1_NO_TRANSPOSE, m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c, c_rs, c_cs );

    // The scratch buffer is no longer needed.
    bl1_dfree( b_work );

    // Release the temporaries for A and B, then release the one for C,
    // copying the result back to the original matrix.
    bl1_dfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bl1_dfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bl1_dfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
224
/*
 * bl1_ctrmmsx
 *
 * Single-precision complex variant. Runs bl1_ctrmm() on a scratch copy of
 * B and folds the outcome into C, so that B itself is left untouched.
 *
 * Sequence of work:
 *   1. A, B and C are handed to the bl1_ccreate_contig* helpers, which
 *      substitute temporary contiguous matrices if necessary.
 *   2. An m-by-n scratch buffer is allocated and B is copied into it.
 *   3. bl1_ctrmm() is applied to the scratch buffer.
 *   4. C is scaled by beta, then the scratch buffer is added into C.
 *   5. The scratch buffer and any temporaries are freed, with the result
 *      copied back to the original C.
 *
 * NOTE(review): the net effect is presumably
 * C := beta * C + alpha * op(A) * B (or B * op(A), depending on side);
 * confirm against the bl1_ctrmm() contract.
 *
 * Parameters:
 *   side, uplo, trans, diag  forwarded to bl1_ctrmm(); side also selects
 *                            the order of A and uplo is passed to
 *                            bl1_ccreate_contigmr()
 *   m, n                     dimensions of B and C
 *   alpha                    scalar forwarded to bl1_ctrmm()
 *   a, a_rs, a_cs            matrix A and its row/column strides
 *   b, b_rs, b_cs            matrix B and its row/column strides
 *   beta                     scalar applied to C before accumulation
 *   c, c_rs, c_cs            matrix C (updated) and its row/column strides
 */
void bl1_ctrmmsx( side1_t   side,
                  uplo1_t   uplo,
                  trans1_t  trans,
                  diag1_t   diag,
                  int       m,
                  int       n,
                  scomplex* alpha,
                  scomplex* a, int a_rs, int a_cs,
                  scomplex* b, int b_rs, int b_cs,
                  scomplex* beta,
                  scomplex* c, int c_rs, int c_cs )
{
    // Snapshot of the caller's arguments; needed again during clean-up
    // because a/b/c and their strides may be redirected below.
    int       m_orig    = m;
    int       n_orig    = n;
    scomplex* a_orig    = a;
    scomplex* b_orig    = b;
    scomplex* c_orig    = c;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       b_rs_orig = b_rs;
    int       b_cs_orig = b_cs;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;
    scomplex  unit      = bl1_c1();
    scomplex* b_work;
    int       b_work_rs;
    int       b_work_cs;
    int       a_order;
    int       b_by_cols;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    // The order of A depends on which side it is applied from.
    bl1_set_dim_with_side( side, m, n, &a_order );

    // Swap in temporary contiguous matrices where required.
    bl1_ccreate_contigmr( uplo, a_order, a_order,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bl1_ccreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bl1_ccreate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // Scratch buffer that takes the place of B in the multiply.
    b_work = bl1_callocm( m, n );

    // Give the scratch buffer the same storage order as B: unit row stride
    // for column storage, unit column stride otherwise.
    b_by_cols = bl1_is_col_storage( b_rs, b_cs );
    b_work_rs = b_by_cols ? 1 : n;
    b_work_cs = b_by_cols ? m : 1;

    // Scratch := B.
    bl1_ccopymt( BLIS1_NO_TRANSPOSE, m, n,
                 b, b_rs, b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Apply the trmm operation to the scratch buffer.
    bl1_ctrmm( side, uplo, trans, diag, m, n,
               alpha,
               a, a_rs, a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C.
    bl1_cscalm( BLIS1_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + scratch.
    bl1_caxpymt( BLIS1_NO_TRANSPOSE, m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c, c_rs, c_cs );

    // The scratch buffer is no longer needed.
    bl1_cfree( b_work );

    // Release the temporaries for A and B, then release the one for C,
    // copying the result back to the original matrix.
    bl1_cfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bl1_cfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bl1_cfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
330
/*
 * bl1_ztrmmsx
 *
 * Double-precision complex variant. Runs bl1_ztrmm() on a scratch copy of
 * B and folds the outcome into C, so that B itself is left untouched.
 *
 * Sequence of work:
 *   1. A, B and C are handed to the bl1_zcreate_contig* helpers, which
 *      substitute temporary contiguous matrices if necessary.
 *   2. An m-by-n scratch buffer is allocated and B is copied into it.
 *   3. bl1_ztrmm() is applied to the scratch buffer.
 *   4. C is scaled by beta, then the scratch buffer is added into C.
 *   5. The scratch buffer and any temporaries are freed, with the result
 *      copied back to the original C.
 *
 * NOTE(review): the net effect is presumably
 * C := beta * C + alpha * op(A) * B (or B * op(A), depending on side);
 * confirm against the bl1_ztrmm() contract.
 *
 * Parameters:
 *   side, uplo, trans, diag  forwarded to bl1_ztrmm(); side also selects
 *                            the order of A and uplo is passed to
 *                            bl1_zcreate_contigmr()
 *   m, n                     dimensions of B and C
 *   alpha                    scalar forwarded to bl1_ztrmm()
 *   a, a_rs, a_cs            matrix A and its row/column strides
 *   b, b_rs, b_cs            matrix B and its row/column strides
 *   beta                     scalar applied to C before accumulation
 *   c, c_rs, c_cs            matrix C (updated) and its row/column strides
 */
void bl1_ztrmmsx( side1_t   side,
                  uplo1_t   uplo,
                  trans1_t  trans,
                  diag1_t   diag,
                  int       m,
                  int       n,
                  dcomplex* alpha,
                  dcomplex* a, int a_rs, int a_cs,
                  dcomplex* b, int b_rs, int b_cs,
                  dcomplex* beta,
                  dcomplex* c, int c_rs, int c_cs )
{
    // Snapshot of the caller's arguments; needed again during clean-up
    // because a/b/c and their strides may be redirected below.
    int       m_orig    = m;
    int       n_orig    = n;
    dcomplex* a_orig    = a;
    dcomplex* b_orig    = b;
    dcomplex* c_orig    = c;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       b_rs_orig = b_rs;
    int       b_cs_orig = b_cs;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;
    dcomplex  unit      = bl1_z1();
    dcomplex* b_work;
    int       b_work_rs;
    int       b_work_cs;
    int       a_order;
    int       b_by_cols;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    // The order of A depends on which side it is applied from.
    bl1_set_dim_with_side( side, m, n, &a_order );

    // Swap in temporary contiguous matrices where required.
    bl1_zcreate_contigmr( uplo, a_order, a_order,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bl1_zcreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bl1_zcreate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // Scratch buffer that takes the place of B in the multiply.
    b_work = bl1_zallocm( m, n );

    // Give the scratch buffer the same storage order as B: unit row stride
    // for column storage, unit column stride otherwise.
    b_by_cols = bl1_is_col_storage( b_rs, b_cs );
    b_work_rs = b_by_cols ? 1 : n;
    b_work_cs = b_by_cols ? m : 1;

    // Scratch := B.
    bl1_zcopymt( BLIS1_NO_TRANSPOSE, m, n,
                 b, b_rs, b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Apply the trmm operation to the scratch buffer.
    bl1_ztrmm( side, uplo, trans, diag, m, n,
               alpha,
               a, a_rs, a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C.
    bl1_zscalm( BLIS1_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + scratch.
    bl1_zaxpymt( BLIS1_NO_TRANSPOSE, m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c, c_rs, c_cs );

    // The scratch buffer is no longer needed.
    bl1_zfree( b_work );

    // Release the temporaries for A and B, then release the one for C,
    // copying the result back to the original matrix.
    bl1_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bl1_zfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bl1_zfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
436
437