1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "blis1.h"
12 
// bl1_strmmsx: single-precision real variant.
//
// Applies bl1_strmm() to a scratch copy of B (so the original B is left
// untouched), scales C by beta, and then adds the scratch result into C.
// In effect:
//
//     C := beta * C + trmm( alpha, A, B )
//
// NOTE(review): the exact product formed by bl1_strmm() (presumably
// alpha * op(A) * B or alpha * B * op(A), chosen by side) is defined
// outside this file -- confirm against that routine.
//
// Parameters:
//   side, uplo, trans, diag  forwarded unchanged to bl1_strmm(). side is
//                            also used, with m and n, to derive the
//                            dimension of the square matrix A, and uplo is
//                            passed to the contiguous-copy helper for A.
//   m, n                     dimensions of B and C.
//   alpha                    scalar forwarded to bl1_strmm().
//   a, a_rs, a_cs            matrix A and its row/column strides.
//   b, b_rs, b_cs            matrix B and its row/column strides.
//   beta                     scalar applied to C before the accumulation.
//   c, c_rs, c_cs            matrix C and its row/column strides.
void bl1_strmmsx( side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag,
                  int m, int n,
                  float* alpha,
                  float* a, int a_rs, int a_cs,
                  float* b, int b_rs, int b_cs,
                  float* beta,
                  float* c, int c_rs, int c_cs )
{
	// Snapshot of the caller's arguments. The working pointers and strides
	// (a, b, c, *_rs, *_cs) are handed by address to the contiguous-copy
	// helpers below, while these originals are needed again at release time.
	int    m_orig    = m;
	int    n_orig    = n;
	float* a_orig    = a;
	float* b_orig    = b;
	float* c_orig    = c;
	int    a_rs_orig = a_rs;
	int    a_cs_orig = a_cs;
	int    b_rs_orig = b_rs;
	int    b_cs_orig = b_cs;
	int    c_rs_orig = c_rs;
	int    c_cs_orig = c_cs;
	float  unit      = bl1_s1();
	float* b_work;
	int    b_work_rs;
	int    b_work_cs;
	int    a_dim;
	int    b_is_col;

	// Bail out before allocating anything if bl1_zero_dim2() says there is
	// no work to do.
	if ( bl1_zero_dim2( m, n ) )
	{
		return;
	}

	// Where required, switch A, B and C over to temporary contiguous
	// copies. A is square with a side-dependent dimension.
	bl1_set_dim_with_side( side, m, n, &a_dim );
	bl1_screate_contigmr( uplo, a_dim, a_dim,
	                      a_orig, a_rs_orig, a_cs_orig,
	                      &a, &a_rs, &a_cs );
	bl1_screate_contigm( m, n,
	                     b_orig, b_rs_orig, b_cs_orig,
	                     &b, &b_rs, &b_cs );
	bl1_screate_contigm( m, n,
	                     c_orig, c_rs_orig, c_cs_orig,
	                     &c, &c_rs, &c_cs );

	// Scratch m-by-n matrix that receives the bl1_strmm() result in place
	// of B.
	b_work = bl1_sallocm( m, n );

	// Lay the scratch matrix out the same way as B: unit row stride when B
	// is column-stored, unit column stride otherwise.
	b_is_col  = bl1_is_col_storage( b_rs, b_cs );
	b_work_rs = b_is_col ? 1 : n;
	b_work_cs = b_is_col ? m : 1;

	// Seed the scratch matrix with the contents of B.
	bl1_scopymt( BLIS1_NO_TRANSPOSE, m, n,
	             b, b_rs, b_cs,
	             b_work, b_work_rs, b_work_cs );

	// Run the triangular operation on the scratch matrix.
	bl1_strmm( side, uplo, trans, diag, m, n,
	           alpha,
	           a, a_rs, a_cs,
	           b_work, b_work_rs, b_work_cs );

	// C := beta * C
	bl1_sscalm( BLIS1_NO_CONJUGATE, m, n,
	            beta,
	            c, c_rs, c_cs );

	// C := C + 1 * scratch
	bl1_saxpymt( BLIS1_NO_TRANSPOSE, m, n,
	             &unit,
	             b_work, b_work_rs, b_work_cs,
	             c, c_rs, c_cs );

	// The scratch matrix is no longer needed.
	bl1_sfree( b_work );

	// Release any temporary contiguous matrices; for C the result is
	// copied back into the caller's matrix.
	bl1_sfree_contigm( a_orig, a_rs_orig, a_cs_orig,
	                   &a, &a_rs, &a_cs );
	bl1_sfree_contigm( b_orig, b_rs_orig, b_cs_orig,
	                   &b, &b_rs, &b_cs );
	bl1_sfree_saved_contigm( m_orig, n_orig,
	                         c_orig, c_rs_orig, c_cs_orig,
	                         &c, &c_rs, &c_cs );
}
118 
// bl1_dtrmmsx: double-precision real variant.
//
// Applies bl1_dtrmm() to a scratch copy of B (so the original B is left
// untouched), scales C by beta, and then adds the scratch result into C.
// In effect:
//
//     C := beta * C + trmm( alpha, A, B )
//
// NOTE(review): the exact product formed by bl1_dtrmm() (presumably
// alpha * op(A) * B or alpha * B * op(A), chosen by side) is defined
// outside this file -- confirm against that routine.
//
// Parameters:
//   side, uplo, trans, diag  forwarded unchanged to bl1_dtrmm(). side is
//                            also used, with m and n, to derive the
//                            dimension of the square matrix A, and uplo is
//                            passed to the contiguous-copy helper for A.
//   m, n                     dimensions of B and C.
//   alpha                    scalar forwarded to bl1_dtrmm().
//   a, a_rs, a_cs            matrix A and its row/column strides.
//   b, b_rs, b_cs            matrix B and its row/column strides.
//   beta                     scalar applied to C before the accumulation.
//   c, c_rs, c_cs            matrix C and its row/column strides.
void bl1_dtrmmsx( side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag,
                  int m, int n,
                  double* alpha,
                  double* a, int a_rs, int a_cs,
                  double* b, int b_rs, int b_cs,
                  double* beta,
                  double* c, int c_rs, int c_cs )
{
	// Snapshot of the caller's arguments. The working pointers and strides
	// (a, b, c, *_rs, *_cs) are handed by address to the contiguous-copy
	// helpers below, while these originals are needed again at release time.
	int     m_orig    = m;
	int     n_orig    = n;
	double* a_orig    = a;
	double* b_orig    = b;
	double* c_orig    = c;
	int     a_rs_orig = a_rs;
	int     a_cs_orig = a_cs;
	int     b_rs_orig = b_rs;
	int     b_cs_orig = b_cs;
	int     c_rs_orig = c_rs;
	int     c_cs_orig = c_cs;
	double  unit      = bl1_d1();
	double* b_work;
	int     b_work_rs;
	int     b_work_cs;
	int     a_dim;
	int     b_is_col;

	// Bail out before allocating anything if bl1_zero_dim2() says there is
	// no work to do.
	if ( bl1_zero_dim2( m, n ) )
	{
		return;
	}

	// Where required, switch A, B and C over to temporary contiguous
	// copies. A is square with a side-dependent dimension.
	bl1_set_dim_with_side( side, m, n, &a_dim );
	bl1_dcreate_contigmr( uplo, a_dim, a_dim,
	                      a_orig, a_rs_orig, a_cs_orig,
	                      &a, &a_rs, &a_cs );
	bl1_dcreate_contigm( m, n,
	                     b_orig, b_rs_orig, b_cs_orig,
	                     &b, &b_rs, &b_cs );
	bl1_dcreate_contigm( m, n,
	                     c_orig, c_rs_orig, c_cs_orig,
	                     &c, &c_rs, &c_cs );

	// Scratch m-by-n matrix that receives the bl1_dtrmm() result in place
	// of B.
	b_work = bl1_dallocm( m, n );

	// Lay the scratch matrix out the same way as B: unit row stride when B
	// is column-stored, unit column stride otherwise.
	b_is_col  = bl1_is_col_storage( b_rs, b_cs );
	b_work_rs = b_is_col ? 1 : n;
	b_work_cs = b_is_col ? m : 1;

	// Seed the scratch matrix with the contents of B.
	bl1_dcopymt( BLIS1_NO_TRANSPOSE, m, n,
	             b, b_rs, b_cs,
	             b_work, b_work_rs, b_work_cs );

	// Run the triangular operation on the scratch matrix.
	bl1_dtrmm( side, uplo, trans, diag, m, n,
	           alpha,
	           a, a_rs, a_cs,
	           b_work, b_work_rs, b_work_cs );

	// C := beta * C
	bl1_dscalm( BLIS1_NO_CONJUGATE, m, n,
	            beta,
	            c, c_rs, c_cs );

	// C := C + 1 * scratch
	bl1_daxpymt( BLIS1_NO_TRANSPOSE, m, n,
	             &unit,
	             b_work, b_work_rs, b_work_cs,
	             c, c_rs, c_cs );

	// The scratch matrix is no longer needed.
	bl1_dfree( b_work );

	// Release any temporary contiguous matrices; for C the result is
	// copied back into the caller's matrix.
	bl1_dfree_contigm( a_orig, a_rs_orig, a_cs_orig,
	                   &a, &a_rs, &a_cs );
	bl1_dfree_contigm( b_orig, b_rs_orig, b_cs_orig,
	                   &b, &b_rs, &b_cs );
	bl1_dfree_saved_contigm( m_orig, n_orig,
	                         c_orig, c_rs_orig, c_cs_orig,
	                         &c, &c_rs, &c_cs );
}
224 
// bl1_ctrmmsx: scomplex variant.
//
// Applies bl1_ctrmm() to a scratch copy of B (so the original B is left
// untouched), scales C by beta, and then adds the scratch result into C.
// In effect:
//
//     C := beta * C + trmm( alpha, A, B )
//
// NOTE(review): the exact product formed by bl1_ctrmm() (presumably
// alpha * op(A) * B or alpha * B * op(A), chosen by side) is defined
// outside this file -- confirm against that routine.
//
// Parameters:
//   side, uplo, trans, diag  forwarded unchanged to bl1_ctrmm(). side is
//                            also used, with m and n, to derive the
//                            dimension of the square matrix A, and uplo is
//                            passed to the contiguous-copy helper for A.
//   m, n                     dimensions of B and C.
//   alpha                    scalar forwarded to bl1_ctrmm().
//   a, a_rs, a_cs            matrix A and its row/column strides.
//   b, b_rs, b_cs            matrix B and its row/column strides.
//   beta                     scalar applied to C before the accumulation.
//   c, c_rs, c_cs            matrix C and its row/column strides.
void bl1_ctrmmsx( side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag,
                  int m, int n,
                  scomplex* alpha,
                  scomplex* a, int a_rs, int a_cs,
                  scomplex* b, int b_rs, int b_cs,
                  scomplex* beta,
                  scomplex* c, int c_rs, int c_cs )
{
	// Snapshot of the caller's arguments. The working pointers and strides
	// (a, b, c, *_rs, *_cs) are handed by address to the contiguous-copy
	// helpers below, while these originals are needed again at release time.
	int       m_orig    = m;
	int       n_orig    = n;
	scomplex* a_orig    = a;
	scomplex* b_orig    = b;
	scomplex* c_orig    = c;
	int       a_rs_orig = a_rs;
	int       a_cs_orig = a_cs;
	int       b_rs_orig = b_rs;
	int       b_cs_orig = b_cs;
	int       c_rs_orig = c_rs;
	int       c_cs_orig = c_cs;
	scomplex  unit      = bl1_c1();
	scomplex* b_work;
	int       b_work_rs;
	int       b_work_cs;
	int       a_dim;
	int       b_is_col;

	// Bail out before allocating anything if bl1_zero_dim2() says there is
	// no work to do.
	if ( bl1_zero_dim2( m, n ) )
	{
		return;
	}

	// Where required, switch A, B and C over to temporary contiguous
	// copies. A is square with a side-dependent dimension.
	bl1_set_dim_with_side( side, m, n, &a_dim );
	bl1_ccreate_contigmr( uplo, a_dim, a_dim,
	                      a_orig, a_rs_orig, a_cs_orig,
	                      &a, &a_rs, &a_cs );
	bl1_ccreate_contigm( m, n,
	                     b_orig, b_rs_orig, b_cs_orig,
	                     &b, &b_rs, &b_cs );
	bl1_ccreate_contigm( m, n,
	                     c_orig, c_rs_orig, c_cs_orig,
	                     &c, &c_rs, &c_cs );

	// Scratch m-by-n matrix that receives the bl1_ctrmm() result in place
	// of B.
	b_work = bl1_callocm( m, n );

	// Lay the scratch matrix out the same way as B: unit row stride when B
	// is column-stored, unit column stride otherwise.
	b_is_col  = bl1_is_col_storage( b_rs, b_cs );
	b_work_rs = b_is_col ? 1 : n;
	b_work_cs = b_is_col ? m : 1;

	// Seed the scratch matrix with the contents of B.
	bl1_ccopymt( BLIS1_NO_TRANSPOSE, m, n,
	             b, b_rs, b_cs,
	             b_work, b_work_rs, b_work_cs );

	// Run the triangular operation on the scratch matrix.
	bl1_ctrmm( side, uplo, trans, diag, m, n,
	           alpha,
	           a, a_rs, a_cs,
	           b_work, b_work_rs, b_work_cs );

	// C := beta * C
	bl1_cscalm( BLIS1_NO_CONJUGATE, m, n,
	            beta,
	            c, c_rs, c_cs );

	// C := C + 1 * scratch
	bl1_caxpymt( BLIS1_NO_TRANSPOSE, m, n,
	             &unit,
	             b_work, b_work_rs, b_work_cs,
	             c, c_rs, c_cs );

	// The scratch matrix is no longer needed.
	bl1_cfree( b_work );

	// Release any temporary contiguous matrices; for C the result is
	// copied back into the caller's matrix.
	bl1_cfree_contigm( a_orig, a_rs_orig, a_cs_orig,
	                   &a, &a_rs, &a_cs );
	bl1_cfree_contigm( b_orig, b_rs_orig, b_cs_orig,
	                   &b, &b_rs, &b_cs );
	bl1_cfree_saved_contigm( m_orig, n_orig,
	                         c_orig, c_rs_orig, c_cs_orig,
	                         &c, &c_rs, &c_cs );
}
330 
// bl1_ztrmmsx: dcomplex variant.
//
// Applies bl1_ztrmm() to a scratch copy of B (so the original B is left
// untouched), scales C by beta, and then adds the scratch result into C.
// In effect:
//
//     C := beta * C + trmm( alpha, A, B )
//
// NOTE(review): the exact product formed by bl1_ztrmm() (presumably
// alpha * op(A) * B or alpha * B * op(A), chosen by side) is defined
// outside this file -- confirm against that routine.
//
// Parameters:
//   side, uplo, trans, diag  forwarded unchanged to bl1_ztrmm(). side is
//                            also used, with m and n, to derive the
//                            dimension of the square matrix A, and uplo is
//                            passed to the contiguous-copy helper for A.
//   m, n                     dimensions of B and C.
//   alpha                    scalar forwarded to bl1_ztrmm().
//   a, a_rs, a_cs            matrix A and its row/column strides.
//   b, b_rs, b_cs            matrix B and its row/column strides.
//   beta                     scalar applied to C before the accumulation.
//   c, c_rs, c_cs            matrix C and its row/column strides.
void bl1_ztrmmsx( side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag,
                  int m, int n,
                  dcomplex* alpha,
                  dcomplex* a, int a_rs, int a_cs,
                  dcomplex* b, int b_rs, int b_cs,
                  dcomplex* beta,
                  dcomplex* c, int c_rs, int c_cs )
{
	// Snapshot of the caller's arguments. The working pointers and strides
	// (a, b, c, *_rs, *_cs) are handed by address to the contiguous-copy
	// helpers below, while these originals are needed again at release time.
	int       m_orig    = m;
	int       n_orig    = n;
	dcomplex* a_orig    = a;
	dcomplex* b_orig    = b;
	dcomplex* c_orig    = c;
	int       a_rs_orig = a_rs;
	int       a_cs_orig = a_cs;
	int       b_rs_orig = b_rs;
	int       b_cs_orig = b_cs;
	int       c_rs_orig = c_rs;
	int       c_cs_orig = c_cs;
	dcomplex  unit      = bl1_z1();
	dcomplex* b_work;
	int       b_work_rs;
	int       b_work_cs;
	int       a_dim;
	int       b_is_col;

	// Bail out before allocating anything if bl1_zero_dim2() says there is
	// no work to do.
	if ( bl1_zero_dim2( m, n ) )
	{
		return;
	}

	// Where required, switch A, B and C over to temporary contiguous
	// copies. A is square with a side-dependent dimension.
	bl1_set_dim_with_side( side, m, n, &a_dim );
	bl1_zcreate_contigmr( uplo, a_dim, a_dim,
	                      a_orig, a_rs_orig, a_cs_orig,
	                      &a, &a_rs, &a_cs );
	bl1_zcreate_contigm( m, n,
	                     b_orig, b_rs_orig, b_cs_orig,
	                     &b, &b_rs, &b_cs );
	bl1_zcreate_contigm( m, n,
	                     c_orig, c_rs_orig, c_cs_orig,
	                     &c, &c_rs, &c_cs );

	// Scratch m-by-n matrix that receives the bl1_ztrmm() result in place
	// of B.
	b_work = bl1_zallocm( m, n );

	// Lay the scratch matrix out the same way as B: unit row stride when B
	// is column-stored, unit column stride otherwise.
	b_is_col  = bl1_is_col_storage( b_rs, b_cs );
	b_work_rs = b_is_col ? 1 : n;
	b_work_cs = b_is_col ? m : 1;

	// Seed the scratch matrix with the contents of B.
	bl1_zcopymt( BLIS1_NO_TRANSPOSE, m, n,
	             b, b_rs, b_cs,
	             b_work, b_work_rs, b_work_cs );

	// Run the triangular operation on the scratch matrix.
	bl1_ztrmm( side, uplo, trans, diag, m, n,
	           alpha,
	           a, a_rs, a_cs,
	           b_work, b_work_rs, b_work_cs );

	// C := beta * C
	bl1_zscalm( BLIS1_NO_CONJUGATE, m, n,
	            beta,
	            c, c_rs, c_cs );

	// C := C + 1 * scratch
	bl1_zaxpymt( BLIS1_NO_TRANSPOSE, m, n,
	             &unit,
	             b_work, b_work_rs, b_work_cs,
	             c, c_rs, c_cs );

	// The scratch matrix is no longer needed.
	bl1_zfree( b_work );

	// Release any temporary contiguous matrices; for C the result is
	// copied back into the caller's matrix.
	bl1_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
	                   &a, &a_rs, &a_cs );
	bl1_zfree_contigm( b_orig, b_rs_orig, b_cs_orig,
	                   &b, &b_rs, &b_cs );
	bl1_zfree_saved_contigm( m_orig, n_orig,
	                         c_orig, c_rs_orig, c_cs_orig,
	                         &c, &c_rs, &c_cs );
}
436 
437