1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "blis1.h"
12 
// Exchange the m x n matrix B with op(A), where op(A) is A itself or A
// addressed through swapped row/column strides when bl1_does_trans( trans )
// holds. Both operands are described by general row and column strides.
// The exchange is performed by a sequence of bl1_sswap() calls, one per
// column of B, or one per row when both operands are effectively row-major.
// The routine returns immediately when bl1_zero_dim2( m, n ) holds.
void bl1_sswapmt( trans1_t trans, int m, int n, float* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs )
{
	int num_sweeps;       // number of bl1_sswap() calls issued
	int sweep_len;        // number of elements exchanged per call
	int a_sweep, a_step;  // A: offset between sweeps, stride within a sweep
	int b_sweep, b_step;  // B: offset between sweeps, stride within a sweep
	int a_is_trans;       // nonzero when A is accessed transposed
	int a_down, a_across; // strides of op(A) along its rows / columns
	int by_rows;          // nonzero when sweeping along rows instead of columns
	int k;

	// Empty operands: nothing to exchange.
	if ( bl1_zero_dim2( m, n ) ) return;

	if ( bl1_is_vector( m, n ) )
	{
		// Vector operands are exchanged with a single underlying swap.
		// The sweep offsets are only ever multiplied by zero in that case,
		// so their value is irrelevant.
		num_sweeps = 1;
		sweep_len  = bl1_vector_dim( m, n );
		a_sweep    = 1;
		b_sweep    = 1;
		a_step     = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
		b_step     = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
	}
	else
	{
		// Transposing A amounts to exchanging the roles of its two strides.
		a_is_trans = bl1_does_trans( trans );
		a_down     = a_is_trans ? a_cs : a_rs;
		a_across   = a_is_trans ? a_rs : a_cs;

		// Sweeping by columns suits column-major storage and is the default.
		// If B is row-major and A is effectively row-major once the
		// transposition is taken into account, sweep by rows instead for
		// better spatial locality.
		by_rows = 0;
		if ( bl1_is_row_storage( b_rs, b_cs ) )
		{
			by_rows = ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
			          ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) );
		}

		if ( by_rows )
		{
			num_sweeps = m;
			sweep_len  = n;
			a_sweep    = a_down;
			a_step     = a_across;
			b_sweep    = b_rs;
			b_step     = b_cs;
		}
		else
		{
			num_sweeps = n;
			sweep_len  = m;
			a_sweep    = a_across;
			a_step     = a_down;
			b_sweep    = b_cs;
			b_step     = b_rs;
		}
	}

	// Exchange one column (or row) per iteration.
	for ( k = 0; k < num_sweeps; k++ )
	{
		bl1_sswap( sweep_len,
		           a + k*a_sweep, a_step,
		           b + k*b_sweep, b_step );
	}
}
79 
// Exchange the m x n matrix B with op(A), where op(A) is A itself or A
// addressed through swapped row/column strides when bl1_does_trans( trans )
// holds. Both operands are described by general row and column strides.
// The exchange is performed by a sequence of bl1_dswap() calls, one per
// column of B, or one per row when both operands are effectively row-major.
// The routine returns immediately when bl1_zero_dim2( m, n ) holds.
void bl1_dswapmt( trans1_t trans, int m, int n, double* a, int a_rs, int a_cs, double* b, int b_rs, int b_cs )
{
	int num_sweeps;       // number of bl1_dswap() calls issued
	int sweep_len;        // number of elements exchanged per call
	int a_sweep, a_step;  // A: offset between sweeps, stride within a sweep
	int b_sweep, b_step;  // B: offset between sweeps, stride within a sweep
	int a_is_trans;       // nonzero when A is accessed transposed
	int a_down, a_across; // strides of op(A) along its rows / columns
	int by_rows;          // nonzero when sweeping along rows instead of columns
	int k;

	// Empty operands: nothing to exchange.
	if ( bl1_zero_dim2( m, n ) ) return;

	if ( bl1_is_vector( m, n ) )
	{
		// Vector operands are exchanged with a single underlying swap.
		// The sweep offsets are only ever multiplied by zero in that case,
		// so their value is irrelevant.
		num_sweeps = 1;
		sweep_len  = bl1_vector_dim( m, n );
		a_sweep    = 1;
		b_sweep    = 1;
		a_step     = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
		b_step     = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
	}
	else
	{
		// Transposing A amounts to exchanging the roles of its two strides.
		a_is_trans = bl1_does_trans( trans );
		a_down     = a_is_trans ? a_cs : a_rs;
		a_across   = a_is_trans ? a_rs : a_cs;

		// Sweeping by columns suits column-major storage and is the default.
		// If B is row-major and A is effectively row-major once the
		// transposition is taken into account, sweep by rows instead for
		// better spatial locality.
		by_rows = 0;
		if ( bl1_is_row_storage( b_rs, b_cs ) )
		{
			by_rows = ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
			          ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) );
		}

		if ( by_rows )
		{
			num_sweeps = m;
			sweep_len  = n;
			a_sweep    = a_down;
			a_step     = a_across;
			b_sweep    = b_rs;
			b_step     = b_cs;
		}
		else
		{
			num_sweeps = n;
			sweep_len  = m;
			a_sweep    = a_across;
			a_step     = a_down;
			b_sweep    = b_cs;
			b_step     = b_rs;
		}
	}

	// Exchange one column (or row) per iteration.
	for ( k = 0; k < num_sweeps; k++ )
	{
		bl1_dswap( sweep_len,
		           a + k*a_sweep, a_step,
		           b + k*b_sweep, b_step );
	}
}
146 
// Exchange the m x n matrix B with op(A), where op(A) is A itself or A
// addressed through swapped row/column strides when bl1_does_trans( trans )
// holds. Both operands are described by general row and column strides.
// The exchange is performed by a sequence of bl1_cswap() calls, one per
// column of B, or one per row when both operands are effectively row-major.
// When bl1_does_conj( trans ) holds, bl1_cconjv() is additionally applied to
// the just-exchanged vector of A and then to that of B.
// The routine returns immediately when bl1_zero_dim2( m, n ) holds.
void bl1_cswapmt( trans1_t trans, int m, int n, scomplex* a, int a_rs, int a_cs, scomplex* b, int b_rs, int b_cs )
{
	scomplex* a_vec;      // start of the current sweep in A
	scomplex* b_vec;      // start of the current sweep in B
	int num_sweeps;       // number of bl1_cswap() calls issued
	int sweep_len;        // number of elements exchanged per call
	int a_sweep, a_step;  // A: offset between sweeps, stride within a sweep
	int b_sweep, b_step;  // B: offset between sweeps, stride within a sweep
	int a_is_trans;       // nonzero when A is accessed transposed
	int a_down, a_across; // strides of op(A) along its rows / columns
	int by_rows;          // nonzero when sweeping along rows instead of columns
	int conj_both;        // nonzero when both operands get conjugated
	int k;

	// Empty operands: nothing to exchange.
	if ( bl1_zero_dim2( m, n ) ) return;

	if ( bl1_is_vector( m, n ) )
	{
		// Vector operands are exchanged with a single underlying swap.
		// The sweep offsets are only ever multiplied by zero in that case,
		// so their value is irrelevant.
		num_sweeps = 1;
		sweep_len  = bl1_vector_dim( m, n );
		a_sweep    = 1;
		b_sweep    = 1;
		a_step     = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
		b_step     = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
	}
	else
	{
		// Transposing A amounts to exchanging the roles of its two strides.
		a_is_trans = bl1_does_trans( trans );
		a_down     = a_is_trans ? a_cs : a_rs;
		a_across   = a_is_trans ? a_rs : a_cs;

		// Sweeping by columns suits column-major storage and is the default.
		// If B is row-major and A is effectively row-major once the
		// transposition is taken into account, sweep by rows instead for
		// better spatial locality.
		by_rows = 0;
		if ( bl1_is_row_storage( b_rs, b_cs ) )
		{
			by_rows = ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
			          ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) );
		}

		if ( by_rows )
		{
			num_sweeps = m;
			sweep_len  = n;
			a_sweep    = a_down;
			a_step     = a_across;
			b_sweep    = b_rs;
			b_step     = b_cs;
		}
		else
		{
			num_sweeps = n;
			sweep_len  = m;
			a_sweep    = a_across;
			a_step     = a_down;
			b_sweep    = b_cs;
			b_step     = b_rs;
		}
	}

	// The conjugation request depends only on trans, so look it up once.
	conj_both = bl1_does_conj( trans );

	// Exchange one column (or row) per iteration, conjugating afterwards
	// when requested.
	for ( k = 0; k < num_sweeps; k++ )
	{
		a_vec = a + k*a_sweep;
		b_vec = b + k*b_sweep;

		bl1_cswap( sweep_len,
		           a_vec, a_step,
		           b_vec, b_step );

		if ( conj_both )
		{
			bl1_cconjv( sweep_len,
			            a_vec, a_step );
			bl1_cconjv( sweep_len,
			            b_vec, b_step );
		}
	}
}
221 
// Exchange the m x n matrix B with op(A), where op(A) is A itself or A
// addressed through swapped row/column strides when bl1_does_trans( trans )
// holds. Both operands are described by general row and column strides.
// The exchange is performed by a sequence of bl1_zswap() calls, one per
// column of B, or one per row when both operands are effectively row-major.
// When bl1_does_conj( trans ) holds, bl1_zconjv() is additionally applied to
// the just-exchanged vector of A and then to that of B.
// The routine returns immediately when bl1_zero_dim2( m, n ) holds.
void bl1_zswapmt( trans1_t trans, int m, int n, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs )
{
	dcomplex* a_vec;      // start of the current sweep in A
	dcomplex* b_vec;      // start of the current sweep in B
	int num_sweeps;       // number of bl1_zswap() calls issued
	int sweep_len;        // number of elements exchanged per call
	int a_sweep, a_step;  // A: offset between sweeps, stride within a sweep
	int b_sweep, b_step;  // B: offset between sweeps, stride within a sweep
	int a_is_trans;       // nonzero when A is accessed transposed
	int a_down, a_across; // strides of op(A) along its rows / columns
	int by_rows;          // nonzero when sweeping along rows instead of columns
	int conj_both;        // nonzero when both operands get conjugated
	int k;

	// Empty operands: nothing to exchange.
	if ( bl1_zero_dim2( m, n ) ) return;

	if ( bl1_is_vector( m, n ) )
	{
		// Vector operands are exchanged with a single underlying swap.
		// The sweep offsets are only ever multiplied by zero in that case,
		// so their value is irrelevant.
		num_sweeps = 1;
		sweep_len  = bl1_vector_dim( m, n );
		a_sweep    = 1;
		b_sweep    = 1;
		a_step     = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
		b_step     = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
	}
	else
	{
		// Transposing A amounts to exchanging the roles of its two strides.
		a_is_trans = bl1_does_trans( trans );
		a_down     = a_is_trans ? a_cs : a_rs;
		a_across   = a_is_trans ? a_rs : a_cs;

		// Sweeping by columns suits column-major storage and is the default.
		// If B is row-major and A is effectively row-major once the
		// transposition is taken into account, sweep by rows instead for
		// better spatial locality.
		by_rows = 0;
		if ( bl1_is_row_storage( b_rs, b_cs ) )
		{
			by_rows = ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
			          ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) );
		}

		if ( by_rows )
		{
			num_sweeps = m;
			sweep_len  = n;
			a_sweep    = a_down;
			a_step     = a_across;
			b_sweep    = b_rs;
			b_step     = b_cs;
		}
		else
		{
			num_sweeps = n;
			sweep_len  = m;
			a_sweep    = a_across;
			a_step     = a_down;
			b_sweep    = b_cs;
			b_step     = b_rs;
		}
	}

	// The conjugation request depends only on trans, so look it up once.
	conj_both = bl1_does_conj( trans );

	// Exchange one column (or row) per iteration, conjugating afterwards
	// when requested.
	for ( k = 0; k < num_sweeps; k++ )
	{
		a_vec = a + k*a_sweep;
		b_vec = b + k*b_sweep;

		bl1_zswap( sweep_len,
		           a_vec, a_step,
		           b_vec, b_step );

		if ( conj_both )
		{
			bl1_zconjv( sweep_len,
			            a_vec, a_step );
			bl1_zconjv( sweep_len,
			            b_vec, b_step );
		}
	}
}
296 
297