1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "blis1.h"
12
// bl1_sswapmt: single-precision real variant of the matrix swap.
//
// Walks the m x n matrix B and the matrix A (viewed through trans) one
// column or one row at a time and passes each pair of strided vectors to
// bl1_sswap().
//
//   trans          selects whether A's row/column strides are exchanged
//   m, n           row and column counts used to traverse B
//   a, a_rs, a_cs  base pointer, row stride and column stride of A
//   b, b_rs, b_cs  base pointer, row stride and column stride of B
void bl1_sswapmt( trans1_t trans, int m, int n, float* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs )
{
    int num_vecs;      // number of bl1_sswap() invocations
    int vec_len;       // element count handed to each invocation
    int a_ld, a_inc;   // A: step between vectors / step within a vector
    int b_ld, b_inc;   // B: step between vectors / step within a vector
    int k;

    // Nothing to do when bl1_zero_dim2() reports an empty operand.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector operands: set things up so the underlying swap runs
        // exactly once over the whole vector.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );

        // With a single iteration these are only ever multiplied by
        // zero, so the value is irrelevant.
        a_ld     = 1;
        b_ld     = 1;
    }
    else
    {
        // General matrices: begin with a column-by-column traversal.
        num_vecs = n;
        vec_len  = m;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A's two strides trade roles when trans requests a transposition.
        if ( bl1_does_trans( trans ) )
        {
            a_ld  = a_rs;
            a_inc = a_cs;
        }
        else
        {
            a_ld  = a_cs;
            a_inc = a_rs;
        }

        // If B is row-stored and A is effectively row-stored once trans
        // is taken into account, traverse by rows instead of by columns
        // for better spatial locality.
        if ( ( bl1_is_row_storage( b_rs, b_cs ) ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    for ( k = 0; k < num_vecs; k++ )
    {
        // Start of the k-th vector in each operand.
        float* a_vec = a + k*a_ld;
        float* b_vec = b + k*b_ld;

        bl1_sswap( vec_len,
                   a_vec, a_inc,
                   b_vec, b_inc );
    }
}
79
// bl1_dswapmt: double-precision real variant of the matrix swap.
//
// Walks the m x n matrix B and the matrix A (viewed through trans) one
// column or one row at a time and passes each pair of strided vectors to
// bl1_dswap().
//
//   trans          selects whether A's row/column strides are exchanged
//   m, n           row and column counts used to traverse B
//   a, a_rs, a_cs  base pointer, row stride and column stride of A
//   b, b_rs, b_cs  base pointer, row stride and column stride of B
void bl1_dswapmt( trans1_t trans, int m, int n, double* a, int a_rs, int a_cs, double* b, int b_rs, int b_cs )
{
    int num_vecs;      // number of bl1_dswap() invocations
    int vec_len;       // element count handed to each invocation
    int a_ld, a_inc;   // A: step between vectors / step within a vector
    int b_ld, b_inc;   // B: step between vectors / step within a vector
    int k;

    // Nothing to do when bl1_zero_dim2() reports an empty operand.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector operands: set things up so the underlying swap runs
        // exactly once over the whole vector.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );

        // With a single iteration these are only ever multiplied by
        // zero, so the value is irrelevant.
        a_ld     = 1;
        b_ld     = 1;
    }
    else
    {
        // General matrices: begin with a column-by-column traversal.
        num_vecs = n;
        vec_len  = m;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A's two strides trade roles when trans requests a transposition.
        if ( bl1_does_trans( trans ) )
        {
            a_ld  = a_rs;
            a_inc = a_cs;
        }
        else
        {
            a_ld  = a_cs;
            a_inc = a_rs;
        }

        // If B is row-stored and A is effectively row-stored once trans
        // is taken into account, traverse by rows instead of by columns
        // for better spatial locality.
        if ( ( bl1_is_row_storage( b_rs, b_cs ) ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    for ( k = 0; k < num_vecs; k++ )
    {
        // Start of the k-th vector in each operand.
        double* a_vec = a + k*a_ld;
        double* b_vec = b + k*b_ld;

        bl1_dswap( vec_len,
                   a_vec, a_inc,
                   b_vec, b_inc );
    }
}
146
// bl1_cswapmt: single-precision complex variant of the matrix swap.
//
// Walks the m x n matrix B and the matrix A (viewed through trans) one
// column or one row at a time and passes each pair of strided vectors to
// bl1_cswap(). When trans carries a conjugation request, bl1_cconjv() is
// then applied to the A vector and to the B vector, in that order.
//
//   trans          selects stride exchange for A and optional conjugation
//   m, n           row and column counts used to traverse B
//   a, a_rs, a_cs  base pointer, row stride and column stride of A
//   b, b_rs, b_cs  base pointer, row stride and column stride of B
void bl1_cswapmt( trans1_t trans, int m, int n, scomplex* a, int a_rs, int a_cs, scomplex* b, int b_rs, int b_cs )
{
    int num_vecs;      // number of bl1_cswap() invocations
    int vec_len;       // element count handed to each invocation
    int a_ld, a_inc;   // A: step between vectors / step within a vector
    int b_ld, b_inc;   // B: step between vectors / step within a vector
    int conj_both;     // nonzero when trans requests conjugation
    int k;

    // Nothing to do when bl1_zero_dim2() reports an empty operand.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector operands: set things up so the underlying swap runs
        // exactly once over the whole vector.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );

        // With a single iteration these are only ever multiplied by
        // zero, so the value is irrelevant.
        a_ld     = 1;
        b_ld     = 1;
    }
    else
    {
        // General matrices: begin with a column-by-column traversal.
        num_vecs = n;
        vec_len  = m;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A's two strides trade roles when trans requests a transposition.
        if ( bl1_does_trans( trans ) )
        {
            a_ld  = a_rs;
            a_inc = a_cs;
        }
        else
        {
            a_ld  = a_cs;
            a_inc = a_rs;
        }

        // If B is row-stored and A is effectively row-stored once trans
        // is taken into account, traverse by rows instead of by columns
        // for better spatial locality.
        if ( ( bl1_is_row_storage( b_rs, b_cs ) ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // trans does not change inside the loop, so test it once up front.
    conj_both = ( bl1_does_conj( trans ) );

    for ( k = 0; k < num_vecs; k++ )
    {
        // Start of the k-th vector in each operand.
        scomplex* a_vec = a + k*a_ld;
        scomplex* b_vec = b + k*b_ld;

        bl1_cswap( vec_len,
                   a_vec, a_inc,
                   b_vec, b_inc );

        // After the exchange, conjugate both vectors if requested.
        if ( conj_both )
        {
            bl1_cconjv( vec_len,
                        a_vec, a_inc );
            bl1_cconjv( vec_len,
                        b_vec, b_inc );
        }
    }
}
221
// bl1_zswapmt: double-precision complex variant of the matrix swap.
//
// Walks the m x n matrix B and the matrix A (viewed through trans) one
// column or one row at a time and passes each pair of strided vectors to
// bl1_zswap(). When trans carries a conjugation request, bl1_zconjv() is
// then applied to the A vector and to the B vector, in that order.
//
//   trans          selects stride exchange for A and optional conjugation
//   m, n           row and column counts used to traverse B
//   a, a_rs, a_cs  base pointer, row stride and column stride of A
//   b, b_rs, b_cs  base pointer, row stride and column stride of B
void bl1_zswapmt( trans1_t trans, int m, int n, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs )
{
    int num_vecs;      // number of bl1_zswap() invocations
    int vec_len;       // element count handed to each invocation
    int a_ld, a_inc;   // A: step between vectors / step within a vector
    int b_ld, b_inc;   // B: step between vectors / step within a vector
    int conj_both;     // nonzero when trans requests conjugation
    int k;

    // Nothing to do when bl1_zero_dim2() reports an empty operand.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector operands: set things up so the underlying swap runs
        // exactly once over the whole vector.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );

        // With a single iteration these are only ever multiplied by
        // zero, so the value is irrelevant.
        a_ld     = 1;
        b_ld     = 1;
    }
    else
    {
        // General matrices: begin with a column-by-column traversal.
        num_vecs = n;
        vec_len  = m;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A's two strides trade roles when trans requests a transposition.
        if ( bl1_does_trans( trans ) )
        {
            a_ld  = a_rs;
            a_inc = a_cs;
        }
        else
        {
            a_ld  = a_cs;
            a_inc = a_rs;
        }

        // If B is row-stored and A is effectively row-stored once trans
        // is taken into account, traverse by rows instead of by columns
        // for better spatial locality.
        if ( ( bl1_is_row_storage( b_rs, b_cs ) ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // trans does not change inside the loop, so test it once up front.
    conj_both = ( bl1_does_conj( trans ) );

    for ( k = 0; k < num_vecs; k++ )
    {
        // Start of the k-th vector in each operand.
        dcomplex* a_vec = a + k*a_ld;
        dcomplex* b_vec = b + k*b_ld;

        bl1_zswap( vec_len,
                   a_vec, a_inc,
                   b_vec, b_inc );

        // After the exchange, conjugate both vectors if requested.
        if ( conj_both )
        {
            bl1_zconjv( vec_len,
                        a_vec, a_inc );
            bl1_zconjv( vec_len,
                        b_vec, b_inc );
        }
    }
}
296
297