1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "blis1.h"
12
/*
 * bl1_sscalm
 *
 * Scales the m-by-n single-precision real matrix A in place by alpha,
 * issuing one bl1_sscal call per column (or per row when A is row-stored).
 *
 *   conj   forwarded to bl1_scopys when alpha is copied into a local
 *   m, n   dimensions of A
 *   alpha  pointer to the scaling factor
 *   a      pointer to the first element of A
 *   a_rs   stride between consecutive rows of A
 *   a_cs   stride between consecutive columns of A
 */
void bl1_sscalm( conj1_t conj, int m, int n, float* alpha, float* a, int a_rs, int a_cs )
{
    float scale;
    int   num_vecs;    // number of bl1_sscal calls to issue
    int   vec_len;     // elements scaled by each call
    int   vec_stride;  // offset between the starts of successive calls
    int   elem_inc;    // increment between elements within one call
    int   k;

    // Nothing to do for an empty matrix or when alpha is one.
    if ( ( bl1_zero_dim2( m, n ) ) || ( bl1_seq1( alpha ) ) ) return;

    // Work from a local copy of alpha; conj is handed on to bl1_scopys.
    bl1_scopys( conj, alpha, &scale );

    if ( bl1_is_vector( m, n ) )
    {
        // A vector is handled by a single scal call, so vec_stride is only
        // ever multiplied by zero; its value does not matter.
        num_vecs   = 1;
        vec_len    = bl1_vector_dim( m, n );
        vec_stride = 1;
        elem_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        // Row storage: traverse A one row at a time for spatial locality.
        num_vecs   = m;
        vec_len    = n;
        vec_stride = a_rs;
        elem_inc   = a_cs;
    }
    else
    {
        // Otherwise traverse A one column at a time.
        num_vecs   = n;
        vec_len    = m;
        vec_stride = a_cs;
        elem_inc   = a_rs;
    }

    for ( k = 0; k < num_vecs; ++k )
    {
        bl1_sscal( vec_len, &scale, a + k*vec_stride, elem_inc );
    }
}
64
/*
 * bl1_dscalm
 *
 * Scales the m-by-n double-precision real matrix A in place by alpha,
 * issuing one bl1_dscal call per column (or per row when A is row-stored).
 *
 *   conj   forwarded to bl1_dcopys when alpha is copied into a local
 *   m, n   dimensions of A
 *   alpha  pointer to the scaling factor
 *   a      pointer to the first element of A
 *   a_rs   stride between consecutive rows of A
 *   a_cs   stride between consecutive columns of A
 */
void bl1_dscalm( conj1_t conj, int m, int n, double* alpha, double* a, int a_rs, int a_cs )
{
    double scale;
    int    num_vecs;    // number of bl1_dscal calls to issue
    int    vec_len;     // elements scaled by each call
    int    vec_stride;  // offset between the starts of successive calls
    int    elem_inc;    // increment between elements within one call
    int    k;

    // Nothing to do for an empty matrix or when alpha is one.
    if ( ( bl1_zero_dim2( m, n ) ) || ( bl1_deq1( alpha ) ) ) return;

    // Work from a local copy of alpha; conj is handed on to bl1_dcopys.
    bl1_dcopys( conj, alpha, &scale );

    if ( bl1_is_vector( m, n ) )
    {
        // A vector is handled by a single scal call, so vec_stride is only
        // ever multiplied by zero; its value does not matter.
        num_vecs   = 1;
        vec_len    = bl1_vector_dim( m, n );
        vec_stride = 1;
        elem_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        // Row storage: traverse A one row at a time for spatial locality.
        num_vecs   = m;
        vec_len    = n;
        vec_stride = a_rs;
        elem_inc   = a_cs;
    }
    else
    {
        // Otherwise traverse A one column at a time.
        num_vecs   = n;
        vec_len    = m;
        vec_stride = a_cs;
        elem_inc   = a_rs;
    }

    for ( k = 0; k < num_vecs; ++k )
    {
        bl1_dscal( vec_len, &scale, a + k*vec_stride, elem_inc );
    }
}
116
/*
 * bl1_csscalm
 *
 * Scales the m-by-n single-precision complex matrix A in place by the real
 * scalar alpha, issuing one bl1_csscal call per column (or per row when A
 * is row-stored).
 *
 *   conj   forwarded to bl1_scopys when alpha is copied into a local
 *   m, n   dimensions of A
 *   alpha  pointer to the real scaling factor
 *   a      pointer to the first element of A
 *   a_rs   stride between consecutive rows of A
 *   a_cs   stride between consecutive columns of A
 */
void bl1_csscalm( conj1_t conj, int m, int n, float* alpha, scomplex* a, int a_rs, int a_cs )
{
    float scale;
    int   num_vecs;    // number of bl1_csscal calls to issue
    int   vec_len;     // elements scaled by each call
    int   vec_stride;  // offset between the starts of successive calls
    int   elem_inc;    // increment between elements within one call
    int   k;

    // Nothing to do for an empty matrix or when alpha is one.
    if ( ( bl1_zero_dim2( m, n ) ) || ( bl1_seq1( alpha ) ) ) return;

    // Work from a local copy of alpha; conj is handed on to bl1_scopys.
    bl1_scopys( conj, alpha, &scale );

    if ( bl1_is_vector( m, n ) )
    {
        // A vector is handled by a single scal call, so vec_stride is only
        // ever multiplied by zero; its value does not matter.
        num_vecs   = 1;
        vec_len    = bl1_vector_dim( m, n );
        vec_stride = 1;
        elem_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        // Row storage: traverse A one row at a time for spatial locality.
        num_vecs   = m;
        vec_len    = n;
        vec_stride = a_rs;
        elem_inc   = a_cs;
    }
    else
    {
        // Otherwise traverse A one column at a time.
        num_vecs   = n;
        vec_len    = m;
        vec_stride = a_cs;
        elem_inc   = a_rs;
    }

    for ( k = 0; k < num_vecs; ++k )
    {
        bl1_csscal( vec_len, &scale, a + k*vec_stride, elem_inc );
    }
}
168
/*
 * bl1_cscalm
 *
 * Scales the m-by-n single-precision complex matrix A in place by the
 * complex scalar alpha, issuing one bl1_cscal call per column (or per row
 * when A is row-stored).
 *
 *   conj   forwarded to bl1_ccopys when alpha is copied into a local
 *          (presumably selects conjugation of alpha -- confirm against
 *          the bl1_ccopys definition)
 *   m, n   dimensions of A
 *   alpha  pointer to the complex scaling factor
 *   a      pointer to the first element of A
 *   a_rs   stride between consecutive rows of A
 *   a_cs   stride between consecutive columns of A
 */
void bl1_cscalm( conj1_t conj, int m, int n, scomplex* alpha, scomplex* a, int a_rs, int a_cs )
{
    scomplex scale;
    int      num_vecs;    // number of bl1_cscal calls to issue
    int      vec_len;     // elements scaled by each call
    int      vec_stride;  // offset between the starts of successive calls
    int      elem_inc;    // increment between elements within one call
    int      k;

    // Nothing to do for an empty matrix or when alpha is one.
    if ( ( bl1_zero_dim2( m, n ) ) || ( bl1_ceq1( alpha ) ) ) return;

    // Work from a local copy of alpha; conj is handed on to bl1_ccopys.
    bl1_ccopys( conj, alpha, &scale );

    if ( bl1_is_vector( m, n ) )
    {
        // A vector is handled by a single scal call, so vec_stride is only
        // ever multiplied by zero; its value does not matter.
        num_vecs   = 1;
        vec_len    = bl1_vector_dim( m, n );
        vec_stride = 1;
        elem_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        // Row storage: traverse A one row at a time for spatial locality.
        num_vecs   = m;
        vec_len    = n;
        vec_stride = a_rs;
        elem_inc   = a_cs;
    }
    else
    {
        // Otherwise traverse A one column at a time.
        num_vecs   = n;
        vec_len    = m;
        vec_stride = a_cs;
        elem_inc   = a_rs;
    }

    for ( k = 0; k < num_vecs; ++k )
    {
        bl1_cscal( vec_len, &scale, a + k*vec_stride, elem_inc );
    }
}
220
/*
 * bl1_zdscalm
 *
 * Scales the m-by-n double-precision complex matrix A in place by the real
 * scalar alpha, issuing one bl1_zdscal call per column (or per row when A
 * is row-stored).
 *
 *   conj   forwarded to bl1_dcopys when alpha is copied into a local
 *   m, n   dimensions of A
 *   alpha  pointer to the real scaling factor
 *   a      pointer to the first element of A
 *   a_rs   stride between consecutive rows of A
 *   a_cs   stride between consecutive columns of A
 */
void bl1_zdscalm( conj1_t conj, int m, int n, double* alpha, dcomplex* a, int a_rs, int a_cs )
{
    double scale;
    int    num_vecs;    // number of bl1_zdscal calls to issue
    int    vec_len;     // elements scaled by each call
    int    vec_stride;  // offset between the starts of successive calls
    int    elem_inc;    // increment between elements within one call
    int    k;

    // Nothing to do for an empty matrix or when alpha is one.
    if ( ( bl1_zero_dim2( m, n ) ) || ( bl1_deq1( alpha ) ) ) return;

    // Work from a local copy of alpha; conj is handed on to bl1_dcopys.
    bl1_dcopys( conj, alpha, &scale );

    if ( bl1_is_vector( m, n ) )
    {
        // A vector is handled by a single scal call, so vec_stride is only
        // ever multiplied by zero; its value does not matter.
        num_vecs   = 1;
        vec_len    = bl1_vector_dim( m, n );
        vec_stride = 1;
        elem_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        // Row storage: traverse A one row at a time for spatial locality.
        num_vecs   = m;
        vec_len    = n;
        vec_stride = a_rs;
        elem_inc   = a_cs;
    }
    else
    {
        // Otherwise traverse A one column at a time.
        num_vecs   = n;
        vec_len    = m;
        vec_stride = a_cs;
        elem_inc   = a_rs;
    }

    for ( k = 0; k < num_vecs; ++k )
    {
        bl1_zdscal( vec_len, &scale, a + k*vec_stride, elem_inc );
    }
}
272
/*
 * bl1_zscalm
 *
 * Scales the m-by-n double-precision complex matrix A in place by the
 * complex scalar alpha, issuing one bl1_zscal call per column (or per row
 * when A is row-stored).
 *
 *   conj   forwarded to bl1_zcopys when alpha is copied into a local
 *          (presumably selects conjugation of alpha -- confirm against
 *          the bl1_zcopys definition)
 *   m, n   dimensions of A
 *   alpha  pointer to the complex scaling factor
 *   a      pointer to the first element of A
 *   a_rs   stride between consecutive rows of A
 *   a_cs   stride between consecutive columns of A
 */
void bl1_zscalm( conj1_t conj, int m, int n, dcomplex* alpha, dcomplex* a, int a_rs, int a_cs )
{
    dcomplex scale;
    int      num_vecs;    // number of bl1_zscal calls to issue
    int      vec_len;     // elements scaled by each call
    int      vec_stride;  // offset between the starts of successive calls
    int      elem_inc;    // increment between elements within one call
    int      k;

    // Nothing to do for an empty matrix or when alpha is one.
    if ( ( bl1_zero_dim2( m, n ) ) || ( bl1_zeq1( alpha ) ) ) return;

    // Work from a local copy of alpha; conj is handed on to bl1_zcopys.
    bl1_zcopys( conj, alpha, &scale );

    if ( bl1_is_vector( m, n ) )
    {
        // A vector is handled by a single scal call, so vec_stride is only
        // ever multiplied by zero; its value does not matter.
        num_vecs   = 1;
        vec_len    = bl1_vector_dim( m, n );
        vec_stride = 1;
        elem_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        // Row storage: traverse A one row at a time for spatial locality.
        num_vecs   = m;
        vec_len    = n;
        vec_stride = a_rs;
        elem_inc   = a_cs;
    }
    else
    {
        // Otherwise traverse A one column at a time.
        num_vecs   = n;
        vec_len    = m;
        vec_stride = a_cs;
        elem_inc   = a_rs;
    }

    for ( k = 0; k < num_vecs; ++k )
    {
        bl1_zscal( vec_len, &scale, a + k*vec_stride, elem_inc );
    }
}
324
325