1 /**************************************************************************************************
2 * *
3 * This file is part of BLASFEO. *
4 * *
5 * BLASFEO -- BLAS for embedded optimization. *
6 * Copyright (C) 2019 by Gianluca Frison. *
7 * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
8 * All rights reserved. *
9 * *
10 * The 2-Clause BSD License *
11 * *
12 * Redistribution and use in source and binary forms, with or without *
13 * modification, are permitted provided that the following conditions are met: *
14 * *
15 * 1. Redistributions of source code must retain the above copyright notice, this *
16 * list of conditions and the following disclaimer. *
17 * 2. Redistributions in binary form must reproduce the above copyright notice, *
18 * this list of conditions and the following disclaimer in the documentation *
19 * and/or other materials provided with the distribution. *
20 * *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
24 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
31 * *
32 * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
33 * *
34 **************************************************************************************************/
35
36
37
38 #if defined(LA_REFERENCE) | defined(TESTING_MODE)
39
40
41
AXPY_LIBSTR(int m,REAL alpha,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)42 void AXPY_LIBSTR(int m, REAL alpha, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
43 {
44 if(m<=0)
45 return;
46 int ii;
47 REAL *x = sx->pa + xi;
48 REAL *y = sy->pa + yi;
49 REAL *z = sz->pa + zi;
50 ii = 0;
51 for(; ii<m-3; ii+=4)
52 {
53 z[ii+0] = y[ii+0] + alpha*x[ii+0];
54 z[ii+1] = y[ii+1] + alpha*x[ii+1];
55 z[ii+2] = y[ii+2] + alpha*x[ii+2];
56 z[ii+3] = y[ii+3] + alpha*x[ii+3];
57 }
58 for(; ii<m; ii++)
59 z[ii+0] = y[ii+0] + alpha*x[ii+0];
60 return;
61 }
62
63
64
AXPBY_LIBSTR(int m,REAL alpha,struct STRVEC * sx,int xi,REAL beta,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)65 void AXPBY_LIBSTR(int m, REAL alpha, struct STRVEC *sx, int xi, REAL beta, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
66 {
67 if(m<=0)
68 return;
69 int ii;
70 REAL *x = sx->pa + xi;
71 REAL *y = sy->pa + yi;
72 REAL *z = sz->pa + zi;
73 ii = 0;
74 for(; ii<m-3; ii+=4)
75 {
76 z[ii+0] = beta*y[ii+0] + alpha*x[ii+0];
77 z[ii+1] = beta*y[ii+1] + alpha*x[ii+1];
78 z[ii+2] = beta*y[ii+2] + alpha*x[ii+2];
79 z[ii+3] = beta*y[ii+3] + alpha*x[ii+3];
80 }
81 for(; ii<m; ii++)
82 z[ii+0] = beta*y[ii+0] + alpha*x[ii+0];
83 return;
84 }
85
86
87
88 // multiply two vectors
VECMUL_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)89 void VECMUL_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
90 {
91 if(m<=0)
92 return;
93 REAL *x = sx->pa + xi;
94 REAL *y = sy->pa + yi;
95 REAL *z = sz->pa + zi;
96 int ii;
97 ii = 0;
98 for(; ii<m-3; ii+=4)
99 {
100 z[ii+0] = x[ii+0] * y[ii+0];
101 z[ii+1] = x[ii+1] * y[ii+1];
102 z[ii+2] = x[ii+2] * y[ii+2];
103 z[ii+3] = x[ii+3] * y[ii+3];
104 }
105 for(; ii<m; ii++)
106 {
107 z[ii+0] = x[ii+0] * y[ii+0];
108 }
109 return;
110 }
111
112
113
114 // multiply two vectors and add result to another vector
VECMULACC_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)115 void VECMULACC_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
116 {
117 if(m<=0)
118 return;
119 REAL *x = sx->pa + xi;
120 REAL *y = sy->pa + yi;
121 REAL *z = sz->pa + zi;
122 int ii;
123 ii = 0;
124 for(; ii<m-3; ii+=4)
125 {
126 z[ii+0] += x[ii+0] * y[ii+0];
127 z[ii+1] += x[ii+1] * y[ii+1];
128 z[ii+2] += x[ii+2] * y[ii+2];
129 z[ii+3] += x[ii+3] * y[ii+3];
130 }
131 for(; ii<m; ii++)
132 {
133 z[ii+0] += x[ii+0] * y[ii+0];
134 }
135 return;
136 }
137
138
139
140 // multiply two vectors and compute dot product
VECMULDOT_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)141 REAL VECMULDOT_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
142 {
143 if(m<=0)
144 return 0.0;
145 REAL *x = sx->pa + xi;
146 REAL *y = sy->pa + yi;
147 REAL *z = sz->pa + zi;
148 int ii;
149 REAL dot = 0.0;
150 ii = 0;
151 for(; ii<m-3; ii+=4)
152 {
153 z[ii+0] = x[ii+0] * y[ii+0];
154 z[ii+1] = x[ii+1] * y[ii+1];
155 z[ii+2] = x[ii+2] * y[ii+2];
156 z[ii+3] = x[ii+3] * y[ii+3];
157 dot += z[ii+0] + z[ii+1] + z[ii+2] + z[ii+3];
158 }
159 for(; ii<m; ii++)
160 {
161 z[ii+0] = x[ii+0] * y[ii+0];
162 dot += z[ii+0];
163 }
164 return dot;
165 }
166
167
168
169 // compute dot product of two vectors
DOT_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi)170 REAL DOT_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi)
171 {
172 if(m<=0)
173 return 0.0;
174 REAL *x = sx->pa + xi;
175 REAL *y = sy->pa + yi;
176 int ii;
177 REAL dot = 0.0;
178 ii = 0;
179 for(; ii<m-3; ii+=4)
180 {
181 dot += x[ii+0] * y[ii+0];
182 dot += x[ii+1] * y[ii+1];
183 dot += x[ii+2] * y[ii+2];
184 dot += x[ii+3] * y[ii+3];
185 }
186 for(; ii<m; ii++)
187 {
188 dot += x[ii+0] * y[ii+0];
189 }
190 return dot;
191 }
192
193
194
195 // construct givens plane rotation
ROTG_LIBSTR(REAL a,REAL b,REAL * c,REAL * s)196 void ROTG_LIBSTR(REAL a, REAL b, REAL *c, REAL *s)
197 {
198 REAL aa = FABS(a);
199 REAL bb = FABS(b);
200 REAL roe = (aa >= bb) ? a : b;
201 REAL scale = aa + bb;
202 REAL r;
203 if (scale == 0)
204 {
205 *c = 1.0;
206 *s = 0.0;
207 }
208 else
209 {
210 aa = a/scale;
211 bb = b/scale;
212 r = scale * SQRT(aa*aa + bb*bb);
213 r = r * (roe >= 0 ? 1 : -1);
214 *c = a / r;
215 *s = b / r;
216 }
217 return;
218 }
219
220
221
222 // apply plane rotation to the aj0 and aj1 columns of A at row index ai
COLROT_LIBSTR(int m,struct STRMAT * sA,int ai,int aj0,int aj1,REAL c,REAL s)223 void COLROT_LIBSTR(int m, struct STRMAT *sA, int ai, int aj0, int aj1, REAL c, REAL s)
224 {
225 int lda = sA->m;
226 REAL *px = sA->pA + ai + aj0*lda;
227 REAL *py = sA->pA + ai + aj1*lda;
228 int ii;
229 REAL d_tmp;
230 for(ii=0; ii<m; ii++)
231 {
232 d_tmp = c*px[ii] + s*py[ii];
233 py[ii] = c*py[ii] - s*px[ii];
234 px[ii] = d_tmp;
235 }
236 return;
237 }
238
239
240
241 // apply plane rotation to the ai0 and ai1 rows of A at column index aj
ROWROT_LIBSTR(int m,struct STRMAT * sA,int ai0,int ai1,int aj,REAL c,REAL s)242 void ROWROT_LIBSTR(int m, struct STRMAT *sA, int ai0, int ai1, int aj, REAL c, REAL s)
243 {
244 int lda = sA->m;
245 REAL *px = sA->pA + ai0 + aj*lda;
246 REAL *py = sA->pA + ai1 + aj*lda;
247 int ii;
248 REAL d_tmp;
249 for(ii=0; ii<m; ii++)
250 {
251 d_tmp = c*px[ii*lda] + s*py[ii*lda];
252 py[ii*lda] = c*py[ii*lda] - s*px[ii*lda];
253 px[ii*lda] = d_tmp;
254 }
255 return;
256 }
257
258
259
260 #elif defined(LA_EXTERNAL_BLAS_WRAPPER)
261
262
263
AXPY_LIBSTR(int m,REAL alpha,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)264 void AXPY_LIBSTR(int m, REAL alpha, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
265 {
266 if(m<=0)
267 return;
268 int i1 = 1;
269 REAL *x = sx->pa + xi;
270 REAL *y = sy->pa + yi;
271 REAL *z = sz->pa + zi;
272 if(y!=z)
273 COPY(&m, y, &i1, z, &i1);
274 AXPY(&m, &alpha, x, &i1, z, &i1);
275 return;
276 }
277
278
AXPBY_LIBSTR(int m,REAL alpha,struct STRVEC * sx,REAL beta,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)279 void AXPBY_LIBSTR(int m, REAL alpha, struct STRVEC *sx, REAL beta, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
280 {
281 if(m<=0)
282 return;
283 int i1 = 1;
284 REAL *x = sx->pa + xi;
285 REAL *y = sy->pa + yi;
286 REAL *z = sz->pa + zi;
287 if(y!=z)
288 COPY(&m, y, &i1, z, &i1);
289 SCAL(&m, &beta, z, &i1);
290 AXPY(&m, &alpha, x, &i1, z, &i1);
291 return;
292 }
293
294
295 // multiply two vectors
VECMUL_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)296 void VECMUL_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
297 {
298 if(m<=0)
299 return;
300 REAL *x = sx->pa + xi;
301 REAL *y = sy->pa + yi;
302 REAL *z = sz->pa + zi;
303 int ii;
304 ii = 0;
305 for(; ii<m-3; ii+=4)
306 {
307 z[ii+0] = x[ii+0] * y[ii+0];
308 z[ii+1] = x[ii+1] * y[ii+1];
309 z[ii+2] = x[ii+2] * y[ii+2];
310 z[ii+3] = x[ii+3] * y[ii+3];
311 }
312 for(; ii<m; ii++)
313 {
314 z[ii+0] = x[ii+0] * y[ii+0];
315 }
316 return;
317 }
318
319
320
321 // multiply two vectors and add result to another vector
VECMULACC_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)322 void VECMULACC_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
323 {
324 if(m<=0)
325 return;
326 REAL *x = sx->pa + xi;
327 REAL *y = sy->pa + yi;
328 REAL *z = sz->pa + zi;
329 int ii;
330 ii = 0;
331 for(; ii<m-3; ii+=4)
332 {
333 z[ii+0] += x[ii+0] * y[ii+0];
334 z[ii+1] += x[ii+1] * y[ii+1];
335 z[ii+2] += x[ii+2] * y[ii+2];
336 z[ii+3] += x[ii+3] * y[ii+3];
337 }
338 for(; ii<m; ii++)
339 {
340 z[ii+0] += x[ii+0] * y[ii+0];
341 }
342 return;
343 }
344
345
346
347 // multiply two vectors and compute dot product
VECMULDOT_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)348 REAL VECMULDOT_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
349 {
350 if(m<=0)
351 return 0.0;
352 REAL *x = sx->pa + xi;
353 REAL *y = sy->pa + yi;
354 REAL *z = sz->pa + zi;
355 int ii;
356 REAL dot = 0.0;
357 ii = 0;
358 for(; ii<m; ii++)
359 {
360 z[ii+0] = x[ii+0] * y[ii+0];
361 dot += z[ii+0];
362 }
363 return dot;
364 }
365
366
367
368 // compute dot product of two vectors
DOT_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi)369 REAL DOT_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi)
370 {
371 if(m<=0)
372 return 0.0;
373 REAL *x = sx->pa + xi;
374 REAL *y = sy->pa + yi;
375 int ii;
376 REAL dot = 0.0;
377 ii = 0;
378 for(; ii<m-3; ii+=4)
379 {
380 dot += x[ii+0] * y[ii+0];
381 dot += x[ii+1] * y[ii+1];
382 dot += x[ii+2] * y[ii+2];
383 dot += x[ii+3] * y[ii+3];
384 }
385 for(; ii<m; ii++)
386 {
387 dot += x[ii+0] * y[ii+0];
388 }
389 return dot;
390 }
391
392
393
394 // construct givens plane rotation
ROTG_LIBSTR(REAL a,REAL b,REAL * c,REAL * s)395 void ROTG_LIBSTR(REAL a, REAL b, REAL *c, REAL *s)
396 {
397 ROTG(&a, &b, c, s);
398 return;
399 }
400
401
402
403 // apply plane rotation to the aj0 and aj1 columns of A at row index ai
COLROT_LIBSTR(int m,struct STRMAT * sA,int ai,int aj0,int aj1,REAL c,REAL s)404 void COLROT_LIBSTR(int m, struct STRMAT *sA, int ai, int aj0, int aj1, REAL c, REAL s)
405 {
406 int lda = sA->m;
407 REAL *px = sA->pA + ai + aj0*lda;
408 REAL *py = sA->pA + ai + aj1*lda;
409 int i1 = 1;
410 ROT(&m, px, &i1, py, &i1, &c, &s);
411 return;
412 }
413
414
415
416 // apply plane rotation to the ai0 and ai1 rows of A at column index aj
ROWROT_LIBSTR(int m,struct STRMAT * sA,int ai0,int ai1,int aj,REAL c,REAL s)417 void ROWROT_LIBSTR(int m, struct STRMAT *sA, int ai0, int ai1, int aj, REAL c, REAL s)
418 {
419 int lda = sA->m;
420 REAL *px = sA->pA + ai0 + aj*lda;
421 REAL *py = sA->pA + ai1 + aj*lda;
422 ROT(&m, px, &lda, py, &lda, &c, &s);
423 return;
424 }
425
426
427
428 #else
429
430 #error : wrong LA choice
431
432 #endif
433
434
435