1 /**************************************************************************************************
2 *                                                                                                 *
3 * This file is part of BLASFEO.                                                                   *
4 *                                                                                                 *
5 * BLASFEO -- BLAS for embedded optimization.                                                      *
6 * Copyright (C) 2019 by Gianluca Frison.                                                          *
7 * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl.              *
8 * All rights reserved.                                                                            *
9 *                                                                                                 *
10 * The 2-Clause BSD License                                                                        *
11 *                                                                                                 *
12 * Redistribution and use in source and binary forms, with or without                              *
13 * modification, are permitted provided that the following conditions are met:                     *
14 *                                                                                                 *
15 * 1. Redistributions of source code must retain the above copyright notice, this                  *
16 *    list of conditions and the following disclaimer.                                             *
17 * 2. Redistributions in binary form must reproduce the above copyright notice,                    *
18 *    this list of conditions and the following disclaimer in the documentation                    *
19 *    and/or other materials provided with the distribution.                                       *
20 *                                                                                                 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND                 *
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED                   *
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE                          *
24 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR                 *
25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES                  *
26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;                    *
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND                     *
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT                      *
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS                   *
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                    *
31 *                                                                                                 *
32 * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de                             *
33 *                                                                                                 *
34 **************************************************************************************************/
35 
36 
37 
38 #if defined(LA_REFERENCE) | defined(TESTING_MODE)
39 
40 
41 
AXPY_LIBSTR(int m,REAL alpha,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)42 void AXPY_LIBSTR(int m, REAL alpha, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
43 	{
44 	if(m<=0)
45 		return;
46 	int ii;
47 	REAL *x = sx->pa + xi;
48 	REAL *y = sy->pa + yi;
49 	REAL *z = sz->pa + zi;
50 	ii = 0;
51 	for(; ii<m-3; ii+=4)
52 		{
53 		z[ii+0] = y[ii+0] + alpha*x[ii+0];
54 		z[ii+1] = y[ii+1] + alpha*x[ii+1];
55 		z[ii+2] = y[ii+2] + alpha*x[ii+2];
56 		z[ii+3] = y[ii+3] + alpha*x[ii+3];
57 		}
58 	for(; ii<m; ii++)
59 		z[ii+0] = y[ii+0] + alpha*x[ii+0];
60 	return;
61 	}
62 
63 
64 
AXPBY_LIBSTR(int m,REAL alpha,struct STRVEC * sx,int xi,REAL beta,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)65 void AXPBY_LIBSTR(int m, REAL alpha, struct STRVEC *sx, int xi, REAL beta, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
66 	{
67 	if(m<=0)
68 		return;
69 	int ii;
70 	REAL *x = sx->pa + xi;
71 	REAL *y = sy->pa + yi;
72 	REAL *z = sz->pa + zi;
73 	ii = 0;
74 	for(; ii<m-3; ii+=4)
75 		{
76 		z[ii+0] = beta*y[ii+0] + alpha*x[ii+0];
77 		z[ii+1] = beta*y[ii+1] + alpha*x[ii+1];
78 		z[ii+2] = beta*y[ii+2] + alpha*x[ii+2];
79 		z[ii+3] = beta*y[ii+3] + alpha*x[ii+3];
80 		}
81 	for(; ii<m; ii++)
82 		z[ii+0] = beta*y[ii+0] + alpha*x[ii+0];
83 	return;
84 	}
85 
86 
87 
88 // multiply two vectors
VECMUL_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)89 void VECMUL_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
90 	{
91 	if(m<=0)
92 		return;
93 	REAL *x = sx->pa + xi;
94 	REAL *y = sy->pa + yi;
95 	REAL *z = sz->pa + zi;
96 	int ii;
97 	ii = 0;
98 	for(; ii<m-3; ii+=4)
99 		{
100 		z[ii+0] = x[ii+0] * y[ii+0];
101 		z[ii+1] = x[ii+1] * y[ii+1];
102 		z[ii+2] = x[ii+2] * y[ii+2];
103 		z[ii+3] = x[ii+3] * y[ii+3];
104 		}
105 	for(; ii<m; ii++)
106 		{
107 		z[ii+0] = x[ii+0] * y[ii+0];
108 		}
109 	return;
110 	}
111 
112 
113 
114 // multiply two vectors and add result to another vector
VECMULACC_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)115 void VECMULACC_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
116 	{
117 	if(m<=0)
118 		return;
119 	REAL *x = sx->pa + xi;
120 	REAL *y = sy->pa + yi;
121 	REAL *z = sz->pa + zi;
122 	int ii;
123 	ii = 0;
124 	for(; ii<m-3; ii+=4)
125 		{
126 		z[ii+0] += x[ii+0] * y[ii+0];
127 		z[ii+1] += x[ii+1] * y[ii+1];
128 		z[ii+2] += x[ii+2] * y[ii+2];
129 		z[ii+3] += x[ii+3] * y[ii+3];
130 		}
131 	for(; ii<m; ii++)
132 		{
133 		z[ii+0] += x[ii+0] * y[ii+0];
134 		}
135 	return;
136 	}
137 
138 
139 
140 // multiply two vectors and compute dot product
VECMULDOT_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)141 REAL VECMULDOT_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
142 	{
143 	if(m<=0)
144 		return 0.0;
145 	REAL *x = sx->pa + xi;
146 	REAL *y = sy->pa + yi;
147 	REAL *z = sz->pa + zi;
148 	int ii;
149 	REAL dot = 0.0;
150 	ii = 0;
151 	for(; ii<m-3; ii+=4)
152 		{
153 		z[ii+0] = x[ii+0] * y[ii+0];
154 		z[ii+1] = x[ii+1] * y[ii+1];
155 		z[ii+2] = x[ii+2] * y[ii+2];
156 		z[ii+3] = x[ii+3] * y[ii+3];
157 		dot += z[ii+0] + z[ii+1] + z[ii+2] + z[ii+3];
158 		}
159 	for(; ii<m; ii++)
160 		{
161 		z[ii+0] = x[ii+0] * y[ii+0];
162 		dot += z[ii+0];
163 		}
164 	return dot;
165 	}
166 
167 
168 
169 // compute dot product of two vectors
DOT_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi)170 REAL DOT_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi)
171 	{
172 	if(m<=0)
173 		return 0.0;
174 	REAL *x = sx->pa + xi;
175 	REAL *y = sy->pa + yi;
176 	int ii;
177 	REAL dot = 0.0;
178 	ii = 0;
179 	for(; ii<m-3; ii+=4)
180 		{
181 		dot += x[ii+0] * y[ii+0];
182 		dot += x[ii+1] * y[ii+1];
183 		dot += x[ii+2] * y[ii+2];
184 		dot += x[ii+3] * y[ii+3];
185 		}
186 	for(; ii<m; ii++)
187 		{
188 		dot += x[ii+0] * y[ii+0];
189 		}
190 	return dot;
191 	}
192 
193 
194 
195 // construct givens plane rotation
ROTG_LIBSTR(REAL a,REAL b,REAL * c,REAL * s)196 void ROTG_LIBSTR(REAL a, REAL b, REAL *c, REAL *s)
197 	{
198 	REAL aa = FABS(a);
199 	REAL bb = FABS(b);
200 	REAL roe = (aa >= bb) ? a : b;
201 	REAL scale = aa + bb;
202 	REAL r;
203 	if (scale == 0)
204 		{
205 		*c = 1.0;
206 		*s = 0.0;
207 		}
208 	else
209 		{
210 		aa = a/scale;
211 		bb = b/scale;
212 		r = scale * SQRT(aa*aa + bb*bb);
213 		r = r * (roe >= 0 ? 1 : -1);
214 		*c = a / r;
215 		*s = b / r;
216 		}
217 	return;
218 	}
219 
220 
221 
222 // apply plane rotation to the aj0 and aj1 columns of A at row index ai
COLROT_LIBSTR(int m,struct STRMAT * sA,int ai,int aj0,int aj1,REAL c,REAL s)223 void COLROT_LIBSTR(int m, struct STRMAT *sA, int ai, int aj0, int aj1, REAL c, REAL s)
224 	{
225 	int lda = sA->m;
226 	REAL *px = sA->pA + ai + aj0*lda;
227 	REAL *py = sA->pA + ai + aj1*lda;
228 	int ii;
229 	REAL d_tmp;
230 	for(ii=0; ii<m; ii++)
231 		{
232 		d_tmp  = c*px[ii] + s*py[ii];
233 		py[ii] = c*py[ii] - s*px[ii];
234 		px[ii] = d_tmp;
235 		}
236 	return;
237 	}
238 
239 
240 
241 // apply plane rotation to the ai0 and ai1 rows of A at column index aj
ROWROT_LIBSTR(int m,struct STRMAT * sA,int ai0,int ai1,int aj,REAL c,REAL s)242 void ROWROT_LIBSTR(int m, struct STRMAT *sA, int ai0, int ai1, int aj, REAL c, REAL s)
243 	{
244 	int lda = sA->m;
245 	REAL *px = sA->pA + ai0 + aj*lda;
246 	REAL *py = sA->pA + ai1 + aj*lda;
247 	int ii;
248 	REAL d_tmp;
249 	for(ii=0; ii<m; ii++)
250 		{
251 		d_tmp  = c*px[ii*lda] + s*py[ii*lda];
252 		py[ii*lda] = c*py[ii*lda] - s*px[ii*lda];
253 		px[ii*lda] = d_tmp;
254 		}
255 	return;
256 	}
257 
258 
259 
260 #elif defined(LA_EXTERNAL_BLAS_WRAPPER)
261 
262 
263 
AXPY_LIBSTR(int m,REAL alpha,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)264 void AXPY_LIBSTR(int m, REAL alpha, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
265 	{
266 	if(m<=0)
267 		return;
268 	int i1 = 1;
269 	REAL *x = sx->pa + xi;
270 	REAL *y = sy->pa + yi;
271 	REAL *z = sz->pa + zi;
272 	if(y!=z)
273 		COPY(&m, y, &i1, z, &i1);
274 	AXPY(&m, &alpha, x, &i1, z, &i1);
275 	return;
276 	}
277 
278 
AXPBY_LIBSTR(int m,REAL alpha,struct STRVEC * sx,REAL beta,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)279 void AXPBY_LIBSTR(int m, REAL alpha, struct STRVEC *sx, REAL beta, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
280 	{
281 	if(m<=0)
282 		return;
283 	int i1 = 1;
284 	REAL *x = sx->pa + xi;
285 	REAL *y = sy->pa + yi;
286 	REAL *z = sz->pa + zi;
287 	if(y!=z)
288 		COPY(&m, y, &i1, z, &i1);
289 	SCAL(&m, &beta, z, &i1);
290 	AXPY(&m, &alpha, x, &i1, z, &i1);
291 	return;
292 	}
293 
294 
295 // multiply two vectors
VECMUL_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)296 void VECMUL_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
297 	{
298 	if(m<=0)
299 		return;
300 	REAL *x = sx->pa + xi;
301 	REAL *y = sy->pa + yi;
302 	REAL *z = sz->pa + zi;
303 	int ii;
304 	ii = 0;
305 	for(; ii<m-3; ii+=4)
306 		{
307 		z[ii+0] = x[ii+0] * y[ii+0];
308 		z[ii+1] = x[ii+1] * y[ii+1];
309 		z[ii+2] = x[ii+2] * y[ii+2];
310 		z[ii+3] = x[ii+3] * y[ii+3];
311 		}
312 	for(; ii<m; ii++)
313 		{
314 		z[ii+0] = x[ii+0] * y[ii+0];
315 		}
316 	return;
317 	}
318 
319 
320 
321 // multiply two vectors and add result to another vector
VECMULACC_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)322 void VECMULACC_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
323 	{
324 	if(m<=0)
325 		return;
326 	REAL *x = sx->pa + xi;
327 	REAL *y = sy->pa + yi;
328 	REAL *z = sz->pa + zi;
329 	int ii;
330 	ii = 0;
331 	for(; ii<m-3; ii+=4)
332 		{
333 		z[ii+0] += x[ii+0] * y[ii+0];
334 		z[ii+1] += x[ii+1] * y[ii+1];
335 		z[ii+2] += x[ii+2] * y[ii+2];
336 		z[ii+3] += x[ii+3] * y[ii+3];
337 		}
338 	for(; ii<m; ii++)
339 		{
340 		z[ii+0] += x[ii+0] * y[ii+0];
341 		}
342 	return;
343 	}
344 
345 
346 
347 // multiply two vectors and compute dot product
VECMULDOT_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi,struct STRVEC * sz,int zi)348 REAL VECMULDOT_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
349 	{
350 	if(m<=0)
351 		return 0.0;
352 	REAL *x = sx->pa + xi;
353 	REAL *y = sy->pa + yi;
354 	REAL *z = sz->pa + zi;
355 	int ii;
356 	REAL dot = 0.0;
357 	ii = 0;
358 	for(; ii<m; ii++)
359 		{
360 		z[ii+0] = x[ii+0] * y[ii+0];
361 		dot += z[ii+0];
362 		}
363 	return dot;
364 	}
365 
366 
367 
368 // compute dot product of two vectors
DOT_LIBSTR(int m,struct STRVEC * sx,int xi,struct STRVEC * sy,int yi)369 REAL DOT_LIBSTR(int m, struct STRVEC *sx, int xi, struct STRVEC *sy, int yi)
370 	{
371 	if(m<=0)
372 		return 0.0;
373 	REAL *x = sx->pa + xi;
374 	REAL *y = sy->pa + yi;
375 	int ii;
376 	REAL dot = 0.0;
377 	ii = 0;
378 	for(; ii<m-3; ii+=4)
379 		{
380 		dot += x[ii+0] * y[ii+0];
381 		dot += x[ii+1] * y[ii+1];
382 		dot += x[ii+2] * y[ii+2];
383 		dot += x[ii+3] * y[ii+3];
384 		}
385 	for(; ii<m; ii++)
386 		{
387 		dot += x[ii+0] * y[ii+0];
388 		}
389 	return dot;
390 	}
391 
392 
393 
394 // construct givens plane rotation
ROTG_LIBSTR(REAL a,REAL b,REAL * c,REAL * s)395 void ROTG_LIBSTR(REAL a, REAL b, REAL *c, REAL *s)
396 	{
397 	ROTG(&a, &b, c, s);
398 	return;
399 	}
400 
401 
402 
403 // apply plane rotation to the aj0 and aj1 columns of A at row index ai
COLROT_LIBSTR(int m,struct STRMAT * sA,int ai,int aj0,int aj1,REAL c,REAL s)404 void COLROT_LIBSTR(int m, struct STRMAT *sA, int ai, int aj0, int aj1, REAL c, REAL s)
405 	{
406 	int lda = sA->m;
407 	REAL *px = sA->pA + ai + aj0*lda;
408 	REAL *py = sA->pA + ai + aj1*lda;
409 	int i1 = 1;
410 	ROT(&m, px, &i1, py, &i1, &c, &s);
411 	return;
412 	}
413 
414 
415 
416 // apply plane rotation to the ai0 and ai1 rows of A at column index aj
ROWROT_LIBSTR(int m,struct STRMAT * sA,int ai0,int ai1,int aj,REAL c,REAL s)417 void ROWROT_LIBSTR(int m, struct STRMAT *sA, int ai0, int ai1, int aj, REAL c, REAL s)
418 	{
419 	int lda = sA->m;
420 	REAL *px = sA->pA + ai0 + aj*lda;
421 	REAL *py = sA->pA + ai1 + aj*lda;
422 	ROT(&m, px, &lda, py, &lda, &c, &s);
423 	return;
424 	}
425 
426 
427 
428 #else
429 
430 #error : wrong LA choice
431 
432 #endif
433 
434 
435