1 /**************************************************************************************************
2 *                                                                                                 *
3 * This file is part of BLASFEO.                                                                   *
4 *                                                                                                 *
5 * BLASFEO -- BLAS for embedded optimization.                                                      *
6 * Copyright (C) 2019 by Gianluca Frison.                                                          *
7 * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl.              *
8 * All rights reserved.                                                                            *
9 *                                                                                                 *
10 * The 2-Clause BSD License                                                                        *
11 *                                                                                                 *
12 * Redistribution and use in source and binary forms, with or without                              *
13 * modification, are permitted provided that the following conditions are met:                     *
14 *                                                                                                 *
15 * 1. Redistributions of source code must retain the above copyright notice, this                  *
16 *    list of conditions and the following disclaimer.                                             *
17 * 2. Redistributions in binary form must reproduce the above copyright notice,                    *
18 *    this list of conditions and the following disclaimer in the documentation                    *
19 *    and/or other materials provided with the distribution.                                       *
20 *                                                                                                 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND                 *
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED                   *
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE                          *
24 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR                 *
25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES                  *
26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;                    *
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND                     *
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT                      *
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS                   *
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                    *
31 *                                                                                                 *
32 * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de                             *
33 *                                                                                                 *
34 **************************************************************************************************/
35 
36 #include <stdlib.h>
37 #include <stdio.h>
38 
39 #include "../include/blasfeo_common.h"
40 #include "../include/blasfeo_s_kernel.h"
41 
42 
43 
44 #if defined(LA_HIGH_PERFORMANCE)
45 
46 
47 
48 // z = y + alpha*x, with increments equal to 1
blasfeo_saxpy(int m,float alpha,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)49 void blasfeo_saxpy(int m, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
50 	{
51 	float *x = sx->pa + xi;
52 	float *y = sy->pa + yi;
53 	float *z = sz->pa + zi;
54 	int ii;
55 	ii = 0;
56 	for( ; ii<m-3; ii+=4)
57 		{
58 		z[ii+0] = y[ii+0] + alpha*x[ii+0];
59 		z[ii+1] = y[ii+1] + alpha*x[ii+1];
60 		z[ii+2] = y[ii+2] + alpha*x[ii+2];
61 		z[ii+3] = y[ii+3] + alpha*x[ii+3];
62 		}
63 	for( ; ii<m; ii++)
64 		{
65 		z[ii+0] = y[ii+0] + alpha*x[ii+0];
66 		}
67 	return;
68 	}
69 
70 
71 
blasfeo_saxpby(int m,float alpha,struct blasfeo_svec * sx,int xi,float beta,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)72 void blasfeo_saxpby(int m, float alpha, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
73 	{
74 	if(m<=0)
75 		return;
76 	int ii;
77 	float *x = sx->pa + xi;
78 	float *y = sy->pa + yi;
79 	float *z = sz->pa + zi;
80 	ii = 0;
81 	for(; ii<m-3; ii+=4)
82 		{
83 		z[ii+0] = beta*y[ii+0] + alpha*x[ii+0];
84 		z[ii+1] = beta*y[ii+1] + alpha*x[ii+1];
85 		z[ii+2] = beta*y[ii+2] + alpha*x[ii+2];
86 		z[ii+3] = beta*y[ii+3] + alpha*x[ii+3];
87 		}
88 	for(; ii<m; ii++)
89 		z[ii+0] = beta*y[ii+0] + alpha*x[ii+0];
90 	return;
91 	}
92 
93 
94 
saxpy_bkp_libstr(int m,float alpha,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)95 void saxpy_bkp_libstr(int m, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
96 	{
97 	float *x = sx->pa + xi;
98 	float *y = sy->pa + yi;
99 	float *z = sz->pa + zi;
100 	int ii;
101 	ii = 0;
102 	for( ; ii<m-3; ii+=4)
103 		{
104 		z[ii+0] = y[ii+0];
105 		y[ii+0] = y[ii+0] + alpha*x[ii+0];
106 		z[ii+1] = y[ii+1];
107 		y[ii+1] = y[ii+1] + alpha*x[ii+1];
108 		z[ii+2] = y[ii+2];
109 		y[ii+2] = y[ii+2] + alpha*x[ii+2];
110 		z[ii+3] = y[ii+3];
111 		y[ii+3] = y[ii+3] + alpha*x[ii+3];
112 		}
113 	for( ; ii<m; ii++)
114 		{
115 		z[ii+0] = y[ii+0];
116 		y[ii+0] = y[ii+0] + alpha*x[ii+0];
117 		}
118 	return;
119 	}
120 
121 
122 
123 // multiply two vectors
blasfeo_svecmul(int m,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)124 void blasfeo_svecmul(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
125 	{
126 
127 	if(m<=0)
128 		return;
129 
130 	float *x = sx->pa + xi;
131 	float *y = sy->pa + yi;
132 	float *z = sz->pa + zi;
133 	int ii;
134 
135 	ii = 0;
136 
137 	for(; ii<m; ii++)
138 		{
139 		z[ii+0] = x[ii+0] * y[ii+0];
140 		}
141 	return;
142 	}
143 
144 
145 
146 // multiply two vectors and add result to another vector
blasfeo_svecmulacc(int m,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)147 void blasfeo_svecmulacc(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
148 	{
149 
150 	if(m<=0)
151 		return;
152 
153 	float *x = sx->pa + xi;
154 	float *y = sy->pa + yi;
155 	float *z = sz->pa + zi;
156 	int ii;
157 
158 	ii = 0;
159 
160 	for(; ii<m; ii++)
161 		{
162 		z[ii+0] += x[ii+0] * y[ii+0];
163 		}
164 	return;
165 	}
166 
167 
168 
169 // multiply two vectors and compute dot product
blasfeo_svecmuldot(int m,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)170 float blasfeo_svecmuldot(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
171 	{
172 
173 	if(m<=0)
174 		return 0.0;
175 
176 	float *x = sx->pa + xi;
177 	float *y = sy->pa + yi;
178 	float *z = sz->pa + zi;
179 	int ii;
180 	float dot = 0.0;
181 
182 	ii = 0;
183 
184 	for(; ii<m; ii++)
185 		{
186 		z[ii+0] = x[ii+0] * y[ii+0];
187 		dot += z[ii+0];
188 		}
189 	return dot;
190 	}
191 
192 
193 
194 // compute dot product
blasfeo_sdot(int m,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi)195 float blasfeo_sdot(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi)
196 	{
197 
198 	if(m<=0)
199 		return 0.0;
200 
201 	float *x = sx->pa + xi;
202 	float *y = sy->pa + yi;
203 	int ii;
204 	float dot = 0.0;
205 
206 	ii = 0;
207 	for(; ii<m; ii++)
208 		{
209 		dot += x[ii+0] * y[ii+0];
210 		}
211 	return dot;
212 	}
213 
214 
215 
216 #else
217 
218 #error : wrong LA choice
219 
220 #endif
221 
222