1 /**************************************************************************************************
2 * *
3 * This file is part of BLASFEO. *
4 * *
5 * BLASFEO -- BLAS for embedded optimization. *
6 * Copyright (C) 2019 by Gianluca Frison. *
7 * Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
8 * All rights reserved. *
9 * *
10 * The 2-Clause BSD License *
11 * *
12 * Redistribution and use in source and binary forms, with or without *
13 * modification, are permitted provided that the following conditions are met: *
14 * *
15 * 1. Redistributions of source code must retain the above copyright notice, this *
16 * list of conditions and the following disclaimer. *
17 * 2. Redistributions in binary form must reproduce the above copyright notice, *
18 * this list of conditions and the following disclaimer in the documentation *
19 * and/or other materials provided with the distribution. *
20 * *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
24 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
31 * *
32 * Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
33 * *
34 **************************************************************************************************/
35
36 #include <stdlib.h>
37 #include <stdio.h>
38
39 #include "../include/blasfeo_common.h"
40 #include "../include/blasfeo_s_kernel.h"
41
42
43
44 #if defined(LA_HIGH_PERFORMANCE)
45
46
47
48 // z = y + alpha*x, with increments equal to 1
blasfeo_saxpy(int m,float alpha,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)49 void blasfeo_saxpy(int m, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
50 {
51 float *x = sx->pa + xi;
52 float *y = sy->pa + yi;
53 float *z = sz->pa + zi;
54 int ii;
55 ii = 0;
56 for( ; ii<m-3; ii+=4)
57 {
58 z[ii+0] = y[ii+0] + alpha*x[ii+0];
59 z[ii+1] = y[ii+1] + alpha*x[ii+1];
60 z[ii+2] = y[ii+2] + alpha*x[ii+2];
61 z[ii+3] = y[ii+3] + alpha*x[ii+3];
62 }
63 for( ; ii<m; ii++)
64 {
65 z[ii+0] = y[ii+0] + alpha*x[ii+0];
66 }
67 return;
68 }
69
70
71
blasfeo_saxpby(int m,float alpha,struct blasfeo_svec * sx,int xi,float beta,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)72 void blasfeo_saxpby(int m, float alpha, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
73 {
74 if(m<=0)
75 return;
76 int ii;
77 float *x = sx->pa + xi;
78 float *y = sy->pa + yi;
79 float *z = sz->pa + zi;
80 ii = 0;
81 for(; ii<m-3; ii+=4)
82 {
83 z[ii+0] = beta*y[ii+0] + alpha*x[ii+0];
84 z[ii+1] = beta*y[ii+1] + alpha*x[ii+1];
85 z[ii+2] = beta*y[ii+2] + alpha*x[ii+2];
86 z[ii+3] = beta*y[ii+3] + alpha*x[ii+3];
87 }
88 for(; ii<m; ii++)
89 z[ii+0] = beta*y[ii+0] + alpha*x[ii+0];
90 return;
91 }
92
93
94
saxpy_bkp_libstr(int m,float alpha,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)95 void saxpy_bkp_libstr(int m, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
96 {
97 float *x = sx->pa + xi;
98 float *y = sy->pa + yi;
99 float *z = sz->pa + zi;
100 int ii;
101 ii = 0;
102 for( ; ii<m-3; ii+=4)
103 {
104 z[ii+0] = y[ii+0];
105 y[ii+0] = y[ii+0] + alpha*x[ii+0];
106 z[ii+1] = y[ii+1];
107 y[ii+1] = y[ii+1] + alpha*x[ii+1];
108 z[ii+2] = y[ii+2];
109 y[ii+2] = y[ii+2] + alpha*x[ii+2];
110 z[ii+3] = y[ii+3];
111 y[ii+3] = y[ii+3] + alpha*x[ii+3];
112 }
113 for( ; ii<m; ii++)
114 {
115 z[ii+0] = y[ii+0];
116 y[ii+0] = y[ii+0] + alpha*x[ii+0];
117 }
118 return;
119 }
120
121
122
123 // multiply two vectors
blasfeo_svecmul(int m,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)124 void blasfeo_svecmul(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
125 {
126
127 if(m<=0)
128 return;
129
130 float *x = sx->pa + xi;
131 float *y = sy->pa + yi;
132 float *z = sz->pa + zi;
133 int ii;
134
135 ii = 0;
136
137 for(; ii<m; ii++)
138 {
139 z[ii+0] = x[ii+0] * y[ii+0];
140 }
141 return;
142 }
143
144
145
146 // multiply two vectors and add result to another vector
blasfeo_svecmulacc(int m,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)147 void blasfeo_svecmulacc(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
148 {
149
150 if(m<=0)
151 return;
152
153 float *x = sx->pa + xi;
154 float *y = sy->pa + yi;
155 float *z = sz->pa + zi;
156 int ii;
157
158 ii = 0;
159
160 for(; ii<m; ii++)
161 {
162 z[ii+0] += x[ii+0] * y[ii+0];
163 }
164 return;
165 }
166
167
168
169 // multiply two vectors and compute dot product
blasfeo_svecmuldot(int m,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi,struct blasfeo_svec * sz,int zi)170 float blasfeo_svecmuldot(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi)
171 {
172
173 if(m<=0)
174 return 0.0;
175
176 float *x = sx->pa + xi;
177 float *y = sy->pa + yi;
178 float *z = sz->pa + zi;
179 int ii;
180 float dot = 0.0;
181
182 ii = 0;
183
184 for(; ii<m; ii++)
185 {
186 z[ii+0] = x[ii+0] * y[ii+0];
187 dot += z[ii+0];
188 }
189 return dot;
190 }
191
192
193
194 // compute dot product
blasfeo_sdot(int m,struct blasfeo_svec * sx,int xi,struct blasfeo_svec * sy,int yi)195 float blasfeo_sdot(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi)
196 {
197
198 if(m<=0)
199 return 0.0;
200
201 float *x = sx->pa + xi;
202 float *y = sy->pa + yi;
203 int ii;
204 float dot = 0.0;
205
206 ii = 0;
207 for(; ii<m; ii++)
208 {
209 dot += x[ii+0] * y[ii+0];
210 }
211 return dot;
212 }
213
214
215
216 #else
217
218 #error : wrong LA choice
219
220 #endif
221
222