1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_dge_sum_mv_s_s(enum blas_order_type order,int m,int n,double alpha,const float * a,int lda,const float * x,int incx,double beta,const float * b,int ldb,double * y,int incy)3 void BLAS_dge_sum_mv_s_s(enum blas_order_type order, int m, int n,
4 double alpha, const float *a, int lda,
5 const float *x, int incx,
6 double beta, const float *b, int ldb,
7 double *y, int incy)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * Computes y = alpha * A * x + beta * B * y,
14 * where A, B are general matricies.
15 *
16 * Arguments
17 * =========
18 *
19 * order (input) blas_order_type
20 * Order of A; row or column major
21 *
22 * m (input) int
23 * Row Dimension of A, B, length of output vector y
24 *
25 * n (input) int
26 * Column Dimension of A, B and the length of vector x
27 *
28 * alpha (input) double
29 *
30 * A (input) const float*
31 *
32 * lda (input) int
33 * Leading dimension of A
34 *
35 * x (input) const float*
36 *
37 * incx (input) int
38 * The stride for vector x.
39 *
40 * beta (input) double
41 *
42 * b (input) const float*
43 *
44 * ldb (input) int
45 * Leading dimension of B
46 *
47 * y (input/output) double*
48 *
49 * incy (input) int
50 * The stride for vector y.
51 *
52 */
53 {
54 /* Routine name */
55 static const char routine_name[] = "BLAS_dge_sum_mv_s_s";
56 int i, j;
57 int xi, yi;
58 int x_starti, y_starti, incxi, incyi;
59 int lda_min;
60 int ai;
61 int incai;
62 int aij;
63 int incaij;
64 int bi;
65 int incbi;
66 int bij;
67 int incbij;
68
69 const float *a_i = a;
70 const float *b_i = b;
71 const float *x_i = x;
72 double *y_i = y;
73 double alpha_i = alpha;
74 double beta_i = beta;
75 float a_elem;
76 float b_elem;
77 float x_elem;
78 double prod;
79 double sumA;
80 double sumB;
81 double tmp1;
82 double tmp2;
83
84
85
86 /* m is number of rows */
87 /* n is number of columns */
88
89 if (m == 0 || n == 0)
90 return;
91
92
93 /* all error calls */
94 if (order == blas_rowmajor) {
95 lda_min = n;
96 incai = lda; /* row stride */
97 incbi = ldb;
98 incbij = incaij = 1; /* column stride */
99 } else if (order == blas_colmajor) {
100 lda_min = m;
101 incai = incbi = 1; /*row stride */
102 incaij = lda; /* column stride */
103 incbij = ldb;
104 } else {
105 /* error, order not blas_colmajor not blas_rowmajor */
106 BLAS_error(routine_name, -1, order, 0);
107 return;
108 }
109
110 if (m < 0)
111 BLAS_error(routine_name, -2, m, 0);
112 else if (n < 0)
113 BLAS_error(routine_name, -3, n, 0);
114 if (lda < lda_min)
115 BLAS_error(routine_name, -6, lda, 0);
116 else if (ldb < lda_min)
117 BLAS_error(routine_name, -11, ldb, 0);
118 else if (incx == 0)
119 BLAS_error(routine_name, -8, incx, 0);
120 else if (incy == 0)
121 BLAS_error(routine_name, -13, incy, 0);
122
123 incxi = incx;
124 incyi = incy;
125
126
127
128
129
130
131
132 if (incxi > 0)
133 x_starti = 0;
134 else
135 x_starti = (1 - n) * incxi;
136
137 if (incyi > 0)
138 y_starti = 0;
139 else
140 y_starti = (1 - m) * incyi;
141
142
143
144 if (alpha_i == 0.0) {
145 if (beta_i == 0.0) {
146 /* alpha, beta are 0.0 */
147 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
148 y_i[yi] = 0.0;
149 }
150 } else if (beta_i == 1.0) {
151 /* alpha is 0.0, beta is 1.0 */
152
153
154 bi = 0;
155 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
156
157 sumB = 0.0;
158 bij = bi;
159 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
160 x_elem = x_i[xi];
161
162 b_elem = b_i[bij];
163 prod = (double) b_elem *x_elem;
164 sumB = sumB + prod;
165 bij += incbij;
166 }
167 /* now put the result into y_i */
168 y_i[yi] = sumB;
169
170 bi += incbi;
171 }
172 } else {
173 /* alpha is 0.0, beta not 1.0 nor 0.0 */
174
175
176 bi = 0;
177 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
178
179 sumB = 0.0;
180 bij = bi;
181 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
182 x_elem = x_i[xi];
183
184 b_elem = b_i[bij];
185 prod = (double) b_elem *x_elem;
186 sumB = sumB + prod;
187 bij += incbij;
188 }
189 /* now put the result into y_i */
190 tmp1 = sumB * beta_i;
191 y_i[yi] = tmp1;
192
193 bi += incbi;
194 }
195 }
196 } else if (alpha_i == 1.0) {
197 if (beta_i == 0.0) {
198 /* alpha is 1.0, beta is 0.0 */
199
200 ai = 0;
201
202 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
203 sumA = 0.0;
204 aij = ai;
205
206 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
207 x_elem = x_i[xi];
208 a_elem = a_i[aij];
209 prod = (double) a_elem *x_elem;
210 sumA = sumA + prod;
211 aij += incaij;
212
213 }
214 /* now put the result into y_i */
215 y_i[yi] = sumA;
216 ai += incai;
217
218 }
219 } else if (beta_i == 1.0) {
220 /* alpha is 1.0, beta is 1.0 */
221
222 ai = 0;
223 bi = 0;
224 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
225 sumA = 0.0;
226 aij = ai;
227 sumB = 0.0;
228 bij = bi;
229 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
230 x_elem = x_i[xi];
231 a_elem = a_i[aij];
232 prod = (double) a_elem *x_elem;
233 sumA = sumA + prod;
234 aij += incaij;
235 b_elem = b_i[bij];
236 prod = (double) b_elem *x_elem;
237 sumB = sumB + prod;
238 bij += incbij;
239 }
240 /* now put the result into y_i */
241 tmp1 = sumA;
242 tmp2 = sumB;
243 tmp1 = tmp1 + tmp2;
244 y_i[yi] = tmp1;
245 ai += incai;
246 bi += incbi;
247 }
248 } else {
249 /* alpha is 1.0, beta is other */
250
251 ai = 0;
252 bi = 0;
253 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
254 sumA = 0.0;
255 aij = ai;
256 sumB = 0.0;
257 bij = bi;
258 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
259 x_elem = x_i[xi];
260 a_elem = a_i[aij];
261 prod = (double) a_elem *x_elem;
262 sumA = sumA + prod;
263 aij += incaij;
264 b_elem = b_i[bij];
265 prod = (double) b_elem *x_elem;
266 sumB = sumB + prod;
267 bij += incbij;
268 }
269 /* now put the result into y_i */
270 tmp1 = sumA;
271 tmp2 = sumB * beta_i;
272 tmp1 = tmp1 + tmp2;
273 y_i[yi] = tmp1;
274 ai += incai;
275 bi += incbi;
276 }
277 }
278 } else {
279 if (beta_i == 0.0) {
280 /* alpha is other, beta is 0.0 */
281
282 ai = 0;
283
284 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
285 sumA = 0.0;
286 aij = ai;
287
288 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
289 x_elem = x_i[xi];
290 a_elem = a_i[aij];
291 prod = (double) a_elem *x_elem;
292 sumA = sumA + prod;
293 aij += incaij;
294
295 }
296 /* now put the result into y_i */
297 tmp1 = sumA * alpha_i;
298 y_i[yi] = tmp1;
299 ai += incai;
300
301 }
302 } else if (beta_i == 1.0) {
303 /* alpha is other, beta is 1.0 */
304
305 ai = 0;
306 bi = 0;
307 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
308 sumA = 0.0;
309 aij = ai;
310 sumB = 0.0;
311 bij = bi;
312 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
313 x_elem = x_i[xi];
314 a_elem = a_i[aij];
315 prod = (double) a_elem *x_elem;
316 sumA = sumA + prod;
317 aij += incaij;
318 b_elem = b_i[bij];
319 prod = (double) b_elem *x_elem;
320 sumB = sumB + prod;
321 bij += incbij;
322 }
323 /* now put the result into y_i */
324 tmp1 = sumA * alpha_i;
325 tmp2 = sumB;
326 tmp1 = tmp1 + tmp2;
327 y_i[yi] = tmp1;
328 ai += incai;
329 bi += incbi;
330 }
331 } else {
332 /* most general form, alpha, beta are other */
333
334 ai = 0;
335 bi = 0;
336 for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
337 sumA = 0.0;
338 aij = ai;
339 sumB = 0.0;
340 bij = bi;
341 for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
342 x_elem = x_i[xi];
343 a_elem = a_i[aij];
344 prod = (double) a_elem *x_elem;
345 sumA = sumA + prod;
346 aij += incaij;
347 b_elem = b_i[bij];
348 prod = (double) b_elem *x_elem;
349 sumB = sumB + prod;
350 bij += incbij;
351 }
352 /* now put the result into y_i */
353 tmp1 = sumA * alpha_i;
354 tmp2 = sumB * beta_i;
355 tmp1 = tmp1 + tmp2;
356 y_i[yi] = tmp1;
357 ai += incai;
358 bi += incbi;
359 }
360 }
361 }
362
363
364 } /* end BLAS_dge_sum_mv_s_s */
365