1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_dge_sum_mv_s_s(enum blas_order_type order,int m,int n,double alpha,const float * a,int lda,const float * x,int incx,double beta,const float * b,int ldb,double * y,int incy)3 void BLAS_dge_sum_mv_s_s(enum blas_order_type order, int m, int n,
4 			 double alpha, const float *a, int lda,
5 			 const float *x, int incx,
6 			 double beta, const float *b, int ldb,
7 			 double *y, int incy)
8 
9 /*
10  * Purpose
11  * =======
12  *
13  * Computes y = alpha * A * x + beta * B * y,
14  *     where A, B are general matricies.
15  *
16  * Arguments
17  * =========
18  *
19  * order        (input) blas_order_type
20  *              Order of A; row or column major
21  *
22  * m            (input) int
23  *              Row Dimension of A, B, length of output vector y
24  *
25  * n            (input) int
26  *              Column Dimension of A, B and the length of vector x
27  *
28  * alpha        (input) double
29  *
30  * A            (input) const float*
31  *
32  * lda          (input) int
33  *              Leading dimension of A
34  *
35  * x            (input) const float*
36  *
37  * incx         (input) int
38  *              The stride for vector x.
39  *
40  * beta         (input) double
41  *
42  * b            (input) const float*
43  *
44  * ldb          (input) int
45  *              Leading dimension of B
46  *
47  * y            (input/output) double*
48  *
49  * incy         (input) int
50  *              The stride for vector y.
51  *
52  */
53 {
54   /* Routine name */
55   static const char routine_name[] = "BLAS_dge_sum_mv_s_s";
56   int i, j;
57   int xi, yi;
58   int x_starti, y_starti, incxi, incyi;
59   int lda_min;
60   int ai;
61   int incai;
62   int aij;
63   int incaij;
64   int bi;
65   int incbi;
66   int bij;
67   int incbij;
68 
69   const float *a_i = a;
70   const float *b_i = b;
71   const float *x_i = x;
72   double *y_i = y;
73   double alpha_i = alpha;
74   double beta_i = beta;
75   float a_elem;
76   float b_elem;
77   float x_elem;
78   double prod;
79   double sumA;
80   double sumB;
81   double tmp1;
82   double tmp2;
83 
84 
85 
86   /* m is number of rows */
87   /* n is number of columns */
88 
89   if (m == 0 || n == 0)
90     return;
91 
92 
93   /* all error calls */
94   if (order == blas_rowmajor) {
95     lda_min = n;
96     incai = lda;		/* row stride */
97     incbi = ldb;
98     incbij = incaij = 1;	/* column stride */
99   } else if (order == blas_colmajor) {
100     lda_min = m;
101     incai = incbi = 1;		/*row stride */
102     incaij = lda;		/* column stride */
103     incbij = ldb;
104   } else {
105     /* error, order not blas_colmajor not blas_rowmajor */
106     BLAS_error(routine_name, -1, order, 0);
107     return;
108   }
109 
110   if (m < 0)
111     BLAS_error(routine_name, -2, m, 0);
112   else if (n < 0)
113     BLAS_error(routine_name, -3, n, 0);
114   if (lda < lda_min)
115     BLAS_error(routine_name, -6, lda, 0);
116   else if (ldb < lda_min)
117     BLAS_error(routine_name, -11, ldb, 0);
118   else if (incx == 0)
119     BLAS_error(routine_name, -8, incx, 0);
120   else if (incy == 0)
121     BLAS_error(routine_name, -13, incy, 0);
122 
123   incxi = incx;
124   incyi = incy;
125 
126 
127 
128 
129 
130 
131 
132   if (incxi > 0)
133     x_starti = 0;
134   else
135     x_starti = (1 - n) * incxi;
136 
137   if (incyi > 0)
138     y_starti = 0;
139   else
140     y_starti = (1 - m) * incyi;
141 
142 
143 
144   if (alpha_i == 0.0) {
145     if (beta_i == 0.0) {
146       /* alpha, beta are 0.0 */
147       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
148 	y_i[yi] = 0.0;
149       }
150     } else if (beta_i == 1.0) {
151       /* alpha is 0.0, beta is 1.0 */
152 
153 
154       bi = 0;
155       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
156 
157 	sumB = 0.0;
158 	bij = bi;
159 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
160 	  x_elem = x_i[xi];
161 
162 	  b_elem = b_i[bij];
163 	  prod = (double) b_elem *x_elem;
164 	  sumB = sumB + prod;
165 	  bij += incbij;
166 	}
167 	/* now put the result into y_i */
168 	y_i[yi] = sumB;
169 
170 	bi += incbi;
171       }
172     } else {
173       /* alpha is 0.0, beta not 1.0 nor 0.0 */
174 
175 
176       bi = 0;
177       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
178 
179 	sumB = 0.0;
180 	bij = bi;
181 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
182 	  x_elem = x_i[xi];
183 
184 	  b_elem = b_i[bij];
185 	  prod = (double) b_elem *x_elem;
186 	  sumB = sumB + prod;
187 	  bij += incbij;
188 	}
189 	/* now put the result into y_i */
190 	tmp1 = sumB * beta_i;
191 	y_i[yi] = tmp1;
192 
193 	bi += incbi;
194       }
195     }
196   } else if (alpha_i == 1.0) {
197     if (beta_i == 0.0) {
198       /* alpha is 1.0, beta is 0.0 */
199 
200       ai = 0;
201 
202       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
203 	sumA = 0.0;
204 	aij = ai;
205 
206 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
207 	  x_elem = x_i[xi];
208 	  a_elem = a_i[aij];
209 	  prod = (double) a_elem *x_elem;
210 	  sumA = sumA + prod;
211 	  aij += incaij;
212 
213 	}
214 	/* now put the result into y_i */
215 	y_i[yi] = sumA;
216 	ai += incai;
217 
218       }
219     } else if (beta_i == 1.0) {
220       /* alpha is 1.0, beta is 1.0 */
221 
222       ai = 0;
223       bi = 0;
224       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
225 	sumA = 0.0;
226 	aij = ai;
227 	sumB = 0.0;
228 	bij = bi;
229 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
230 	  x_elem = x_i[xi];
231 	  a_elem = a_i[aij];
232 	  prod = (double) a_elem *x_elem;
233 	  sumA = sumA + prod;
234 	  aij += incaij;
235 	  b_elem = b_i[bij];
236 	  prod = (double) b_elem *x_elem;
237 	  sumB = sumB + prod;
238 	  bij += incbij;
239 	}
240 	/* now put the result into y_i */
241 	tmp1 = sumA;
242 	tmp2 = sumB;
243 	tmp1 = tmp1 + tmp2;
244 	y_i[yi] = tmp1;
245 	ai += incai;
246 	bi += incbi;
247       }
248     } else {
249       /* alpha is 1.0, beta is other */
250 
251       ai = 0;
252       bi = 0;
253       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
254 	sumA = 0.0;
255 	aij = ai;
256 	sumB = 0.0;
257 	bij = bi;
258 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
259 	  x_elem = x_i[xi];
260 	  a_elem = a_i[aij];
261 	  prod = (double) a_elem *x_elem;
262 	  sumA = sumA + prod;
263 	  aij += incaij;
264 	  b_elem = b_i[bij];
265 	  prod = (double) b_elem *x_elem;
266 	  sumB = sumB + prod;
267 	  bij += incbij;
268 	}
269 	/* now put the result into y_i */
270 	tmp1 = sumA;
271 	tmp2 = sumB * beta_i;
272 	tmp1 = tmp1 + tmp2;
273 	y_i[yi] = tmp1;
274 	ai += incai;
275 	bi += incbi;
276       }
277     }
278   } else {
279     if (beta_i == 0.0) {
280       /* alpha is other, beta is 0.0 */
281 
282       ai = 0;
283 
284       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
285 	sumA = 0.0;
286 	aij = ai;
287 
288 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
289 	  x_elem = x_i[xi];
290 	  a_elem = a_i[aij];
291 	  prod = (double) a_elem *x_elem;
292 	  sumA = sumA + prod;
293 	  aij += incaij;
294 
295 	}
296 	/* now put the result into y_i */
297 	tmp1 = sumA * alpha_i;
298 	y_i[yi] = tmp1;
299 	ai += incai;
300 
301       }
302     } else if (beta_i == 1.0) {
303       /* alpha is other, beta is 1.0 */
304 
305       ai = 0;
306       bi = 0;
307       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
308 	sumA = 0.0;
309 	aij = ai;
310 	sumB = 0.0;
311 	bij = bi;
312 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
313 	  x_elem = x_i[xi];
314 	  a_elem = a_i[aij];
315 	  prod = (double) a_elem *x_elem;
316 	  sumA = sumA + prod;
317 	  aij += incaij;
318 	  b_elem = b_i[bij];
319 	  prod = (double) b_elem *x_elem;
320 	  sumB = sumB + prod;
321 	  bij += incbij;
322 	}
323 	/* now put the result into y_i */
324 	tmp1 = sumA * alpha_i;
325 	tmp2 = sumB;
326 	tmp1 = tmp1 + tmp2;
327 	y_i[yi] = tmp1;
328 	ai += incai;
329 	bi += incbi;
330       }
331     } else {
332       /* most general form, alpha, beta are other */
333 
334       ai = 0;
335       bi = 0;
336       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
337 	sumA = 0.0;
338 	aij = ai;
339 	sumB = 0.0;
340 	bij = bi;
341 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
342 	  x_elem = x_i[xi];
343 	  a_elem = a_i[aij];
344 	  prod = (double) a_elem *x_elem;
345 	  sumA = sumA + prod;
346 	  aij += incaij;
347 	  b_elem = b_i[bij];
348 	  prod = (double) b_elem *x_elem;
349 	  sumB = sumB + prod;
350 	  bij += incbij;
351 	}
352 	/* now put the result into y_i */
353 	tmp1 = sumA * alpha_i;
354 	tmp2 = sumB * beta_i;
355 	tmp1 = tmp1 + tmp2;
356 	y_i[yi] = tmp1;
357 	ai += incai;
358 	bi += incbi;
359       }
360     }
361   }
362 
363 
364 }				/* end BLAS_dge_sum_mv_s_s */
365