1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_cge_sum_mv_c_s(enum blas_order_type order,int m,int n,const void * alpha,const void * a,int lda,const float * x,int incx,const void * beta,const void * b,int ldb,void * y,int incy)3 void BLAS_cge_sum_mv_c_s(enum blas_order_type order, int m, int n,
4 			 const void *alpha, const void *a, int lda,
5 			 const float *x, int incx,
6 			 const void *beta, const void *b, int ldb,
7 			 void *y, int incy)
8 
9 /*
10  * Purpose
11  * =======
12  *
13  * Computes y = alpha * A * x + beta * B * y,
14  *     where A, B are general matricies.
15  *
16  * Arguments
17  * =========
18  *
19  * order        (input) blas_order_type
20  *              Order of A; row or column major
21  *
22  * m            (input) int
23  *              Row Dimension of A, B, length of output vector y
24  *
25  * n            (input) int
26  *              Column Dimension of A, B and the length of vector x
27  *
28  * alpha        (input) const void*
29  *
30  * A            (input) const void*
31  *
32  * lda          (input) int
33  *              Leading dimension of A
34  *
35  * x            (input) const float*
36  *
37  * incx         (input) int
38  *              The stride for vector x.
39  *
40  * beta         (input) const void*
41  *
42  * b            (input) const void*
43  *
44  * ldb          (input) int
45  *              Leading dimension of B
46  *
47  * y            (input/output) void*
48  *
49  * incy         (input) int
50  *              The stride for vector y.
51  *
52  */
53 {
54   /* Routine name */
55   static const char routine_name[] = "BLAS_cge_sum_mv_c_s";
56   int i, j;
57   int xi, yi;
58   int x_starti, y_starti, incxi, incyi;
59   int lda_min;
60   int ai;
61   int incai;
62   int aij;
63   int incaij;
64   int bi;
65   int incbi;
66   int bij;
67   int incbij;
68 
69   const float *a_i = (float *) a;
70   const float *b_i = (float *) b;
71   const float *x_i = x;
72   float *y_i = (float *) y;
73   float *alpha_i = (float *) alpha;
74   float *beta_i = (float *) beta;
75   float a_elem[2];
76   float b_elem[2];
77   float x_elem;
78   float prod[2];
79   float sumA[2];
80   float sumB[2];
81   float tmp1[2];
82   float tmp2[2];
83 
84 
85 
86   /* m is number of rows */
87   /* n is number of columns */
88 
89   if (m == 0 || n == 0)
90     return;
91 
92 
93   /* all error calls */
94   if (order == blas_rowmajor) {
95     lda_min = n;
96     incai = lda;		/* row stride */
97     incbi = ldb;
98     incbij = incaij = 1;	/* column stride */
99   } else if (order == blas_colmajor) {
100     lda_min = m;
101     incai = incbi = 1;		/*row stride */
102     incaij = lda;		/* column stride */
103     incbij = ldb;
104   } else {
105     /* error, order not blas_colmajor not blas_rowmajor */
106     BLAS_error(routine_name, -1, order, 0);
107     return;
108   }
109 
110   if (m < 0)
111     BLAS_error(routine_name, -2, m, 0);
112   else if (n < 0)
113     BLAS_error(routine_name, -3, n, 0);
114   if (lda < lda_min)
115     BLAS_error(routine_name, -6, lda, 0);
116   else if (ldb < lda_min)
117     BLAS_error(routine_name, -11, ldb, 0);
118   else if (incx == 0)
119     BLAS_error(routine_name, -8, incx, 0);
120   else if (incy == 0)
121     BLAS_error(routine_name, -13, incy, 0);
122 
123   incxi = incx;
124   incyi = incy;
125 
126   incyi *= 2;
127   incai *= 2;
128   incaij *= 2;
129   incbi *= 2;
130   incbij *= 2;
131 
132   if (incxi > 0)
133     x_starti = 0;
134   else
135     x_starti = (1 - n) * incxi;
136 
137   if (incyi > 0)
138     y_starti = 0;
139   else
140     y_starti = (1 - m) * incyi;
141 
142 
143 
144   if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
145     if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
146       /* alpha, beta are 0.0 */
147       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
148 	y_i[yi] = 0.0;
149 	y_i[yi + 1] = 0.0;
150       }
151     } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
152       /* alpha is 0.0, beta is 1.0 */
153 
154 
155       bi = 0;
156       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
157 
158 	sumB[0] = sumB[1] = 0.0;
159 	bij = bi;
160 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
161 	  x_elem = x_i[xi];
162 
163 	  b_elem[0] = b_i[bij];
164 	  b_elem[1] = b_i[bij + 1];
165 	  {
166 	    prod[0] = b_elem[0] * x_elem;
167 	    prod[1] = b_elem[1] * x_elem;
168 	  }
169 	  sumB[0] = sumB[0] + prod[0];
170 	  sumB[1] = sumB[1] + prod[1];
171 	  bij += incbij;
172 	}
173 	/* now put the result into y_i */
174 	y_i[yi] = sumB[0];
175 	y_i[yi + 1] = sumB[1];
176 
177 	bi += incbi;
178       }
179     } else {
180       /* alpha is 0.0, beta not 1.0 nor 0.0 */
181 
182 
183       bi = 0;
184       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
185 
186 	sumB[0] = sumB[1] = 0.0;
187 	bij = bi;
188 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
189 	  x_elem = x_i[xi];
190 
191 	  b_elem[0] = b_i[bij];
192 	  b_elem[1] = b_i[bij + 1];
193 	  {
194 	    prod[0] = b_elem[0] * x_elem;
195 	    prod[1] = b_elem[1] * x_elem;
196 	  }
197 	  sumB[0] = sumB[0] + prod[0];
198 	  sumB[1] = sumB[1] + prod[1];
199 	  bij += incbij;
200 	}
201 	/* now put the result into y_i */
202 	{
203 	  tmp1[0] = sumB[0] * beta_i[0] - sumB[1] * beta_i[1];
204 	  tmp1[1] = sumB[0] * beta_i[1] + sumB[1] * beta_i[0];
205 	}
206 
207 	y_i[yi] = tmp1[0];
208 	y_i[yi + 1] = tmp1[1];
209 
210 	bi += incbi;
211       }
212     }
213   } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
214     if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
215       /* alpha is 1.0, beta is 0.0 */
216 
217       ai = 0;
218 
219       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
220 	sumA[0] = sumA[1] = 0.0;
221 	aij = ai;
222 
223 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
224 	  x_elem = x_i[xi];
225 	  a_elem[0] = a_i[aij];
226 	  a_elem[1] = a_i[aij + 1];
227 	  {
228 	    prod[0] = a_elem[0] * x_elem;
229 	    prod[1] = a_elem[1] * x_elem;
230 	  }
231 	  sumA[0] = sumA[0] + prod[0];
232 	  sumA[1] = sumA[1] + prod[1];
233 	  aij += incaij;
234 
235 	}
236 	/* now put the result into y_i */
237 	y_i[yi] = sumA[0];
238 	y_i[yi + 1] = sumA[1];
239 	ai += incai;
240 
241       }
242     } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
243       /* alpha is 1.0, beta is 1.0 */
244 
245       ai = 0;
246       bi = 0;
247       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
248 	sumA[0] = sumA[1] = 0.0;
249 	aij = ai;
250 	sumB[0] = sumB[1] = 0.0;
251 	bij = bi;
252 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
253 	  x_elem = x_i[xi];
254 	  a_elem[0] = a_i[aij];
255 	  a_elem[1] = a_i[aij + 1];
256 	  {
257 	    prod[0] = a_elem[0] * x_elem;
258 	    prod[1] = a_elem[1] * x_elem;
259 	  }
260 	  sumA[0] = sumA[0] + prod[0];
261 	  sumA[1] = sumA[1] + prod[1];
262 	  aij += incaij;
263 	  b_elem[0] = b_i[bij];
264 	  b_elem[1] = b_i[bij + 1];
265 	  {
266 	    prod[0] = b_elem[0] * x_elem;
267 	    prod[1] = b_elem[1] * x_elem;
268 	  }
269 	  sumB[0] = sumB[0] + prod[0];
270 	  sumB[1] = sumB[1] + prod[1];
271 	  bij += incbij;
272 	}
273 	/* now put the result into y_i */
274 	tmp1[0] = sumA[0];
275 	tmp1[1] = sumA[1];
276 	tmp2[0] = sumB[0];
277 	tmp2[1] = sumB[1];
278 	tmp1[0] = tmp1[0] + tmp2[0];
279 	tmp1[1] = tmp1[1] + tmp2[1];
280 	y_i[yi] = tmp1[0];
281 	y_i[yi + 1] = tmp1[1];
282 	ai += incai;
283 	bi += incbi;
284       }
285     } else {
286       /* alpha is 1.0, beta is other */
287 
288       ai = 0;
289       bi = 0;
290       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
291 	sumA[0] = sumA[1] = 0.0;
292 	aij = ai;
293 	sumB[0] = sumB[1] = 0.0;
294 	bij = bi;
295 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
296 	  x_elem = x_i[xi];
297 	  a_elem[0] = a_i[aij];
298 	  a_elem[1] = a_i[aij + 1];
299 	  {
300 	    prod[0] = a_elem[0] * x_elem;
301 	    prod[1] = a_elem[1] * x_elem;
302 	  }
303 	  sumA[0] = sumA[0] + prod[0];
304 	  sumA[1] = sumA[1] + prod[1];
305 	  aij += incaij;
306 	  b_elem[0] = b_i[bij];
307 	  b_elem[1] = b_i[bij + 1];
308 	  {
309 	    prod[0] = b_elem[0] * x_elem;
310 	    prod[1] = b_elem[1] * x_elem;
311 	  }
312 	  sumB[0] = sumB[0] + prod[0];
313 	  sumB[1] = sumB[1] + prod[1];
314 	  bij += incbij;
315 	}
316 	/* now put the result into y_i */
317 	tmp1[0] = sumA[0];
318 	tmp1[1] = sumA[1];
319 	{
320 	  tmp2[0] = sumB[0] * beta_i[0] - sumB[1] * beta_i[1];
321 	  tmp2[1] = sumB[0] * beta_i[1] + sumB[1] * beta_i[0];
322 	}
323 
324 	tmp1[0] = tmp1[0] + tmp2[0];
325 	tmp1[1] = tmp1[1] + tmp2[1];
326 	y_i[yi] = tmp1[0];
327 	y_i[yi + 1] = tmp1[1];
328 	ai += incai;
329 	bi += incbi;
330       }
331     }
332   } else {
333     if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
334       /* alpha is other, beta is 0.0 */
335 
336       ai = 0;
337 
338       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
339 	sumA[0] = sumA[1] = 0.0;
340 	aij = ai;
341 
342 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
343 	  x_elem = x_i[xi];
344 	  a_elem[0] = a_i[aij];
345 	  a_elem[1] = a_i[aij + 1];
346 	  {
347 	    prod[0] = a_elem[0] * x_elem;
348 	    prod[1] = a_elem[1] * x_elem;
349 	  }
350 	  sumA[0] = sumA[0] + prod[0];
351 	  sumA[1] = sumA[1] + prod[1];
352 	  aij += incaij;
353 
354 	}
355 	/* now put the result into y_i */
356 	{
357 	  tmp1[0] = sumA[0] * alpha_i[0] - sumA[1] * alpha_i[1];
358 	  tmp1[1] = sumA[0] * alpha_i[1] + sumA[1] * alpha_i[0];
359 	}
360 
361 	y_i[yi] = tmp1[0];
362 	y_i[yi + 1] = tmp1[1];
363 	ai += incai;
364 
365       }
366     } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
367       /* alpha is other, beta is 1.0 */
368 
369       ai = 0;
370       bi = 0;
371       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
372 	sumA[0] = sumA[1] = 0.0;
373 	aij = ai;
374 	sumB[0] = sumB[1] = 0.0;
375 	bij = bi;
376 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
377 	  x_elem = x_i[xi];
378 	  a_elem[0] = a_i[aij];
379 	  a_elem[1] = a_i[aij + 1];
380 	  {
381 	    prod[0] = a_elem[0] * x_elem;
382 	    prod[1] = a_elem[1] * x_elem;
383 	  }
384 	  sumA[0] = sumA[0] + prod[0];
385 	  sumA[1] = sumA[1] + prod[1];
386 	  aij += incaij;
387 	  b_elem[0] = b_i[bij];
388 	  b_elem[1] = b_i[bij + 1];
389 	  {
390 	    prod[0] = b_elem[0] * x_elem;
391 	    prod[1] = b_elem[1] * x_elem;
392 	  }
393 	  sumB[0] = sumB[0] + prod[0];
394 	  sumB[1] = sumB[1] + prod[1];
395 	  bij += incbij;
396 	}
397 	/* now put the result into y_i */
398 	{
399 	  tmp1[0] = sumA[0] * alpha_i[0] - sumA[1] * alpha_i[1];
400 	  tmp1[1] = sumA[0] * alpha_i[1] + sumA[1] * alpha_i[0];
401 	}
402 
403 	tmp2[0] = sumB[0];
404 	tmp2[1] = sumB[1];
405 	tmp1[0] = tmp1[0] + tmp2[0];
406 	tmp1[1] = tmp1[1] + tmp2[1];
407 	y_i[yi] = tmp1[0];
408 	y_i[yi + 1] = tmp1[1];
409 	ai += incai;
410 	bi += incbi;
411       }
412     } else {
413       /* most general form, alpha, beta are other */
414 
415       ai = 0;
416       bi = 0;
417       for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
418 	sumA[0] = sumA[1] = 0.0;
419 	aij = ai;
420 	sumB[0] = sumB[1] = 0.0;
421 	bij = bi;
422 	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
423 	  x_elem = x_i[xi];
424 	  a_elem[0] = a_i[aij];
425 	  a_elem[1] = a_i[aij + 1];
426 	  {
427 	    prod[0] = a_elem[0] * x_elem;
428 	    prod[1] = a_elem[1] * x_elem;
429 	  }
430 	  sumA[0] = sumA[0] + prod[0];
431 	  sumA[1] = sumA[1] + prod[1];
432 	  aij += incaij;
433 	  b_elem[0] = b_i[bij];
434 	  b_elem[1] = b_i[bij + 1];
435 	  {
436 	    prod[0] = b_elem[0] * x_elem;
437 	    prod[1] = b_elem[1] * x_elem;
438 	  }
439 	  sumB[0] = sumB[0] + prod[0];
440 	  sumB[1] = sumB[1] + prod[1];
441 	  bij += incbij;
442 	}
443 	/* now put the result into y_i */
444 	{
445 	  tmp1[0] = sumA[0] * alpha_i[0] - sumA[1] * alpha_i[1];
446 	  tmp1[1] = sumA[0] * alpha_i[1] + sumA[1] * alpha_i[0];
447 	}
448 
449 	{
450 	  tmp2[0] = sumB[0] * beta_i[0] - sumB[1] * beta_i[1];
451 	  tmp2[1] = sumB[0] * beta_i[1] + sumB[1] * beta_i[0];
452 	}
453 
454 	tmp1[0] = tmp1[0] + tmp2[0];
455 	tmp1[1] = tmp1[1] + tmp2[1];
456 	y_i[yi] = tmp1[0];
457 	y_i[yi + 1] = tmp1[1];
458 	ai += incai;
459 	bi += incbi;
460       }
461     }
462   }
463 
464 
465 }				/* end BLAS_cge_sum_mv_c_s */
466