1 /*******************************************************************************
2  * ALBERTA:  an Adaptive multi Level finite element toolbox using
3  *           Bisectioning refinement and Error control by Residual
4  *           Techniques for scientific Applications
5  *
6  * file:     el_vec.h
7  *
8  * description: support routines for operation on element vector and matrices
9  *
10  *******************************************************************************
11  *
12  *  this file's author(s): Claus-Justus Heine
13  *                         Abteilung fuer Angewandte Mathematik
14  *                         Albert-Ludwigs-Universitaet Freiburg
15  *                         Hermann-Herder-Str. 10
16  *                         D-79104 Freiburg im Breisgau, Germany
17  *
18  *  http://www.alberta-fem.de
19  *
20  *  (c) by C.-J. Heine (2009)
21  *
22  ******************************************************************************/
23 
24 #ifndef _ALBERTA_EL_VEC_H_
25 #define _ALBERTA_EL_VEC_H_
26 
27 #include "alberta.h"
28 
29 /* Initializer for an uninitialized element-vector. */
30 #define DEFUN_INIT_EL_VEC(VECNAME)			\
31   static inline EL_##VECNAME##_VEC *			\
32   CPP_CONCAT3(init_el_, VECNAME##_name, _vec)(		\
33     EL_##VECNAME##_VEC *vec, int size, int size_max)	\
34   {							\
35     vec->n_components     = size;			\
36     vec->n_components_max = size_max;			\
37     vec->reserved         = 1;				\
38     DBL_LIST_INIT(&vec->chain);				\
39     return vec;						\
40   }							\
41   struct _AI_semicolon_dummy
42 
43 DEFUN_INIT_EL_VEC(INT);
44 DEFUN_INIT_EL_VEC(DOF);
45 DEFUN_INIT_EL_VEC(REAL);
46 DEFUN_INIT_EL_VEC(REAL_DD);
47 /*DEFUN_INIT_EL_VEC(REAL_D);*/
48 DEFUN_INIT_EL_VEC(UCHAR);
49 DEFUN_INIT_EL_VEC(SCHAR);
50 DEFUN_INIT_EL_VEC(PTR);
51 
52 static inline EL_REAL_D_VEC *
init_el_real_d_vec(EL_REAL_D_VEC * vec,int size,int size_max)53 init_el_real_d_vec(EL_REAL_D_VEC *vec, int size, int size_max)
54 {
55   vec->n_components     = size;
56   vec->n_components_max = size_max;
57   vec->reserved         = DIM_OF_WORLD;
58   DBL_LIST_INIT(&vec->chain);
59 
60   return vec;
61 }
62 
63 #define EL_VEC_SIZEOF(VECNAME, _size_max)	\
64   (((_size_max) - 1)				\
65    *						\
66    sizeof(EL_##VECNAME##_VEC_TYPE)		\
67    +						\
68    sizeof(EL_##VECNAME##_VEC))
69 
70 /* An automatic element vector, beware that this is just an
71  * uninitialized memory region if _init == false.
72  */
73 #define DEF_EL_VEC_VAR(VECNAME, name, _size, _size_max, _init)	\
74   char _AI_##name##_space[EL_VEC_SIZEOF(VECNAME, (_size_max))];	\
75   EL_##VECNAME##_VEC *name =					\
76     (_init)							\
77     ? CPP_CONCAT3(init_el_, VECNAME##_name, _vec)(		\
78       (EL_##VECNAME##_VEC *)_AI_##name##_space,			\
79       (_size), (_size_max))					\
80     : (EL_##VECNAME##_VEC *)_AI_##name##_space
81 
82 /* Automatic element vector of fixed maximal size. The macro
83  * "size_max" needs to be constant.
84  */
85 #define DEF_EL_VEC_CONST(VECNAME, name, _size, _size_max)		\
86   struct {								\
87       EL_##VECNAME##_VEC el_vec;					\
88       EL_##VECNAME##_VEC_TYPE data[MAX(1, (_size_max)-1)];		\
89   } _AI_##name = {							\
90     {									\
91       (_size),     /* n_components */					\
92       (_size_max), /* n_components_max */				\
93       DBL_LIST_INITIALIZER(_AI_##name.el_vec.chain), /* chain */	\
94       ((sizeof(EL_##VECNAME##_VEC_TYPE) + sizeof(REAL) - 1)		\
95        / sizeof(REAL)), /* reserved (stride) */				\
96     },									\
97   };									\
98   EL_##VECNAME##_VEC *name = &_AI_##name.el_vec
99 
100 /* Allocate and initialize a single, un-chained element vector. */
101 #define ALLOC_EL_VEC(VECNAME, _size, _size_max)		\
102   CPP_CONCAT3(init_el_, VECNAME##_name, _vec)(		\
103     (EL_##VECNAME##_VEC *)MEM_ALLOC(			\
104       EL_VEC_SIZEOF(VECNAME, (_size_max)), char),	\
105     (_size), (_size_max))
106 
107 /* <<< get_dof_indices() */
108 
get_dof_indices(EL_DOF_VEC * dofs,const FE_SPACE * fe_space,const EL * el)109 static inline EL_DOF_VEC *get_dof_indices(EL_DOF_VEC *dofs,
110 					  const FE_SPACE *fe_space,
111 					  const EL *el)
112 {
113   const BAS_FCTS *bas_fcts = fe_space->bas_fcts;
114   const FE_SPACE *fe_chain;
115 
116   if (dofs) {
117     CHAIN_DO(fe_space, const FE_SPACE) {
118       GET_DOF_INDICES(fe_space->bas_fcts, el, fe_space->admin, dofs->vec);
119       dofs->n_components = fe_space->bas_fcts->n_bas_fcts;
120       dofs = CHAIN_NEXT(dofs, EL_DOF_VEC);
121     } CHAIN_WHILE(fe_space, const FE_SPACE);
122   } else {
123     EL_DOF_VEC *chain_dofs;
124 
125     dofs = (EL_DOF_VEC *)GET_DOF_INDICES(bas_fcts, el, fe_space->admin, NULL);
126     dofs->n_components = bas_fcts->n_bas_fcts;
127     DBL_LIST_INIT(&dofs->chain);
128     CHAIN_FOREACH(fe_chain, fe_space, const FE_SPACE) {
129       chain_dofs = (EL_DOF_VEC *)
130 	GET_DOF_INDICES(fe_chain->bas_fcts, el, fe_chain->admin, NULL);
131       chain_dofs->n_components = fe_chain->bas_fcts->n_bas_fcts;
132       CHAIN_ADD_TAIL(dofs, chain_dofs);
133     }
134   }
135   return dofs;
136 }
137 
138 /* >>> */
139 
140 /* <<< get_bound() */
141 
get_bound(EL_BNDRY_VEC * bndry,const BAS_FCTS * bas_fcts,const EL_INFO * el_info)142 static inline EL_BNDRY_VEC *get_bound(EL_BNDRY_VEC *bndry,
143 				      const BAS_FCTS *bas_fcts,
144 				      const EL_INFO *el_info)
145 
146 {
147   const BAS_FCTS *bfcts_chain;
148 
149   if (bndry) {
150     CHAIN_DO(bas_fcts, const BAS_FCTS) {
151       GET_BOUND(bas_fcts, el_info, bndry->vec);
152       bndry->n_components = bas_fcts->n_bas_fcts;
153       bndry = CHAIN_NEXT(bndry, EL_BNDRY_VEC);
154     } CHAIN_WHILE(bas_fcts, const BAS_FCTS);
155   } else {
156     EL_BNDRY_VEC *chain_bndry;
157 
158     bndry = (EL_BNDRY_VEC *)GET_BOUND(bas_fcts, el_info, NULL);
159     bndry->n_components = bas_fcts->n_bas_fcts;
160     DBL_LIST_INIT(&bndry->chain);
161     CHAIN_FOREACH(bfcts_chain, bas_fcts, const BAS_FCTS) {
162       chain_bndry = (EL_BNDRY_VEC *)GET_BOUND(bfcts_chain, el_info, NULL);
163       chain_bndry->n_components = bfcts_chain->n_bas_fcts;
164       CHAIN_ADD_TAIL(bndry, chain_bndry);
165     }
166   }
167   return bndry;
168 }
169 
170 /* >>> */
171 
172 /* <<< interpol() */
173 
174 static inline
el_interpol(EL_REAL_VEC * coeff,const EL_INFO * el_info,int wall,const EL_INT_VEC * indices,LOC_FCT_AT_QP f,void * ud,const BAS_FCTS * bas_fcts)175 void el_interpol(EL_REAL_VEC *coeff,
176 		 const EL_INFO *el_info, int wall,
177 		 const EL_INT_VEC *indices,
178 		 LOC_FCT_AT_QP f, void *ud,
179 		 const BAS_FCTS *bas_fcts)
180 {
181   const BAS_FCTS *bfcts_chain;
182   const int *ind = NULL;
183   int n_ind = -1;
184 
185   if (indices) {
186     ind   = indices->vec;
187     n_ind = indices->n_components;
188   }
189   INTERPOL(bas_fcts, coeff, el_info, wall, n_ind, ind, f, ud);
190   CHAIN_FOREACH(bfcts_chain, bas_fcts, const BAS_FCTS) {
191     if (indices) {
192       indices = CHAIN_NEXT(indices, EL_INT_VEC);
193       ind     = indices->vec;
194       n_ind   = indices->n_components;
195     }
196     coeff   = CHAIN_NEXT(coeff, EL_REAL_VEC);
197     INTERPOL(bfcts_chain, coeff, el_info, wall, n_ind, ind, f, ud);
198   }
199 }
200 
201 /* >>> */
202 
203 /* <<< interpol_dow() */
204 
205 static inline
el_interpol_dow(EL_REAL_VEC_D * coeff,const EL_INFO * el_info,int wall,const EL_INT_VEC * indices,LOC_FCT_D_AT_QP f,void * f_data,const BAS_FCTS * bas_fcts)206 void el_interpol_dow(EL_REAL_VEC_D *coeff,
207 		     const EL_INFO *el_info, int wall,
208 		     const EL_INT_VEC *indices,
209 		     LOC_FCT_D_AT_QP f, void *f_data,
210 		     const BAS_FCTS *bas_fcts)
211 {
212   const BAS_FCTS *bfcts_chain;
213   const int *ind = NULL;
214   int n_ind = -1;
215 
216   if (indices) {
217     ind   = indices->vec;
218     n_ind = indices->n_components;
219   }
220   INTERPOL_DOW(bas_fcts, coeff, el_info, wall, n_ind, ind, f, f_data);
221   CHAIN_FOREACH(bfcts_chain, bas_fcts, const BAS_FCTS) {
222     if (indices) {
223       indices = CHAIN_NEXT(indices, EL_INT_VEC);
224       ind     = indices->vec;
225       n_ind   = indices->n_components;
226     }
227     coeff   = CHAIN_NEXT(coeff, EL_REAL_VEC_D);
228     INTERPOL_DOW(bfcts_chain, coeff, el_info, wall, n_ind, ind, f, f_data);
229   }
230 }
231 
232 /* >>> */
233 
234 /* <<< dirichlet_map() */
235 
__dirichlet_map(S_CHAR * bound,const BNDRY_FLAGS * bndry_bits,int n_components,const BNDRY_FLAGS mask)236 static inline void __dirichlet_map(S_CHAR *bound,
237 				   const BNDRY_FLAGS *bndry_bits,
238 				   int n_components,
239 				   const BNDRY_FLAGS mask)
240 {
241   int i;
242   BNDRY_FLAGS all;
243 
244   if (mask == NULL) {
245     BNDRY_FLAGS_ALL(all);
246     mask = all;
247   }
248 
249   for (i = 0; i < n_components; i++) {
250     if (BNDRY_FLAGS_IS_INTERIOR(bndry_bits[i])) {
251       bound[i] = INTERIOR;
252       continue;
253     }
254     if (BNDRY_FLAGS_IS_PARTOF(bndry_bits[i], mask)) {
255       bound[i] = DIRICHLET;
256     } else {
257       bound[i] = INTERIOR;
258     }
259   }
260 }
261 
dirichlet_map_single(EL_SCHAR_VEC * bound,const EL_BNDRY_VEC * bndry_bits,const BNDRY_FLAGS mask)262 static inline void dirichlet_map_single(EL_SCHAR_VEC *bound,
263 					const EL_BNDRY_VEC *bndry_bits,
264 					const BNDRY_FLAGS mask)
265 {
266   bound->n_components = bndry_bits->n_components;
267 
268   __dirichlet_map(bound->vec, bndry_bits->vec, bound->n_components, mask);
269 }
270 
dirichlet_map(EL_SCHAR_VEC * bound,const EL_BNDRY_VEC * bndry_bits,const BNDRY_FLAGS mask)271 static inline void dirichlet_map(EL_SCHAR_VEC *bound,
272 				 const EL_BNDRY_VEC *bndry_bits,
273 				 const BNDRY_FLAGS mask)
274 {
275   const EL_BNDRY_VEC *bndry_chain;
276 
277   dirichlet_map_single(bound, bndry_bits, mask);
278   CHAIN_FOREACH(bndry_chain, bndry_bits, EL_BNDRY_VEC) {
279     bound = CHAIN_NEXT(bound, EL_SCHAR_VEC);
280     dirichlet_map_single(bound, bndry_chain, mask);
281   }
282 }
283 
284 /* >>> */
285 
286 /* <<< filling el-vecs with data */
287 
288 #define DEFUN_FILL_EL_VEC(VECNAME, vecname)				\
289   static inline const EL_##VECNAME##_VEC *				\
290   fill_el_##vecname##_vec(EL_##VECNAME##_VEC *el_vec,			\
291 			  EL *el,					\
292 			  const DOF_##VECNAME##_VEC *dof_vec)		\
293   {									\
294     const FE_SPACE *fe_space = dof_vec->fe_space;			\
295     const FE_SPACE *fe_chain;						\
296 									\
297     if (el_vec) {							\
298       CHAIN_DO(fe_space, const FE_SPACE) {				\
299 	fe_space->bas_fcts->get_##vecname##_vec(el_vec->vec, el, dof_vec); \
300 	el_vec->n_components = fe_space->bas_fcts->n_bas_fcts;		\
301 	el_vec  = CHAIN_NEXT(el_vec, EL_##VECNAME##_VEC);		\
302 	dof_vec = CHAIN_NEXT(dof_vec, DOF_##VECNAME##_VEC);		\
303       } CHAIN_WHILE(fe_space, const FE_SPACE);				\
304     } else {								\
305       EL_##VECNAME##_VEC *chain_vec;					\
306 									\
307       el_vec = (EL_##VECNAME##_VEC *)					\
308 	fe_space->bas_fcts->get_##vecname##_vec(NULL, el, dof_vec);	\
309       el_vec->n_components = fe_space->bas_fcts->n_bas_fcts;		\
310       DBL_LIST_INIT(&el_vec->chain);					\
311       CHAIN_FOREACH(fe_chain, fe_space, const FE_SPACE) {		\
312 	dof_vec = CHAIN_NEXT(dof_vec, DOF_##VECNAME##_VEC);		\
313 	chain_vec = (EL_##VECNAME##_VEC *)				\
314 	  fe_chain->bas_fcts->get_##vecname##_vec(NULL, el, dof_vec);	\
315 	chain_vec->n_components = fe_chain->bas_fcts->n_bas_fcts;	\
316 	CHAIN_ADD_TAIL(el_vec, chain_vec);				\
317       }									\
318     }									\
319     return el_vec;							\
320   }									\
321   struct _AI_semicolon_dummy
322 
323 DEFUN_FILL_EL_VEC(INT, int);
324 DEFUN_FILL_EL_VEC(REAL, real);
325 DEFUN_FILL_EL_VEC(REAL_D, real_d);
326 DEFUN_FILL_EL_VEC(REAL_DD, real_dd);
327 DEFUN_FILL_EL_VEC(UCHAR, uchar);
328 DEFUN_FILL_EL_VEC(SCHAR, schar);
329 
330 static inline
331 const EL_REAL_VEC_D *
fill_el_real_vec_d(EL_REAL_VEC_D * el_vec,EL * el,const DOF_REAL_VEC_D * dof_vec)332 fill_el_real_vec_d(EL_REAL_VEC_D *el_vec, EL *el, const DOF_REAL_VEC_D *dof_vec)
333 {
334   const FE_SPACE *fe_space = dof_vec->fe_space;
335   const FE_SPACE *fe_chain;
336 
337   if (el_vec) {
338     CHAIN_DO(fe_space, const FE_SPACE) {
339       fe_space->bas_fcts->get_real_vec_d(el_vec->vec, el, dof_vec);
340       el_vec->n_components = fe_space->bas_fcts->n_bas_fcts;
341       el_vec  = CHAIN_NEXT(el_vec, EL_REAL_VEC_D);
342       dof_vec = CHAIN_NEXT(dof_vec, DOF_REAL_VEC_D);
343     } CHAIN_WHILE(fe_space, const FE_SPACE);
344   } else {
345     EL_REAL_VEC_D *el_vec_chain;
346 
347     el_vec = (EL_REAL_VEC_D *)
348       fe_space->bas_fcts->get_real_vec_d(NULL, el, dof_vec);
349     el_vec->n_components = fe_space->bas_fcts->n_bas_fcts;
350     DBL_LIST_INIT(&el_vec->chain);
351     CHAIN_FOREACH(fe_chain, fe_space, const FE_SPACE) {
352       dof_vec = CHAIN_NEXT(dof_vec, DOF_REAL_VEC_D);
353       el_vec_chain =
354 	(EL_REAL_VEC_D *)fe_chain->bas_fcts->get_real_vec_d(NULL, el, dof_vec);
355       el_vec_chain->n_components = fe_chain->bas_fcts->n_bas_fcts;
356       CHAIN_ADD_TAIL(el_vec, el_vec_chain);
357     }
358   }
359 
360   return el_vec;
361 }
362 
363 /* >>> */
364 
365 /* <<< f = c * f + (a A + b B) u, result is an element-vector */
366 
367 /* We have two kinds of functions: those starting with two underscores
368  * are for internal use, they assume fixed data-types and ignore any
369  * chain attached to the respective objects.
370  *
371  * The naming scheme rather treats the beasts as local operators; the
372  * different kinds are distinguished by RangeDim/DomainDim
373  * combinations.
374  *
375  */
376 
377 /* <<< SCAL x SCAL (RANGE x DOMAIN) */
378 
__el_bi_mat_vec(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_VEC * f_h)379 static inline void __el_bi_mat_vec(REAL a, const EL_MATRIX *A,
380 				   REAL b, const EL_MATRIX *B,
381 				   const EL_REAL_VEC *u_h,
382 				   REAL c, EL_REAL_VEC *f_h)
383 {
384   int i, j;
385   REAL sum;
386 
387   if (A && B) {
388     for (i = 0; i < A->n_row; i++) {
389       sum = 0.0;
390       for (j = 0; j < A->n_col; j++) {
391 	sum += (a * A->data.real[i][j] + b * B->data.real[i][j]) * u_h->vec[j];
392       }
393       f_h->vec[i] = c * f_h->vec[i] + sum;
394     }
395   } else {
396     if (B) {
397       A = B;
398       a = b;
399       B = NULL;
400       b = 0.0;
401     }
402     for (i = 0; i < A->n_row; i++) {
403       sum = 0.0;
404       for (j = 0; j < A->n_col; j++) {
405 	sum += a * A->data.real[i][j] * u_h->vec[j];
406       }
407       f_h->vec[i] = c * f_h->vec[i] + sum;
408     }
409   }
410 }
411 
412 __FLATTEN_ATTRIBUTE__
413 static inline
el_bi_mat_vec(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_VEC * f_h)414 EL_REAL_VEC *el_bi_mat_vec(REAL a,
415 			   const EL_MATRIX *A,
416 			   REAL b,
417 			   const EL_MATRIX *B,
418 			   const EL_REAL_VEC *u_h,
419 			   REAL c,
420 			   EL_REAL_VEC *f_h)
421 {
422   const EL_MATRIX *A_chain;
423 
424   if (A == NULL) {
425     A = B;
426     a = b;
427     B = NULL;
428     b = 0.0;
429   }
430   COL_CHAIN_DO(A, const EL_MATRIX) {
431     __el_bi_mat_vec(a, A, b, B, u_h, c, f_h);
432     ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
433       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
434       u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
435       __el_bi_mat_vec(a, A_chain, b, B, u_h, 1.0, f_h);
436     }
437     /* roll-over to start of this row */
438     B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
439     u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
440     /* next row */
441     f_h = CHAIN_NEXT(f_h, EL_REAL_VEC);
442     B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
443   } COL_CHAIN_WHILE(A, const EL_MATRIX);
444 
445   return f_h;
446 }
447 
448 /* >>> */
449 
450 /* <<< SCAL x DOW (RANGE x DOMAIN) */
451 
452 /* <<< REAL x REAL_D, fixed stride */
453 
__el_bi_mat_vec_rrd(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,EL_REAL_VEC * f_h)454 static inline void __el_bi_mat_vec_rrd(REAL a, const EL_MATRIX *A,
455 				       REAL b, const EL_MATRIX *B,
456 				       const EL_REAL_D_VEC *u_h,
457 				       REAL c, EL_REAL_VEC *f_h)
458 {
459   int i, j;
460   REAL sum;
461 
462   /* A and B must be either REAL or REAL_D matrices */
463   if (A && B) {
464     if (A->type == MATENT_REAL) {
465       if (B->type == MATENT_REAL) {
466 	for (j = 0; j < A->n_col; j++) {
467 	  sum = SUM_DOW(u_h->vec[j]);
468 	  for (i = 0; i < A->n_row; i++) {
469 	    f_h->vec[i] =
470 	      c * f_h->vec[i]
471 	      +
472 	      (a * A->data.real[i][j] + b * B->data.real[i][j]) * sum;
473 	  }
474 	}
475 	return;
476       } else {
477 	for (j = 0; j < A->n_col; j++) {
478 	  REAL u_sum = SUM_DOW(u_h->vec[j]);
479 	  for (i = 0; i < A->n_row; i++) {
480 	    f_h->vec[i] =
481 	      c * f_h->vec[i]
482 	      +
483 	      a * A->data.real[i][j] * u_sum
484 	      +
485 	      b * SCP_DOW(B->data.real_d[i][j], u_h->vec[j]);
486 	  }
487 	}
488 	return;
489       }
490     } else {
491       if (B->type == MATENT_REAL) {
492 	__el_bi_mat_vec_rrd(b, B, a, A, u_h, c, f_h);
493       } else {
494 	for (j = 0; j < A->n_col; j++) {
495 	  for (i = 0; i < A->n_row; i++) {
496 	    f_h->vec[i] =
497 	      c * f_h->vec[i]
498 	      +
499 	      a * SCP_DOW(A->data.real_d[i][j], u_h->vec[j])
500 	      +
501 	      b * SCP_DOW(B->data.real_d[i][j], u_h->vec[j]);
502 	  }
503 	}
504       }
505       return;
506     }
507   } else if (A) {
508     if (A->type == MATENT_REAL) {
509       for (j = 0; j < A->n_col; j++) {
510 	sum = SUM_DOW(u_h->vec[j]);
511 	for (i = 0; i < A->n_row; i++) {
512 	  f_h->vec[i] = c * f_h->vec[i] + a * A->data.real[i][j] * sum;
513 	}
514       }
515     } else {
516       for (i = 0; i < A->n_row; i++) {
517 	for (j = 0; j < A->n_col; j++) {
518 	  f_h->vec[i] =
519 	    c * f_h->vec[i] + a * SCP_DOW(A->data.real_d[i][j], u_h->vec[j]);
520 	}
521       }
522     }
523     return;
524   }
525 }
526 
527 __FLATTEN_ATTRIBUTE__
528 static inline
el_bi_mat_vec_rrd(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,EL_REAL_VEC * f_h)529 EL_REAL_VEC *el_bi_mat_vec_rrd(REAL a,
530 			       const EL_MATRIX *A,
531 			       REAL b,
532 			       const EL_MATRIX *B,
533 			       const EL_REAL_D_VEC *u_h,
534 			       REAL c,
535 			       EL_REAL_VEC *f_h)
536 {
537   const EL_MATRIX *A_chain;
538 
539   if (A == NULL) {
540     A = B;
541     a = b;
542     B = NULL;
543     b = 0.0;
544   }
545   COL_CHAIN_DO(A, const EL_MATRIX) {
546     __el_bi_mat_vec_rrd(a, A, b, B, u_h, c, f_h);
547     ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
548       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
549       u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
550       __el_bi_mat_vec_rrd(a, A_chain, b, B, u_h, 1.0, f_h);
551     }
552     /* roll over to start of this row */
553     B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
554     u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
555     /* next row */
556     f_h = CHAIN_NEXT(f_h, EL_REAL_VEC);
557     B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
558   } COL_CHAIN_WHILE(A, const EL_MATRIX);
559 
560   return f_h;
561 }
562 
563 /* >>> */
564 
565 /* <<< REAL x REAL_D, vaiable stride */
566 
567 static inline
__el_bi_mat_vec_scl_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,EL_REAL_VEC * f_h)568 void __el_bi_mat_vec_scl_dow(REAL a,
569 			     const EL_MATRIX *A,
570 			     REAL b,
571 			     const EL_MATRIX *B,
572 			     const EL_REAL_VEC_D *u_h,
573 			     REAL c,
574 			     EL_REAL_VEC *f_h)
575 {
576   if (u_h->stride != 1) {
577     __el_bi_mat_vec_rrd(a, A, b, B, (const EL_REAL_D_VEC *)u_h, c, f_h);
578   } else {
579     __el_bi_mat_vec(a, A, b, B, (const EL_REAL_VEC *)u_h, c, f_h);
580   }
581 }
582 
583 __FLATTEN_ATTRIBUTE__
584 static inline
el_bi_mat_vec_scl_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,EL_REAL_VEC * f_h)585 EL_REAL_VEC *el_bi_mat_vec_scl_dow(REAL a,
586 				   const EL_MATRIX *A,
587 				   REAL b,
588 				   const EL_MATRIX *B,
589 				   const EL_REAL_VEC_D *u_h,
590 				   REAL c,
591 				   EL_REAL_VEC *f_h)
592 {
593   const EL_MATRIX *A_chain;
594   if (A == NULL) {
595     A = B;
596     a = b;
597     B = NULL;
598     b = 0.0;
599   }
600   COL_CHAIN_DO(A, const EL_MATRIX) {
601     __el_bi_mat_vec_scl_dow(a, A, b, B, u_h, c, f_h);
602     ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
603       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
604       u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
605       __el_bi_mat_vec_scl_dow(a, A_chain, b, B, u_h, 1.0, f_h);
606     }
607     /* roll-over to start of row */
608     B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
609     u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
610     /* next row */
611     B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
612     f_h = CHAIN_NEXT(f_h, EL_REAL_VEC);
613   } COL_CHAIN_WHILE(A, const EL_MATRIX);
614 
615   return f_h;
616 }
617 
618 /* >>> */
619 
620 /* >>> */
621 
622 /* <<< DOW x SCAL (RANGE x DOMAIN) */
623 
624 /* <<< READ_D x REAL, fixed stride */
625 
626 static inline void
__el_bi_mat_vec_rdr(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_D_VEC * f_h)627 __el_bi_mat_vec_rdr(REAL a, const EL_MATRIX *A,
628 		    REAL b, const EL_MATRIX *B,
629 		    const EL_REAL_VEC *u_h,
630 		    REAL c, EL_REAL_D_VEC *f_h)
631 {
632   int i, j;
633   REAL val;
634 
635   if (A && B) {
636     if (A->type == MATENT_REAL) {
637       if (B->type == MATENT_REAL) {
638 	for (i = 0; i < A->n_row; i++) {
639 	  val = 0.0;
640 	  for (j = 0; j < A->n_col; j++) {
641 	    val +=
642 	      (a * A->data.real[i][j] + b * B->data.real[i][j]) * u_h->vec[j];
643 	  }
644 	  for (j = 0; j < DIM_OF_WORLD; j++) {
645 	    f_h->vec[i][j] = c * f_h->vec[i][j] + val;
646 	  }
647 	}
648 	return;
649       } else {
650 	for (i = 0; i < A->n_row; i++) {
651 	  val = 0.0;
652 	  for (j = 0; j < A->n_col; j++) {
653 	    val += a * A->data.real[i][j] * u_h->vec[j];
654 	  }
655 	  for (j = 0; j < DIM_OF_WORLD; j++) {
656 	    f_h->vec[i][j] = c * f_h->vec[i][j] + val;
657 	  }
658 	  for (j = 0; j < A->n_col; j++) {
659 	    AXPY_DOW(b * u_h->vec[j], B->data.real_d[i][j], f_h->vec[i]);
660 	  }
661 	}
662 	return;
663       }
664     } else {
665       if (B->type == MATENT_REAL) {
666 	__el_bi_mat_vec_rdr(b, B, a, A, u_h, c, f_h);
667       } else {
668 	for (i = 0; i < A->n_row; i++) {
669 	  for (j = 0; j < DIM_OF_WORLD; j++) {
670 	    f_h->vec[i][j] = c * f_h->vec[i][j];
671 	  }
672 	  for (j = 0; j < A->n_col; j++) {
673 	    AXPY_DOW(a * u_h->vec[j], A->data.real_d[i][j], f_h->vec[i]);
674 	    AXPY_DOW(b * u_h->vec[j], B->data.real_d[i][j], f_h->vec[i]);
675 	  }
676 	}
677       }
678       return;
679     }
680   } else if (A) {
681     if (A->type == MATENT_REAL) {
682       for (i = 0; i < A->n_row; i++) {
683 	val = 0.0;
684 	for (j = 0; j < A->n_col; j++) {
685 	  val += a * A->data.real[i][j] * u_h->vec[j];
686 	}
687 	for (j = 0; j < DIM_OF_WORLD; j++) {
688 	  f_h->vec[i][j] = c * f_h->vec[i][j] + val;
689 	}
690       }
691     } else {
692       for (i = 0; i < A->n_row; i++) {
693 	for (j = 0; j < A->n_col; j++) {
694 	  AXPBY_DOW(a * u_h->vec[j], A->data.real_d[i][j], c, f_h->vec[i],
695 		    f_h->vec[i]);
696 	}
697       }
698     }
699     return;
700   }
701 }
702 
703 __FLATTEN_ATTRIBUTE__
704 static inline
el_bi_mat_vec_rdr(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_D_VEC * f_h)705 EL_REAL_D_VEC *el_bi_mat_vec_rdr(REAL a,
706 				 const EL_MATRIX *A,
707 				 REAL b,
708 				 const EL_MATRIX *B,
709 				 const EL_REAL_VEC *u_h,
710 				 REAL c,
711 				 EL_REAL_D_VEC *f_h)
712 {
713   const EL_MATRIX *A_chain;
714 
715   if (A == NULL) {
716     A = B;
717     a = b;
718     B = NULL;
719     b = 0.0;
720   }
721   COL_CHAIN_DO(A, const EL_MATRIX) {
722     __el_bi_mat_vec_rdr(a, A, b, B, u_h, c, f_h);
723     ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
724       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
725       u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
726       __el_bi_mat_vec_rdr(a, A, b, B, u_h, 1.0, f_h);
727     }
728     /* roll-over to start of row */
729     B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
730     u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
731     /* next row */
732     B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
733     f_h = CHAIN_NEXT(f_h, EL_REAL_D_VEC);
734   } COL_CHAIN_WHILE(A, const EL_MATRIX);
735 
736   return f_h;
737 }
738 
739 /* >>> */
740 
741 /* <<< REAL_D x REAL, variable stride */
742 
743 static inline
__el_bi_mat_vec_dow_scl(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_VEC_D * f_h)744 void __el_bi_mat_vec_dow_scl(REAL a,
745 			     const EL_MATRIX *A,
746 			     REAL b,
747 			     const EL_MATRIX *B,
748 			     const EL_REAL_VEC *u_h,
749 			     REAL c,
750 			     EL_REAL_VEC_D *f_h)
751 {
752   if (f_h->stride != 1) {
753     __el_bi_mat_vec_rdr(a, A, b, B, u_h, c, (EL_REAL_D_VEC *)f_h);
754   } else {
755     __el_bi_mat_vec(a, A, b, B, u_h, c, (EL_REAL_VEC *)f_h);
756   }
757 }
758 
759 __FLATTEN_ATTRIBUTE__
760 static inline
el_bi_mat_vec_dow_scl(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_VEC_D * f_h)761 EL_REAL_VEC_D *el_bi_mat_vec_dow_scl(REAL a,
762 				     const EL_MATRIX *A,
763 				     REAL b,
764 				     const EL_MATRIX *B,
765 				     const EL_REAL_VEC *u_h,
766 				     REAL c,
767 				     EL_REAL_VEC_D *f_h)
768 {
769   const EL_MATRIX *A_chain;
770 
771   if (A == NULL) {
772     A = B;
773     a = b;
774     B = NULL;
775     b = 0.0;
776   }
777   COL_CHAIN_DO(A, const EL_MATRIX) {
778     __el_bi_mat_vec_dow_scl(a, A, b, B, u_h, c, f_h);
779     ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
780       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
781       u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
782       __el_bi_mat_vec_dow_scl(a, A, b, B, u_h, 1.0, f_h);
783     }
784     /* roll-over to start of row */
785     B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
786     u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
787     /* next row */
788     B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
789     f_h = CHAIN_NEXT(f_h, EL_REAL_VEC_D);
790   } COL_CHAIN_WHILE(A, const EL_MATRIX);
791 
792   return f_h;
793 }
794 
795 /* >>> */
796 
797 /* >>> */
798 
799 /* <<< DOW x DOW (RANGE x DOMAIN) */
800 
801 /* <<< fixed stride */
802 
803 /* Multiply an element matrix by an element vector and store the
804  * result inside an element vector.
805  */
806 static inline
__el_bi_mat_vec_d(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,EL_REAL_D_VEC * f_h)807 void __el_bi_mat_vec_d(REAL a, const EL_MATRIX *A,
808 		       REAL b, const EL_MATRIX *B,
809 		       const EL_REAL_D_VEC *u_h,
810 		       REAL c, EL_REAL_D_VEC *f_h)
811 {
812   int i, j;
813   REAL *sum;
814 
815   if (A && B) {
816 #undef MAT_BI_BODY
817 #define MAT_BI_BODY(PFX1, CC1, C1, S1, TYPE1,		\
818 		    PFX2, CC2, C2, S2, TYPE2)		\
819     for (i = 0; i < A->n_row; i++) {			\
820       sum = f_h->vec[i];				\
821       for (j = 0; j < A->n_col; j++) {			\
822 	PFX1##PFX2##BIMV_DOW(a, CC1 A->data.S1[i][j],	\
823 			     b, CC2 B->data.S2[i][j],	\
824 			     u_h->vec[j],		\
825 			     c, sum);			\
826       }							\
827     }
828     MAT_EMIT_BI_BODY_SWITCH(A->type, B->type);
829   } else {
830     if (A == NULL) {
831       A = B;
832       a = b;
833       B = NULL;
834       b = 0.0;
835     }
836 #undef MAT_BODY
837 #define MAT_BODY(PFX, CC, C, S, TYPE)					\
838     for (i = 0; i < A->n_row; i++) {					\
839       sum = f_h->vec[i];						\
840       for (j = 0; j < A->n_col; j++) {					\
841 	PFX##GEMV_DOW(a, CC A->data.S[i][j], u_h->vec[j], c, sum);	\
842       }									\
843     }
844     MAT_EMIT_BODY_SWITCH(A->type);
845   }
846 }
847 
848 __FLATTEN_ATTRIBUTE__
849 static inline
el_bi_mat_vec_d(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,EL_REAL_D_VEC * f_h)850 EL_REAL_D_VEC *el_bi_mat_vec_d(REAL a,
851 			       const EL_MATRIX *A,
852 			       REAL b,
853 			       const EL_MATRIX *B,
854 			       const EL_REAL_D_VEC *u_h,
855 			       REAL c,
856 			       EL_REAL_D_VEC *f_h)
857 {
858   const EL_MATRIX *A_chain;
859 
860   if (A == NULL) {
861     A = B;
862     a = b;
863     B = NULL;
864     b = 0.0;
865   }
866   COL_CHAIN_DO(A, const EL_MATRIX) {
867     __el_bi_mat_vec_d(a, A, b, B, u_h, c, f_h);
868     ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
869       /* next column */
870       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
871       u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
872       __el_bi_mat_vec_d(a, A, b, B, u_h, 1.0, f_h);
873     }
874     /* roll-over to start of row */
875     B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
876     u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
877     /* next row */
878     B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
879     f_h = CHAIN_NEXT(f_h, EL_REAL_D_VEC);
880   } COL_CHAIN_WHILE(A, const EL_MATRIX);
881 
882   return f_h;
883 }
884 
885 /* >>> */
886 
887 /* <<< REAL_D x REAL_D, variable stride */
888 
889 static inline
__el_bi_mat_vec_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,EL_REAL_VEC_D * f_h)890 void __el_bi_mat_vec_dow(REAL a,
891 			 const EL_MATRIX *A,
892 			 REAL b,
893 			 const EL_MATRIX *B,
894 			 const EL_REAL_VEC_D *u_h,
895 			 REAL c,
896 			 EL_REAL_VEC_D *f_h)
897 {
898   if (f_h->stride == 1) {
899     __el_bi_mat_vec_scl_dow(a, A, b, B, u_h, c, (EL_REAL_VEC *)f_h);
900   } else if (u_h->stride == 1) {
901     __el_bi_mat_vec_dow_scl(a, A, b, B, (const EL_REAL_VEC *)u_h, c, f_h);
902   } else {
903     __el_bi_mat_vec_d(a, A, b, B, (const EL_REAL_D_VEC *)u_h,
904 		      c, (EL_REAL_D_VEC *)f_h);
905   }
906 }
907 
908 __FLATTEN_ATTRIBUTE__
909 static inline
el_bi_mat_vec_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,EL_REAL_VEC_D * f_h)910 EL_REAL_VEC_D *el_bi_mat_vec_dow(REAL a,
911 				 const EL_MATRIX *A,
912 				 REAL b,
913 				 const EL_MATRIX *B,
914 				 const EL_REAL_VEC_D *u_h,
915 				 REAL c,
916 				 EL_REAL_VEC_D *f_h)
917 {
918   const EL_MATRIX *A_chain;
919 
920   if (A == NULL) {
921     A = B;
922     a = b;
923     B = NULL;
924     b = 0.0;
925   }
926   COL_CHAIN_DO(A, const EL_MATRIX) {
927     __el_bi_mat_vec_dow(a, A, b, B, u_h, c, f_h);
928     ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
929       /* next column */
930       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
931       u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
932       __el_bi_mat_vec_dow(a, A, b, B, u_h, 1.0, f_h);
933     }
934     /* roll-over to start of row */
935     B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
936     u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
937     /* next row */
938     f_h = CHAIN_NEXT(f_h, EL_REAL_VEC_D);
939     B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
940   } COL_CHAIN_WHILE(A, const EL_MATRIX);
941 
942   return f_h;
943 }
944 
945 /* >>> */
946 
947 /* >>> */
948 
949 /* <<< f = c * f + a A u, f += a A u versions */
950 
951 /* <<< SCL x SCL */
952 
953 static inline EL_REAL_VEC *
el_gen_mat_vec(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,EL_REAL_VEC * f_h)954 el_gen_mat_vec(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
955 	       REAL b, EL_REAL_VEC *f_h)
956 {
957   return el_bi_mat_vec(a, A, 0.0, NULL, u_h, b, f_h);
958 }
959 
960 static inline EL_REAL_VEC *
el_mat_vec(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,EL_REAL_VEC * f_h)961 el_mat_vec(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h, EL_REAL_VEC *f_h)
962 {
963   return el_gen_mat_vec(a, A, u_h, 1.0, f_h);
964 }
965 
966 /* >>> */
967 
968 /* <<< SCL x DOW */
969 
970 static inline EL_REAL_VEC *
el_gen_mat_vec_rrd(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,REAL b,EL_REAL_VEC * f_h)971 el_gen_mat_vec_rrd(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
972 		   REAL b, EL_REAL_VEC *f_h)
973 {
974   return el_bi_mat_vec_rrd(a, A, 0.0, NULL, u_h, b, f_h);
975 }
976 
977 static inline EL_REAL_VEC *
el_mat_vec_rrd(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,EL_REAL_VEC * f_h)978 el_mat_vec_rrd(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
979 	       EL_REAL_VEC *f_h)
980 {
981   return el_gen_mat_vec_rrd(a, A, u_h, 1.0, f_h);
982 }
983 
984 static inline EL_REAL_VEC *
el_gen_mat_vec_scl_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,REAL b,EL_REAL_VEC * f_h)985 el_gen_mat_vec_scl_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
986 		       REAL b, EL_REAL_VEC *f_h)
987 {
988   return el_bi_mat_vec_scl_dow(a, A, 0.0, NULL, u_h, b, f_h);
989 }
990 
991 static inline EL_REAL_VEC *
el_mat_vec_scl_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,EL_REAL_VEC * f_h)992 el_mat_vec_scl_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
993 		   EL_REAL_VEC *f_h)
994 {
995   return el_gen_mat_vec_scl_dow(a, A, u_h, 1.0, f_h);
996 }
997 
998 /* >>> */
999 
1000 /* <<< DOW x SCL */
1001 
1002 static inline EL_REAL_D_VEC *
el_gen_mat_vec_rdr(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,EL_REAL_D_VEC * f_h)1003 el_gen_mat_vec_rdr(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1004 		   REAL b, EL_REAL_D_VEC *f_h)
1005 {
1006   return el_bi_mat_vec_rdr(a, A, 0.0, NULL, u_h, b, f_h);
1007 }
1008 
1009 static inline EL_REAL_D_VEC *
el_mat_vec_rdr(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,EL_REAL_D_VEC * f_h)1010 el_mat_vec_rdr(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1011 	       EL_REAL_D_VEC *f_h)
1012 {
1013   return el_gen_mat_vec_rdr(a, A, u_h, 1.0, f_h);
1014 }
1015 
1016 static inline EL_REAL_VEC_D *
el_gen_mat_vec_dow_scl(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,EL_REAL_VEC_D * f_h)1017 el_gen_mat_vec_dow_scl(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1018 		       REAL b, EL_REAL_VEC_D *f_h)
1019 {
1020   return el_bi_mat_vec_dow_scl(a, A, 0.0, NULL, u_h, b, f_h);
1021 }
1022 
1023 static inline EL_REAL_VEC_D *
el_mat_vec_dow_scl(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,EL_REAL_VEC_D * f_h)1024 el_mat_vec_dow_scl(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1025 		   EL_REAL_VEC_D *f_h)
1026 {
1027   return el_gen_mat_vec_dow_scl(a, A, u_h, 1.0, f_h);
1028 }
1029 
1030 /* >>> */
1031 
1032 /* <<< DOW x DOW */
1033 
1034 static inline EL_REAL_D_VEC *
el_gen_mat_vec_d(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,REAL b,EL_REAL_D_VEC * f_h)1035 el_gen_mat_vec_d(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1036 		 REAL b, EL_REAL_D_VEC *f_h)
1037 {
1038   return el_bi_mat_vec_d(a, A, 0.0, NULL, u_h, b, f_h);
1039 }
1040 
1041 static inline EL_REAL_D_VEC *
el_mat_vec_d(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,EL_REAL_D_VEC * f_h)1042 el_mat_vec_d(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1043 	     EL_REAL_D_VEC *f_h)
1044 {
1045   return el_gen_mat_vec_d(a, A, u_h, 1.0, f_h);
1046 }
1047 
1048 static inline EL_REAL_VEC_D *
el_gen_mat_vec_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,REAL b,EL_REAL_VEC_D * f_h)1049 el_gen_mat_vec_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1050 		   REAL b, EL_REAL_VEC_D *f_h)
1051 {
1052   return el_bi_mat_vec_dow(a, A, 0.0, NULL, u_h, b, f_h);
1053 }
1054 
1055 static inline EL_REAL_VEC_D *
el_mat_vec_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,EL_REAL_VEC_D * f_h)1056 el_mat_vec_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1057 	       EL_REAL_VEC_D *f_h)
1058 {
1059   return el_gen_mat_vec_dow(a, A, u_h, 1.0, f_h);
1060 }
1061 
1062 /* >>> */
1063 
1064 /* >>> */
1065 
1066 /* >>> */
1067 
1068 /* <<< f = c * f + (a A + b B) u, result is a global DOF-vector */
1069 
1070 /* <<< SCAL-SCAL */
1071 
1072 static inline
__bi_mat_el_vec(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1073 void __bi_mat_el_vec(REAL a, const EL_MATRIX *A,
1074 		     REAL b, const EL_MATRIX *B,
1075 		     const EL_REAL_VEC *u_h,
1076 		     REAL c, DOF_REAL_VEC *f_h,
1077 		     const EL_DOF_VEC *dof,
1078 		     const EL_SCHAR_VEC *bnd)
1079 {
1080   int i, j;
1081   REAL sum;
1082 
1083   if (A && B) {
1084     for (i = 0; i < A->n_row; i++) {
1085       if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1086 	sum = 0.0;
1087 	for (j = 0; j < A->n_col; j++) {
1088 	  sum +=
1089 	    (a * A->data.real[i][j] + b * B->data.real[i][j]) * u_h->vec[j];
1090 	}
1091 	f_h->vec[dof->vec[i]] = c * f_h->vec[dof->vec[i]] + sum;
1092       }
1093     }
1094   } else {
1095     if (B) {
1096       A = B;
1097       a = b;
1098       B = NULL;
1099       b = 0.0;
1100     }
1101     for (i = 0; i < A->n_row; i++) {
1102       if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1103 	sum = 0.0;
1104 	for (j = 0; j < A->n_col; j++) {
1105 	  sum += a * A->data.real[i][j] * u_h->vec[j];
1106 	}
1107 	f_h->vec[dof->vec[i]] = c * f_h->vec[dof->vec[i]] + sum;
1108       }
1109     }
1110   }
1111 }
1112 
1113 __FLATTEN_ATTRIBUTE__
1114 static inline
bi_mat_el_vec(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1115 void bi_mat_el_vec(REAL a,
1116 		   const EL_MATRIX *A,
1117 		   REAL b,
1118 		   const EL_MATRIX *B,
1119 		   const EL_REAL_VEC *u_h,
1120 		   REAL c,
1121 		   DOF_REAL_VEC *f_h,
1122 		   const EL_DOF_VEC *dof,
1123 		   const EL_SCHAR_VEC *bnd)
1124 {
1125   if (A == NULL) {
1126     A = B;
1127     a = b;
1128     B = NULL;
1129     b = 0.0;
1130   }
1131   COL_CHAIN_DO(A, const EL_MATRIX) {
1132     ROW_CHAIN_DO(A, const EL_MATRIX) {
1133 
1134       __bi_mat_el_vec(a, A, b, B, u_h, c, f_h, dof, bnd);
1135 
1136       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1137       u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
1138     } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1139     f_h   = CHAIN_NEXT(f_h, DOF_REAL_VEC);
1140     dof   = CHAIN_NEXT(dof, EL_DOF_VEC);
1141     bnd   = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1142     B     = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1143   } COL_CHAIN_WHILE(A, const EL_MATRIX);
1144 }
1145 
1146 /* >>> */
1147 
1148 /* <<< SCAL-DOW version */
1149 
1150 /* <<< fixed stride */
1151 
1152 static inline
__bi_mat_el_vec_rrd(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1153 void __bi_mat_el_vec_rrd(REAL a, const EL_MATRIX *A,
1154 			 REAL b, const EL_MATRIX *B,
1155 			 const EL_REAL_D_VEC *u_h,
1156 			 REAL c, DOF_REAL_VEC *f_h,
1157 			 const EL_DOF_VEC *dof,
1158 			 const EL_SCHAR_VEC *bnd)
1159 {
1160   int i, j;
1161   REAL sum;
1162 
1163   /* A and B must be either REAL or REAL_D matrices */
1164   if (A && B) {
1165     if (A->type == MATENT_REAL) {
1166       if (B->type == MATENT_REAL) {
1167 	for (j = 0; j < A->n_col; j++) {
1168 	  sum = SUM_DOW(u_h->vec[j]);
1169 	  for (i = 0; i < A->n_row; i++) {
1170 	    if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1171 	      f_h->vec[dof->vec[i]] =
1172 		c * f_h->vec[dof->vec[i]]
1173 		+
1174 		(a * A->data.real[i][j] + b * B->data.real[i][j]) * sum;
1175 	    }
1176 	  }
1177 	}
1178 	return;
1179       } else {
1180 	for (j = 0; j < A->n_col; j++) {
1181 	  REAL u_sum = SUM_DOW(u_h->vec[j]);
1182 	  for (i = 0; i < A->n_row; i++) {
1183 	    if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1184 	      f_h->vec[dof->vec[i]] =
1185 		c * f_h->vec[dof->vec[i]]
1186 		+
1187 		a * A->data.real[i][j] * u_sum
1188 		+
1189 		b * SCP_DOW(B->data.real_d[i][j], u_h->vec[j]);
1190 	    }
1191 	  }
1192 	}
1193 	return;
1194       }
1195     } else {
1196       if (B->type == MATENT_REAL) {
1197 	__bi_mat_el_vec_rrd(b, B, a, A, u_h, c, f_h, dof, bnd);
1198       } else {
1199 	for (j = 0; j < A->n_col; j++) {
1200 	  for (i = 0; i < A->n_row; i++) {
1201 	    if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1202 	      f_h->vec[dof->vec[i]] =
1203 		c * f_h->vec[dof->vec[i]]
1204 		+
1205 		a * SCP_DOW(A->data.real_d[i][j], u_h->vec[j])
1206 		+
1207 		b * SCP_DOW(B->data.real_d[i][j], u_h->vec[j]);
1208 	    }
1209 	  }
1210 	}
1211       }
1212       return;
1213     }
1214   } else if (A) {
1215     if (A->type == MATENT_REAL) {
1216       for (j = 0; j < A->n_col; j++) {
1217 	sum = SUM_DOW(u_h->vec[j]);
1218 	for (i = 0; i < A->n_row; i++) {
1219 	  if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1220 	    f_h->vec[dof->vec[i]] =
1221 	      c * f_h->vec[dof->vec[i]] + + a * A->data.real[i][j] * sum;
1222 	  }
1223 	}
1224       }
1225     } else {
1226       for (i = 0; i < A->n_row; i++) {
1227 	for (j = 0; j < A->n_col; j++) {
1228 	  if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1229 	    f_h->vec[dof->vec[i]] =
1230 	      c * f_h->vec[dof->vec[i]]
1231 	      +
1232 	      a * SCP_DOW(A->data.real_d[i][j], u_h->vec[j]);
1233 	  }
1234 	}
1235       }
1236     }
1237     return;
1238   }
1239 }
1240 
1241 __FLATTEN_ATTRIBUTE__
1242 static inline
bi_mat_el_vec_rrd(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1243 void bi_mat_el_vec_rrd(REAL a,
1244 		       const EL_MATRIX *A,
1245 		       REAL b,
1246 		       const EL_MATRIX *B,
1247 		       const EL_REAL_D_VEC *u_h,
1248 		       REAL c,
1249 		       DOF_REAL_VEC *f_h,
1250 		       const EL_DOF_VEC *dof,
1251 		       const EL_SCHAR_VEC *bnd)
1252 {
1253   if (A == NULL) {
1254     A = B;
1255     a = b;
1256     B = NULL;
1257     b = 0.0;
1258   }
1259   COL_CHAIN_DO(A, const EL_MATRIX) {
1260     ROW_CHAIN_DO(A, const EL_MATRIX) {
1261 
1262       __bi_mat_el_vec_rrd(a, A, b, B, u_h, c, f_h, dof, bnd);
1263 
1264       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1265       u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
1266     } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1267     f_h   = CHAIN_NEXT(f_h, DOF_REAL_VEC);
1268     dof   = CHAIN_NEXT(dof, EL_DOF_VEC);
1269     bnd   = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1270     B     = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1271   } COL_CHAIN_WHILE(A, const EL_MATRIX);
1272 }
1273 
1274 /* >>> */
1275 
1276 /* <<< variable stride version */
1277 
1278 static inline
__bi_mat_el_vec_scl_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1279 void __bi_mat_el_vec_scl_dow(REAL a, const EL_MATRIX *A,
1280 			     REAL b, const EL_MATRIX *B,
1281 			     const EL_REAL_VEC_D *u_h,
1282 			     REAL c, DOF_REAL_VEC *f_h,
1283 			     const EL_DOF_VEC *dof,
1284 			     const EL_SCHAR_VEC *bnd)
1285 {
1286   if (u_h->stride != 1) {
1287     __bi_mat_el_vec_rrd(a, A, b, B, (const EL_REAL_D_VEC *)u_h,
1288 			c, f_h, dof, bnd);
1289   } else {
1290     __bi_mat_el_vec(a, A, b, B, (const EL_REAL_VEC *)u_h,
1291 		    c, f_h, dof, bnd);
1292   }
1293 }
1294 
1295 __FLATTEN_ATTRIBUTE__
1296 static inline
bi_mat_el_vec_scl_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1297 void bi_mat_el_vec_scl_dow(REAL a,
1298 			   const EL_MATRIX *A,
1299 			   REAL b,
1300 			   const EL_MATRIX *B,
1301 			   const EL_REAL_VEC_D *u_h,
1302 			   REAL c,
1303 			   DOF_REAL_VEC *f_h,
1304 			   const EL_DOF_VEC *dof,
1305 			   const EL_SCHAR_VEC *bnd)
1306 {
1307   if (A == NULL) {
1308     A = B;
1309     a = b;
1310     B = NULL;
1311     b = 0.0;
1312   }
1313   COL_CHAIN_DO(A, const EL_MATRIX) {
1314     ROW_CHAIN_DO(A, const EL_MATRIX) {
1315 
1316       __bi_mat_el_vec_scl_dow(a, A, b, B, u_h, c, f_h, dof, bnd);
1317 
1318       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1319       u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
1320     } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1321     f_h   = CHAIN_NEXT(f_h, DOF_REAL_VEC);
1322     dof   = CHAIN_NEXT(dof, EL_DOF_VEC);
1323     bnd   = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1324     B     = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1325   } COL_CHAIN_WHILE(A, const EL_MATRIX);
1326 }
1327 
1328 /* >>> */
1329 
1330 /* >>> */
1331 
1332 /* <<< DOW-SCAL version */
1333 
1334 /* <<< fixed stride */
1335 
1336 static inline
__bi_mat_el_vec_rdr(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1337 void __bi_mat_el_vec_rdr(REAL a, const EL_MATRIX *A,
1338 			 REAL b, const EL_MATRIX *B,
1339 			 const EL_REAL_VEC *u_h,
1340 			 REAL c, DOF_REAL_D_VEC *f_h,
1341 			 const EL_DOF_VEC *dof,
1342 			 const EL_SCHAR_VEC *bnd)
1343 {
1344   int i, j;
1345   REAL val;
1346 
1347   if (A && B) {
1348     if (A->type == MATENT_REAL) {
1349       if (B->type == MATENT_REAL) {
1350 	for (i = 0; i < A->n_row; i++) {
1351 	  if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1352 	    val = 0.0;
1353 	    for (j = 0; j < A->n_col; j++) {
1354 	      val +=
1355 		(a * A->data.real[i][j] + b * B->data.real[i][j]) * u_h->vec[j];
1356 	    }
1357 	    for (j = 0; j < DIM_OF_WORLD; j++) {
1358 	      f_h->vec[dof->vec[i]][j] = c * f_h->vec[dof->vec[i]][j] + val;
1359 	    }
1360 	  }
1361 	}
1362 	return;
1363       } else {
1364 	for (i = 0; i < A->n_row; i++) {
1365 	  if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1366 	    val = 0.0;
1367 	    for (j = 0; j < A->n_col; j++) {
1368 	      val += a * A->data.real[i][j] * u_h->vec[j];
1369 	    }
1370 	    for (j = 0; j < DIM_OF_WORLD; j++) {
1371 	      f_h->vec[dof->vec[i]][j] = c * f_h->vec[dof->vec[i]][j] + val;
1372 	    }
1373 	    for (j = 0; j < A->n_col; j++) {
1374 	      AXPY_DOW(b * u_h->vec[j], B->data.real_d[i][j],
1375 		       f_h->vec[dof->vec[i]]);
1376 	    }
1377 	  }
1378 	}
1379 	return;
1380       }
1381     } else {
1382       if (B->type == MATENT_REAL) {
1383 	__bi_mat_el_vec_rdr(b, B, a, A, u_h, c, f_h, dof, bnd);
1384       } else {
1385 	for (i = 0; i < A->n_row; i++) {
1386 	  if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1387 	    for (j = 0; j < DIM_OF_WORLD; j++) {
1388 	      f_h->vec[dof->vec[i]][j] = c * f_h->vec[dof->vec[i]][j];
1389 	    }
1390 	    for (j = 0; j < A->n_col; j++) {
1391 	      AXPY_DOW(a * u_h->vec[j], A->data.real_d[i][j],
1392 		       f_h->vec[dof->vec[i]]);
1393 	      AXPY_DOW(b * u_h->vec[j], B->data.real_d[i][j],
1394 		       f_h->vec[dof->vec[i]]);
1395 	    }
1396 	  }
1397 	}
1398       }
1399     }
1400   } else if (A) {
1401     if (A->type == MATENT_REAL) {
1402       for (i = 0; i < A->n_row; i++) {
1403 	if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1404 	  val = 0.0;
1405 	  for (j = 0; j < A->n_col; j++) {
1406 	    val += a * A->data.real[i][j] * u_h->vec[j];
1407 	  }
1408 	  for (j = 0; j < DIM_OF_WORLD; j++) {
1409 	    f_h->vec[dof->vec[i]][j] = c * f_h->vec[dof->vec[i]][j] + val;
1410 	  }
1411 	}
1412       }
1413     } else {
1414       for (i = 0; i < A->n_row; i++) {
1415 	if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1416 	  for (j = 0; j < A->n_col; j++) {
1417 	    AXPBY_DOW(a * u_h->vec[j], A->data.real_d[i][j],
1418 		      c, f_h->vec[dof->vec[i]],
1419 		      f_h->vec[dof->vec[i]]);
1420 	  }
1421 	}
1422       }
1423     }
1424     return;
1425   }
1426 }
1427 
bi_mat_el_vec_rdr(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1428 static inline void bi_mat_el_vec_rdr(REAL a,
1429 				     const EL_MATRIX *A,
1430 				     REAL b,
1431 				     const EL_MATRIX *B,
1432 				     const EL_REAL_VEC *u_h,
1433 				     REAL c,
1434 				     DOF_REAL_D_VEC *f_h,
1435 				     const EL_DOF_VEC *dof,
1436 				     const EL_SCHAR_VEC *bnd)
1437 {
1438   if (A == NULL) {
1439     A = B;
1440     a = b;
1441     B = NULL;
1442     b = 0.0;
1443   }
1444   COL_CHAIN_DO(A, const EL_MATRIX) {
1445     ROW_CHAIN_DO(A, const EL_MATRIX) {
1446 
1447       __bi_mat_el_vec_rdr(a, A, b, B, u_h, c, f_h, dof, bnd);
1448 
1449       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1450       u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
1451     } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1452     f_h   = CHAIN_NEXT(f_h, DOF_REAL_D_VEC);
1453     dof   = CHAIN_NEXT(dof, EL_DOF_VEC);
1454     bnd   = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1455     B     = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1456   } COL_CHAIN_WHILE(A, const EL_MATRIX);
1457 }
1458 
1459 /* >>> */
1460 
1461 /* <<< variable stride version */
1462 
1463 static inline
__bi_mat_el_vec_dow_scl(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1464 void __bi_mat_el_vec_dow_scl(REAL a, const EL_MATRIX *A,
1465 			     REAL b, const EL_MATRIX *B,
1466 			     const EL_REAL_VEC *u_h,
1467 			     REAL c, DOF_REAL_VEC_D *f_h,
1468 			     const EL_DOF_VEC *dof,
1469 			     const EL_SCHAR_VEC *bnd)
1470 {
1471   if (f_h->stride != 1) {
1472     __bi_mat_el_vec_rdr(a, A, b, B, u_h, c, (DOF_REAL_D_VEC *)f_h, dof, bnd);
1473   } else {
1474     __bi_mat_el_vec(a, A, b, B, u_h, c, (DOF_REAL_VEC *)f_h, dof, bnd);
1475   }
1476 }
1477 
1478 __FLATTEN_ATTRIBUTE__
1479 static inline
bi_mat_el_vec_dow_scl(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1480 void bi_mat_el_vec_dow_scl(REAL a,
1481 			   const EL_MATRIX *A,
1482 			   REAL b,
1483 			   const EL_MATRIX *B,
1484 			   const EL_REAL_VEC *u_h,
1485 			   REAL c,
1486 			   DOF_REAL_VEC_D *f_h,
1487 			   const EL_DOF_VEC *dof,
1488 			   const EL_SCHAR_VEC *bnd)
1489 {
1490   if (A == NULL) {
1491     A = B;
1492     a = b;
1493     B = NULL;
1494     b = 0.0;
1495   }
1496   COL_CHAIN_DO(A, const EL_MATRIX) {
1497     ROW_CHAIN_DO(A, const EL_MATRIX) {
1498 
1499       __bi_mat_el_vec_dow_scl(a, A, b, B, u_h, c, f_h, dof, bnd);
1500 
1501       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1502       u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
1503     } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1504     f_h   = CHAIN_NEXT(f_h, DOF_REAL_VEC_D);
1505     dof   = CHAIN_NEXT(dof, EL_DOF_VEC);
1506     bnd   = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1507     B     = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1508   } COL_CHAIN_WHILE(A, const EL_MATRIX);
1509 }
1510 
1511 /* >>> */
1512 
1513 /* >>> */
1514 
1515 /* <<< DOW-DOW version */
1516 
1517 /* <<< fixed stride */
1518 
1519 /* Multiply an element matrix by an element vector and store the
1520  * result inside an element vector.
1521  */
1522 static inline
__bi_mat_el_vec_d(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1523 void __bi_mat_el_vec_d(REAL a, const EL_MATRIX *A,
1524 		       REAL b, const EL_MATRIX *B,
1525 		       const EL_REAL_D_VEC *u_h,
1526 		       REAL c, DOF_REAL_D_VEC *f_h,
1527 		       const EL_DOF_VEC *dof,
1528 		       const EL_SCHAR_VEC *bnd)
1529 {
1530   int i, j;
1531   REAL *sum;
1532 
1533   if (A && B) {
1534 #undef MAT_BI_BODY
1535 #define MAT_BI_BODY(PFX1, CC1, C1, S1, TYPE1,		\
1536 		    PFX2, CC2, C2, S2, TYPE2)		\
1537     for (i = 0; i < A->n_row; i++) {			\
1538       if (bnd == NULL || bnd->vec[i] < DIRICHLET) {	\
1539 	sum = f_h->vec[dof->vec[i]];			\
1540 	for (j = 0; j < A->n_col; j++) {		\
1541 	  PFX1##PFX2##BIMV_DOW(a, CC1 A->data.S1[i][j],	\
1542 			       b, CC2 B->data.S2[i][j],	\
1543 			       u_h->vec[j],		\
1544 			       c, sum);			\
1545 	}						\
1546       }							\
1547     }
1548     MAT_EMIT_BI_BODY_SWITCH(A->type, B->type);
1549   } else {
1550     if (A == NULL) {
1551       A = B;
1552       a = b;
1553       B = NULL;
1554       b = 0.0;
1555     }
1556 #undef MAT_BODY
1557 #define MAT_BODY(PFX, CC, C, S, TYPE)					\
1558     for (i = 0; i < A->n_row; i++) {					\
1559       if (bnd == NULL || bnd->vec[i] < DIRICHLET) {			\
1560 	sum = f_h->vec[dof->vec[i]];					\
1561 	for (j = 0; j < A->n_col; j++) {				\
1562 	  PFX##GEMV_DOW(a, CC A->data.S[i][j], u_h->vec[j], c, sum);	\
1563 	}								\
1564       }									\
1565     }
1566     MAT_EMIT_BODY_SWITCH(A->type);
1567   }
1568 }
1569 
1570 __FLATTEN_ATTRIBUTE__
1571 static inline
bi_mat_el_vec_d(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1572 void bi_mat_el_vec_d(REAL a,
1573 		     const EL_MATRIX *A,
1574 		     REAL b,
1575 		     const EL_MATRIX *B,
1576 		     const EL_REAL_D_VEC *u_h,
1577 		     REAL c,
1578 		     DOF_REAL_D_VEC *f_h,
1579 		     const EL_DOF_VEC *dof,
1580 		     const EL_SCHAR_VEC *bnd)
1581 {
1582   if (A == NULL) {
1583     A = B;
1584     a = b;
1585     B = NULL;
1586     b = 0.0;
1587   }
1588   COL_CHAIN_DO(A, const EL_MATRIX) {
1589     ROW_CHAIN_DO(A, const EL_MATRIX) {
1590 
1591       __bi_mat_el_vec_d(a, A, b, B, u_h, c, f_h, dof, bnd);
1592 
1593       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1594       u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
1595     } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1596     f_h = CHAIN_NEXT(f_h, DOF_REAL_D_VEC);
1597     dof = CHAIN_NEXT(dof, EL_DOF_VEC);
1598     bnd = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1599     B   = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1600   } COL_CHAIN_WHILE(A, const EL_MATRIX);
1601 }
1602 
1603 /* >>> */
1604 
1605 /* <<< variable stride version */
1606 
1607 static inline
__bi_mat_el_vec_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1608 void __bi_mat_el_vec_dow(REAL a, const EL_MATRIX *A,
1609 			 REAL b, const EL_MATRIX *B,
1610 			 const EL_REAL_VEC_D *u_h,
1611 			 REAL c, DOF_REAL_VEC_D *f_h,
1612 			 const EL_DOF_VEC *dof,
1613 			 const EL_SCHAR_VEC *bnd)
1614 {
1615   if (f_h->stride == 1) {
1616     __bi_mat_el_vec_scl_dow(a, A, b, B, u_h,
1617 			    c, (DOF_REAL_VEC *)f_h, dof, bnd);
1618   } else if (u_h->stride == 1) {
1619     __bi_mat_el_vec_dow_scl(a, A, b, B, (const EL_REAL_VEC *)u_h,
1620 			    c, f_h, dof, bnd);
1621   } else {
1622     __bi_mat_el_vec_d(a, A, b, B, (const EL_REAL_D_VEC *)u_h,
1623 		      c, (DOF_REAL_D_VEC *)f_h, dof, bnd);
1624   }
1625 }
1626 
1627 __FLATTEN_ATTRIBUTE__
1628 static inline
bi_mat_el_vec_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1629 void bi_mat_el_vec_dow(REAL a,
1630 		       const EL_MATRIX *A,
1631 		       REAL b,
1632 		       const EL_MATRIX *B,
1633 		       const EL_REAL_VEC_D *u_h,
1634 		       REAL c,
1635 		       DOF_REAL_VEC_D *f_h,
1636 		       const EL_DOF_VEC *dof,
1637 		       const EL_SCHAR_VEC *bnd)
1638 {
1639   if (A == NULL) {
1640     A = B;
1641     a = b;
1642     B = NULL;
1643     b = 0.0;
1644   }
1645   COL_CHAIN_DO(A, const EL_MATRIX) {
1646     ROW_CHAIN_DO(A, const EL_MATRIX) {
1647 
1648       __bi_mat_el_vec_dow(a, A, b, B, u_h, c, f_h, dof, bnd);
1649 
1650       B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1651       u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
1652     } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1653     f_h = CHAIN_NEXT(f_h, DOF_REAL_VEC_D);
1654     dof = CHAIN_NEXT(dof, EL_DOF_VEC);
1655     bnd = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1656     B   = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1657   } COL_CHAIN_WHILE(A, const EL_MATRIX);
1658 }
1659 
1660 /* >>> */
1661 
1662 /* >>> */
1663 
1664 /* <<< f = c * f + a A u, f += a A u versions */
1665 
1666 /* <<< SCL x SCL */
1667 
1668 static inline void
gen_mat_el_vec(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1669 gen_mat_el_vec(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1670 	       REAL b, DOF_REAL_VEC *f_h,
1671 	       const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1672 {
1673   bi_mat_el_vec(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1674 }
1675 
1676 static inline void
mat_el_vec(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1677 mat_el_vec(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1678 	   DOF_REAL_VEC *f_h, const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1679 {
1680   gen_mat_el_vec(a, A, u_h, 1.0, f_h, dof, bnd);
1681 }
1682 
1683 /* >>> */
1684 
1685 /* <<< DOW x DOW */
1686 
1687 static inline void
gen_mat_el_vec_d(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,REAL b,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1688 gen_mat_el_vec_d(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1689 		 REAL b, DOF_REAL_D_VEC *f_h,
1690 		 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1691 {
1692   bi_mat_el_vec_d(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1693 }
1694 
1695 static inline void
mat_el_vec_d(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1696 mat_el_vec_d(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1697 	     DOF_REAL_D_VEC *f_h,
1698 	     const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1699 {
1700   gen_mat_el_vec_d(a, A, u_h, 1.0, f_h, dof, bnd);
1701 }
1702 
1703 static inline void
gen_mat_el_vec_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,REAL b,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1704 gen_mat_el_vec_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1705 		   REAL b, DOF_REAL_VEC_D *f_h,
1706 		   const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1707 {
1708   bi_mat_el_vec_dow(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1709 }
1710 
1711 static inline void
mat_el_vec_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1712 mat_el_vec_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1713 	       DOF_REAL_VEC_D *f_h,
1714 	       const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1715 {
1716   gen_mat_el_vec_dow(a, A, u_h, 1.0, f_h, dof, bnd);
1717 }
1718 
1719 /* >>> */
1720 
1721 /* <<< SCL x DOW */
1722 
1723 static inline void
gen_mat_el_vec_rrd(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,REAL b,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1724 gen_mat_el_vec_rrd(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1725 		   REAL b, DOF_REAL_VEC *f_h,
1726 		   const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1727 {
1728   bi_mat_el_vec_rrd(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1729 }
1730 
1731 static inline void
mat_el_vec_rrd(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1732 mat_el_vec_rrd(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1733 	       DOF_REAL_VEC *f_h,
1734 	       const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1735 {
1736   gen_mat_el_vec_rrd(a, A, u_h, 1.0, f_h, dof, bnd);
1737 }
1738 
1739 static inline void
gen_mat_el_vec_scl_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,REAL b,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1740 gen_mat_el_vec_scl_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1741 		       REAL b, DOF_REAL_VEC *f_h,
1742 		       const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1743 {
1744   bi_mat_el_vec_scl_dow(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1745 }
1746 
1747 static inline void
mat_el_vec_scl_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1748 mat_el_vec_scl_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1749 		   DOF_REAL_VEC *f_h,
1750 		   const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1751 {
1752   gen_mat_el_vec_scl_dow(a, A, u_h, 1.0, f_h, dof, bnd);
1753 }
1754 
1755 /* >>> */
1756 
1757 /* <<< DOW x SCL */
1758 
1759 static inline void
gen_mat_el_vec_rdr(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1760 gen_mat_el_vec_rdr(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1761 		   REAL b, DOF_REAL_D_VEC *f_h,
1762 		   const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1763 {
1764   bi_mat_el_vec_rdr(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1765 }
1766 
1767 static inline void
mat_el_vec_rdr(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1768 mat_el_vec_rdr(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1769 	       DOF_REAL_D_VEC *f_h,
1770 	       const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1771 {
1772   gen_mat_el_vec_rdr(a, A, u_h, 1.0, f_h, dof, bnd);
1773 }
1774 
1775 static inline void
gen_mat_el_vec_dow_scl(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1776 gen_mat_el_vec_dow_scl(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1777 		       REAL b, DOF_REAL_VEC_D *f_h,
1778 		       const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1779 {
1780   bi_mat_el_vec_dow_scl(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1781 }
1782 
1783 static inline void
mat_el_vec_dow_scl(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1784 mat_el_vec_dow_scl(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1785 		   DOF_REAL_VEC_D *f_h,
1786 		   const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1787 {
1788   gen_mat_el_vec_dow_scl(a, A, u_h, 1.0, f_h, dof, bnd);
1789 }
1790 
1791 /* >>> */
1792 
1793 /* >>> */
1794 
1795 /* >>> */
1796 
1797 /* <<< some blas functions (incomplete) */
1798 
1799 /* <<< axpy, axey, set for EL_MATRIX */
1800 
1801 static inline
__el_mat_set(REAL a,EL_MATRIX * res)1802 EL_MATRIX *__el_mat_set(REAL a, EL_MATRIX *res)
1803 {
1804   int i, j;
1805 
1806 #undef MAT_BODY
1807 #define MAT_BODY(PFX, CC, C, S, TYPE)		\
1808   for (i = 0; i < res->n_row; i++) {		\
1809     for (j = 0; j < res->n_col; j++) {		\
1810       PFX##SET_DOW(a, res->data.S[i][j]);	\
1811     }						\
1812   }
1813   MAT_EMIT_BODY_SWITCH(res->type);
1814 
1815   return res;
1816 }
1817 
el_mat_set(REAL a,EL_MATRIX * res)1818 static inline EL_MATRIX *el_mat_set(REAL a, EL_MATRIX *res)
1819 {
1820   COL_CHAIN_DO(res, EL_MATRIX) {
1821     ROW_CHAIN_DO(res, EL_MATRIX) {
1822       __el_mat_set(a, res);
1823     } ROW_CHAIN_WHILE(res, EL_MATRIX);
1824   } COL_CHAIN_WHILE(res, EL_MATRIX);
1825 
1826   return res;
1827 }
1828 
1829 static inline
__el_mat_axey(REAL a,const EL_MATRIX * A,EL_MATRIX * res)1830 EL_MATRIX *__el_mat_axey(REAL a, const EL_MATRIX *A, EL_MATRIX *res)
1831 {
1832   int i, j;
1833 
1834 #undef MAT_TRI_BODY
1835 #define MAT_TRI_BODY(PFX1, CC1, C1, S1, TYPE1,				\
1836 		     PFX2, CC2, C2, S2, TYPE2)				\
1837   for (i = 0; i < A->n_row; i++) {					\
1838     for (j = 0; j < A->n_col; j++) {					\
1839       PFX1##PFX2##AXEY_DOW(a, CC2 A->data.S2[i][j], res->data.S1[i][j]); \
1840     }									\
1841   }
1842   MAT_EMIT_TRI_BODY_SWITCH(A->type, res->type);
1843 
1844   return res;
1845 }
1846 
1847 static inline EL_MATRIX *
el_mat_axey(REAL a,const EL_MATRIX * A,EL_MATRIX * res)1848 el_mat_axey(REAL a, const EL_MATRIX *A, EL_MATRIX *res)
1849 {
1850   COL_CHAIN_DO(res, EL_MATRIX) {
1851     ROW_CHAIN_DO(res, EL_MATRIX) {
1852       __el_mat_axey(a, A, res);
1853       A = ROW_CHAIN_NEXT(A, const EL_MATRIX);
1854     } ROW_CHAIN_WHILE(res, EL_MATRIX);
1855     A = COL_CHAIN_NEXT(A, const EL_MATRIX);
1856   } COL_CHAIN_WHILE(res, EL_MATRIX);
1857 
1858   return res;
1859 }
1860 
1861 static inline
__el_mat_axpy(REAL a,const EL_MATRIX * A,EL_MATRIX * res)1862 EL_MATRIX *__el_mat_axpy(REAL a, const EL_MATRIX *A, EL_MATRIX *res)
1863 {
1864   int i, j;
1865 
1866 #undef MAT_TRI_BODY
1867 #define MAT_TRI_BODY(PFX1, CC1, C1, S1, TYPE1,				\
1868 		     PFX2, CC2, C2, S2, TYPE2)				\
1869   for (i = 0; i < A->n_row; i++) {					\
1870     for (j = 0; j < A->n_col; j++) {					\
1871       PFX1##PFX2##AXPY_DOW(a, CC2 A->data.S2[i][j], res->data.S1[i][j]); \
1872     }									\
1873   }
1874   MAT_EMIT_TRI_BODY_SWITCH(res->type, A->type);
1875 
1876   return res;
1877 }
1878 
1879 static inline EL_MATRIX *
el_mat_axpy(REAL a,const EL_MATRIX * A,EL_MATRIX * res)1880 el_mat_axpy(REAL a, const EL_MATRIX *A, EL_MATRIX *res)
1881 {
1882   COL_CHAIN_DO(res, EL_MATRIX) {
1883     ROW_CHAIN_DO(res, EL_MATRIX) {
1884       __el_mat_axpy(a, A, res);
1885       A = ROW_CHAIN_NEXT(A, const EL_MATRIX);
1886     } ROW_CHAIN_WHILE(res, EL_MATRIX);
1887     A = COL_CHAIN_NEXT(A, const EL_MATRIX);
1888   } COL_CHAIN_WHILE(res, EL_MATRIX);
1889 
1890   return res;
1891 }
1892 
1893 static inline
M_el_mat_axpby(REAL a,const EL_MATRIX * X,REAL b,const EL_MATRIX * Y,EL_MATRIX * res)1894 EL_MATRIX *M_el_mat_axpby(REAL a, const EL_MATRIX *X,
1895 			  REAL b, const EL_MATRIX *Y, EL_MATRIX *res)
1896 {
1897 
1898   int i, j;
1899 
1900   switch (X->type) {
1901   case MATENT_REAL_DD:
1902     switch (Y->type) {
1903     case MATENT_REAL_DD:
1904       for (i = 0; i < res->n_row; i++) {
1905 	for (j = 0; j < res->n_col; j++) {
1906 	  MMMAXPBY_DOW(a, (const REAL_D *) X->data.real_dd[i][j],
1907 		       b, (const REAL_D *) Y->data.real_dd[i][j],
1908 		       res->data.real_dd[i][j]);
1909 	}
1910       }
1911       break;
1912     case MATENT_REAL_D:
1913       for (i = 0; i < res->n_row; i++) {
1914 	for (j = 0; j < res->n_col; j++) {
1915 	  MMDMAXPBY_DOW(a, (const REAL_D *) X->data.real_dd[i][j],
1916 			b, Y->data.real_d[i][j], res->data.real_dd[i][j]);
1917 	}
1918       }
1919       break;
1920     case MATENT_REAL:
1921       for (i = 0; i < res->n_row; i++) {
1922 	for (j = 0; j < res->n_col; j++) {
1923 	  MMSCMAXPBY_DOW(a, (const REAL_D *) X->data.real_dd[i][j],
1924 			 b, Y->data.real[i][j], res->data.real_dd[i][j]);
1925 	}
1926       }
1927       break;
1928     default:
1929       ERROR_EXIT("!!! unknown MATENT_TYPE: %d", Y->type);
1930       break;
1931     }
1932     break;
1933   case MATENT_REAL_D:
1934     if (Y->type == MATENT_REAL_D) {
1935       for (i = 0; i < res->n_row; i++) {
1936 	for (j = 0; j < res->n_col; j++) {
1937 	  MDMDMAXPBY_DOW(a, X->data.real_d[i][j],
1938 			 b, Y->data.real_d[i][j], res->data.real_dd[i][j]);
1939 	}
1940       }
1941     } else if (Y->type == MATENT_REAL) {
1942       for (i = 0; i < res->n_row; i++) {
1943 	for (j = 0; j < res->n_col; j++) {
1944 	  MDMSCMAXPBY_DOW(a, X->data.real_d[i][j],
1945 			  b, Y->data.real[i][j], res->data.real_dd[i][j]);
1946 	}
1947       }
1948     }
1949     break;
1950   case MATENT_REAL:
1951     if (res->type == MATENT_REAL) {
1952       for (i = 0; i < res->n_row; i++) {
1953 	for (j = 0; j < res->n_col; j++) {
1954 	  MSCMSCMAXPBY_DOW(a, X->data.real[i][j],
1955 			   b, Y->data.real[i][j], res->data.real_dd[i][j]);
1956 	}
1957       }
1958     }
1959     break;
1960   default:
1961     ERROR_EXIT("!!! unknown MATENT_TYPE: %d", X->type);
1962     break;
1963   }
1964 
1965   return res;
1966 }
1967 
1968 static inline
DM_el_mat_axpby(REAL a,const EL_MATRIX * X,REAL b,const EL_MATRIX * Y,EL_MATRIX * res)1969 EL_MATRIX *DM_el_mat_axpby(REAL a, const EL_MATRIX *X,
1970 			   REAL b, const EL_MATRIX *Y, EL_MATRIX *res)
1971 {
1972 
1973   int i, j;
1974 
1975   switch (X->type) {
1976 
1977   case MATENT_REAL_D:
1978     if (Y->type == MATENT_REAL_D) {
1979       for (i = 0; i < res->n_row; i++) {
1980 	for (j = 0; j < res->n_col; j++) {
1981 	  DMDMDMAXPBY_DOW(a, X->data.real_d[i][j],
1982 			  b, Y->data.real_d[i][j], res->data.real_d[i][j]);
1983 	}
1984       }
1985     } else if (Y->type == MATENT_REAL) {
1986       for (i = 0; i < res->n_row; i++) {
1987 	for (j = 0; j < res->n_col; j++) {
1988 	  DMDMSCMAXPBY_DOW(a, X->data.real_d[i][j],
1989 			   b, Y->data.real[i][j], res->data.real_d[i][j]);
1990 	}
1991       }
1992     }
1993     break;
1994   case MATENT_REAL:
1995     if (Y->type == MATENT_REAL) {
1996       for (i = 0; i < res->n_row; i++) {
1997 	for (j = 0; j < res->n_col; j++) {
1998 	  DMSCMSCMAXPBY_DOW(a, X->data.real[i][j],
1999 			    b, Y->data.real[i][j], res->data.real_d[i][j]);
2000 	}
2001       }
2002     }
2003     break;
2004   default:
2005     ERROR_EXIT("!!! unknown MATENT_TYPE: %d", X->type);
2006     break;
2007   }
2008 
2009   return res;
2010 }
2011 
2012 static inline
SCM_el_mat_axpby(REAL a,const EL_MATRIX * X,REAL b,const EL_MATRIX * Y,EL_MATRIX * res)2013 EL_MATRIX *SCM_el_mat_axpby(REAL a, const EL_MATRIX *X,
2014 			    REAL b, const EL_MATRIX *Y, EL_MATRIX *res)
2015 {
2016   int i, j;
2017 
2018   for (i = 0; i < res->n_row; i++) {
2019     for (j = 0; j < res->n_col; j++) {
2020       SCMSCMSCMAXPBY_DOW(a, X->data.real[i][j],
2021 			 b, Y->data.real[i][j], res->data.real[i][j]);
2022     }
2023   }
2024 
2025   return res;
2026 }
2027 
2028 static inline
__el_mat_axpby(REAL a,const EL_MATRIX * X,REAL b,const EL_MATRIX * Y,EL_MATRIX * res)2029 EL_MATRIX *__el_mat_axpby(REAL a, const EL_MATRIX *X,
2030 			  REAL b, const EL_MATRIX *Y, EL_MATRIX *res)
2031 {
2032   FUNCNAME("el_mat_axpby");
2033 
2034 
2035   REAL tmp;
2036   const EL_MATRIX *tmp_mat;
2037 
2038 
2039   TEST_EXIT(res,
2040 	    "please get_el_matrix(result,...) bevore using el_mat_axpby!!!\n");
2041   TEST_EXIT(res->type >= X->type,
2042 	    "MATENT_TYPE of EL_MATRIX *result have to be equal "
2043 	    "or greater then MATENT_TYPE of EL_MATRIX *X!!!\n");
2044   TEST_EXIT(res->type >= Y->type,
2045 	    "MATENT_TYPE of EL_MATRIX *result have to be equal "
2046 	    "or greater then MATENT_TYPE of EL_MATRIX *Y!!!\n");
2047 
2048 
2049   if (Y->type > X->type) {
2050     tmp_mat = X;
2051     tmp = a;
2052     X = Y;
2053     a = b;
2054     Y = tmp_mat;
2055     b = tmp;
2056   }
2057 
2058   switch (res->type) {
2059   case MATENT_REAL_DD:
2060     res = M_el_mat_axpby(a, X, b, Y, res);
2061     break;
2062   case MATENT_REAL_D:
2063     res = DM_el_mat_axpby(a, X, b, Y, res);
2064     break;
2065   case MATENT_REAL:
2066     res = SCM_el_mat_axpby(a, X, b, Y, res);
2067     break;
2068   default:
2069     ERROR_EXIT("!!! unknown MATENT_TYPE: %d", res->type);
2070     break;
2071   }
2072 
2073   return (res);
2074 
2075 }
2076 
2077 static inline EL_MATRIX *
el_mat_axpby(REAL a,const EL_MATRIX * X,REAL b,const EL_MATRIX * Y,EL_MATRIX * res)2078 el_mat_axpby(REAL a, const EL_MATRIX *X,
2079 	     REAL b, const EL_MATRIX *Y, EL_MATRIX *res)
2080 {
2081   COL_CHAIN_DO(res, EL_MATRIX) {
2082     ROW_CHAIN_DO(res, EL_MATRIX) {
2083       __el_mat_axpby(a, X, b, Y, res);
2084       X = ROW_CHAIN_NEXT(X, const EL_MATRIX);
2085       Y = ROW_CHAIN_NEXT(Y, const EL_MATRIX);
2086     } ROW_CHAIN_WHILE(res, EL_MATRIX);
2087     X = COL_CHAIN_NEXT(X, const EL_MATRIX);
2088     Y = COL_CHAIN_NEXT(Y, const EL_MATRIX);
2089   } COL_CHAIN_WHILE(res, EL_MATRIX);
2090 
2091   return res;
2092 }
2093 
2094 /* >>> */
2095 
2096 /* <<< axpy() for element vectors  */
2097 
2098 static inline
__el_real_vec_axpy(REAL a,const EL_REAL_VEC * x,EL_REAL_VEC * y)2099 void __el_real_vec_axpy(REAL a, const EL_REAL_VEC *x, EL_REAL_VEC *y)
2100 {
2101   FUNCNAME("__el_real_vec_axpy");
2102   int i;
2103   DEBUG_TEST_EXIT(x->n_components == y->n_components,
2104 		  "element vector dimensions do not match, x: %d, y: %d.\n",
2105 		  x->n_components, y->n_components);
2106   for (i = 0; i < x->n_components; i++) {
2107     y->vec[i] += a * x->vec[i];
2108   }
2109 }
2110 
2111 __FLATTEN_ATTRIBUTE__
2112 static inline
el_real_vec_axpy(REAL a,const EL_REAL_VEC * x,EL_REAL_VEC * y)2113 void el_real_vec_axpy(REAL a, const EL_REAL_VEC *x, EL_REAL_VEC *y)
2114 {
2115   CHAIN_DO(x, const EL_REAL_VEC) {
2116     __el_real_vec_axpy(a, x, y);
2117     y = CHAIN_NEXT(y, EL_REAL_VEC);
2118   } CHAIN_WHILE(x, const EL_REAL_VEC);
2119 }
2120 
2121 static inline
__el_real_d_vec_axpy(REAL a,const EL_REAL_D_VEC * x,EL_REAL_D_VEC * y)2122 void __el_real_d_vec_axpy(REAL a, const EL_REAL_D_VEC *x, EL_REAL_D_VEC *y)
2123 {
2124   FUNCNAME("__el_real_d_vec_axpy");
2125   int i;
2126   DEBUG_TEST_EXIT(x->n_components == y->n_components,
2127 		  "element vector dimensions do not match, x: %d, y: %d.\n",
2128 		  x->n_components, y->n_components);
2129   for (i = 0; i < x->n_components; i++) {
2130     AXPY_DOW(a, x->vec[i], y->vec[i]);
2131   }
2132 }
2133 
2134 __FLATTEN_ATTRIBUTE__
2135 static inline
el_real_d_vec_axpy(REAL a,const EL_REAL_D_VEC * x,EL_REAL_D_VEC * y)2136 void el_real_d_vec_axpy(REAL a, const EL_REAL_D_VEC *x, EL_REAL_D_VEC *y)
2137 {
2138   CHAIN_DO(x, const EL_REAL_D_VEC) {
2139     __el_real_d_vec_axpy(a, x, y);
2140     y = CHAIN_NEXT(y, EL_REAL_D_VEC);
2141   } CHAIN_WHILE(x, const EL_REAL_D_VEC);
2142 }
2143 
2144 __FLATTEN_ATTRIBUTE__
2145 static inline
el_real_vec_d_axpy(REAL a,const EL_REAL_VEC_D * x,EL_REAL_VEC_D * y)2146 void el_real_vec_d_axpy(REAL a, const EL_REAL_VEC_D *x, EL_REAL_VEC_D *y)
2147 {
2148   CHAIN_DO(x, const EL_REAL_VEC_D) {
2149     if (x->stride == 1) {
2150       __el_real_vec_axpy(a, (const EL_REAL_VEC *)x, (EL_REAL_VEC *)y);
2151     } else {
2152       __el_real_d_vec_axpy(a, (const EL_REAL_D_VEC *)x, (EL_REAL_D_VEC *)y);
2153     }
2154     y = CHAIN_NEXT(y, EL_REAL_VEC_D);
2155   } CHAIN_WHILE(x, const EL_REAL_VEC_D);
2156 }
2157 
2158 /* >>> */
2159 
2160 /* <<< axey() for element vectors  */
2161 
2162 static inline
__el_real_vec_axey(REAL a,const EL_REAL_VEC * x,EL_REAL_VEC * y)2163 void __el_real_vec_axey(REAL a, const EL_REAL_VEC *x, EL_REAL_VEC *y)
2164 {
2165   FUNCNAME("__el_real_vec_axey");
2166   int i;
2167   DEBUG_TEST_EXIT(x->n_components == y->n_components,
2168 		  "element vector dimensions do not match, x: %d, y: %d.\n",
2169 		  x->n_components, y->n_components);
2170   for (i = 0; i < x->n_components; i++) {
2171     y->vec[i] = a * x->vec[i];
2172   }
2173 }
2174 
2175 __FLATTEN_ATTRIBUTE__
2176 static inline
el_real_vec_axey(REAL a,const EL_REAL_VEC * x,EL_REAL_VEC * y)2177 void el_real_vec_axey(REAL a, const EL_REAL_VEC *x, EL_REAL_VEC *y)
2178 {
2179   CHAIN_DO(x, const EL_REAL_VEC) {
2180     __el_real_vec_axey(a, x, y);
2181     y = CHAIN_NEXT(y, EL_REAL_VEC);
2182   } CHAIN_WHILE(x, const EL_REAL_VEC);
2183 }
2184 
2185 static inline
__el_real_d_vec_axey(REAL a,const EL_REAL_D_VEC * x,EL_REAL_D_VEC * y)2186 void __el_real_d_vec_axey(REAL a, const EL_REAL_D_VEC *x, EL_REAL_D_VEC *y)
2187 {
2188   FUNCNAME("__el_real_d_vec_axey");
2189   int i;
2190   DEBUG_TEST_EXIT(x->n_components == y->n_components,
2191 		  "element vector dimensions do not match, x: %d, y: %d.\n",
2192 		  x->n_components, y->n_components);
2193   for (i = 0; i < x->n_components; i++) {
2194     AXEY_DOW(a, x->vec[i], y->vec[i]);
2195   }
2196 }
2197 
2198 __FLATTEN_ATTRIBUTE__
2199 static inline
el_real_d_vec_axey(REAL a,const EL_REAL_D_VEC * x,EL_REAL_D_VEC * y)2200 void el_real_d_vec_axey(REAL a, const EL_REAL_D_VEC *x, EL_REAL_D_VEC *y)
2201 {
2202   CHAIN_DO(x, const EL_REAL_D_VEC) {
2203     __el_real_d_vec_axey(a, x, y);
2204     y = CHAIN_NEXT(y, EL_REAL_D_VEC);
2205   } CHAIN_WHILE(x, const EL_REAL_D_VEC);
2206 }
2207 
2208 __FLATTEN_ATTRIBUTE__
2209 static inline
el_real_vec_d_axey(REAL a,const EL_REAL_VEC_D * x,EL_REAL_VEC_D * y)2210 void el_real_vec_d_axey(REAL a, const EL_REAL_VEC_D *x, EL_REAL_VEC_D *y)
2211 {
2212   CHAIN_DO(x, const EL_REAL_VEC_D) {
2213     if (x->stride == 1) {
2214       __el_real_vec_axey(a, (const EL_REAL_VEC *)x, (EL_REAL_VEC *)y);
2215     } else {
2216       __el_real_d_vec_axey(a, (const EL_REAL_D_VEC *)x, (EL_REAL_D_VEC *)y);
2217     }
2218     y = CHAIN_NEXT(y, EL_REAL_VEC_D);
2219   } CHAIN_WHILE(x, const EL_REAL_VEC_D);
2220 }
2221 
2222 /* >>> */
2223 
2224 /* <<< set() for element vectors  */
2225 
2226 static inline
__el_real_vec_set(REAL a,EL_REAL_VEC * y)2227 void __el_real_vec_set(REAL a, EL_REAL_VEC *y)
2228 {
2229   int i;
2230   for (i = 0; i < y->n_components; i++) {
2231     y->vec[i] = a;
2232   }
2233 }
2234 
2235 __FLATTEN_ATTRIBUTE__
2236 static inline
el_real_vec_set(REAL a,EL_REAL_VEC * y)2237 void el_real_vec_set(REAL a, EL_REAL_VEC *y)
2238 {
2239   CHAIN_DO(y, EL_REAL_VEC) {
2240     __el_real_vec_set(a, y);
2241   } CHAIN_WHILE(y, EL_REAL_VEC);
2242 }
2243 
2244 static inline
__el_real_d_vec_set(REAL a,EL_REAL_D_VEC * y)2245 void __el_real_d_vec_set(REAL a, EL_REAL_D_VEC *y)
2246 {
2247   int i;
2248   for (i = 0; i < y->n_components; i++) {
2249     SET_DOW(a, y->vec[i]);
2250   }
2251 }
2252 
2253 __FLATTEN_ATTRIBUTE__
2254 static inline
el_real_d_vec_set(REAL a,EL_REAL_D_VEC * y)2255 void el_real_d_vec_set(REAL a, EL_REAL_D_VEC *y)
2256 {
2257   CHAIN_DO(y, EL_REAL_D_VEC) {
2258     __el_real_d_vec_set(a, y);
2259   } CHAIN_WHILE(y, EL_REAL_D_VEC);
2260 }
2261 
2262 __FLATTEN_ATTRIBUTE__
2263 static inline
el_real_vec_d_set(REAL a,EL_REAL_VEC_D * y)2264 void el_real_vec_d_set(REAL a, EL_REAL_VEC_D *y)
2265 {
2266   CHAIN_DO(y, EL_REAL_VEC_D) {
2267     if (y->stride == 1) {
2268       __el_real_vec_set(a, (EL_REAL_VEC *)y);
2269     } else {
2270       __el_real_d_vec_set(a, (EL_REAL_D_VEC *)y);
2271     }
2272   } CHAIN_WHILE(y, EL_REAL_VEC_D);
2273 }
2274 
2275 /* >>> */
2276 
2277 /* >>> */
2278 
2279 #endif /* _ALBERTA_EL_VEC_H_ */
2280