1 /*******************************************************************************
2 * ALBERTA: an Adaptive multi Level finite element toolbox using
3 * Bisectioning refinement and Error control by Residual
4 * Techniques for scientific Applications
5 *
6 * file: el_vec.h
7 *
8 * description: support routines for operation on element vector and matrices
9 *
10 *******************************************************************************
11 *
12 * this file's author(s): Claus-Justus Heine
13 * Abteilung fuer Angewandte Mathematik
14 * Albert-Ludwigs-Universitaet Freiburg
15 * Hermann-Herder-Str. 10
16 * D-79104 Freiburg im Breisgau, Germany
17 *
18 * http://www.alberta-fem.de
19 *
20 * (c) by C.-J. Heine (2009)
21 *
22 ******************************************************************************/
23
24 #ifndef _ALBERTA_EL_VEC_H_
25 #define _ALBERTA_EL_VEC_H_
26
27 #include "alberta.h"
28
29 /* Initializer for an uninitialized element-vector. */
30 #define DEFUN_INIT_EL_VEC(VECNAME) \
31 static inline EL_##VECNAME##_VEC * \
32 CPP_CONCAT3(init_el_, VECNAME##_name, _vec)( \
33 EL_##VECNAME##_VEC *vec, int size, int size_max) \
34 { \
35 vec->n_components = size; \
36 vec->n_components_max = size_max; \
37 vec->reserved = 1; \
38 DBL_LIST_INIT(&vec->chain); \
39 return vec; \
40 } \
41 struct _AI_semicolon_dummy
42
43 DEFUN_INIT_EL_VEC(INT);
44 DEFUN_INIT_EL_VEC(DOF);
45 DEFUN_INIT_EL_VEC(REAL);
46 DEFUN_INIT_EL_VEC(REAL_DD);
47 /*DEFUN_INIT_EL_VEC(REAL_D);*/
48 DEFUN_INIT_EL_VEC(UCHAR);
49 DEFUN_INIT_EL_VEC(SCHAR);
50 DEFUN_INIT_EL_VEC(PTR);
51
52 static inline EL_REAL_D_VEC *
init_el_real_d_vec(EL_REAL_D_VEC * vec,int size,int size_max)53 init_el_real_d_vec(EL_REAL_D_VEC *vec, int size, int size_max)
54 {
55 vec->n_components = size;
56 vec->n_components_max = size_max;
57 vec->reserved = DIM_OF_WORLD;
58 DBL_LIST_INIT(&vec->chain);
59
60 return vec;
61 }
62
63 #define EL_VEC_SIZEOF(VECNAME, _size_max) \
64 (((_size_max) - 1) \
65 * \
66 sizeof(EL_##VECNAME##_VEC_TYPE) \
67 + \
68 sizeof(EL_##VECNAME##_VEC))
69
70 /* An automatic element vector, beware that this is just an
71 * uninitialized memory region if _init == false.
72 */
73 #define DEF_EL_VEC_VAR(VECNAME, name, _size, _size_max, _init) \
74 char _AI_##name##_space[EL_VEC_SIZEOF(VECNAME, (_size_max))]; \
75 EL_##VECNAME##_VEC *name = \
76 (_init) \
77 ? CPP_CONCAT3(init_el_, VECNAME##_name, _vec)( \
78 (EL_##VECNAME##_VEC *)_AI_##name##_space, \
79 (_size), (_size_max)) \
80 : (EL_##VECNAME##_VEC *)_AI_##name##_space
81
82 /* Automatic element vector of fixed maximal size. The macro
83 * "size_max" needs to be constant.
84 */
85 #define DEF_EL_VEC_CONST(VECNAME, name, _size, _size_max) \
86 struct { \
87 EL_##VECNAME##_VEC el_vec; \
88 EL_##VECNAME##_VEC_TYPE data[MAX(1, (_size_max)-1)]; \
89 } _AI_##name = { \
90 { \
91 (_size), /* n_components */ \
92 (_size_max), /* n_components_max */ \
93 DBL_LIST_INITIALIZER(_AI_##name.el_vec.chain), /* chain */ \
94 ((sizeof(EL_##VECNAME##_VEC_TYPE) + sizeof(REAL) - 1) \
95 / sizeof(REAL)), /* reserved (stride) */ \
96 }, \
97 }; \
98 EL_##VECNAME##_VEC *name = &_AI_##name.el_vec
99
100 /* Allocate and initialize a single, un-chained element vector. */
101 #define ALLOC_EL_VEC(VECNAME, _size, _size_max) \
102 CPP_CONCAT3(init_el_, VECNAME##_name, _vec)( \
103 (EL_##VECNAME##_VEC *)MEM_ALLOC( \
104 EL_VEC_SIZEOF(VECNAME, (_size_max)), char), \
105 (_size), (_size_max))
106
107 /* <<< get_dof_indices() */
108
get_dof_indices(EL_DOF_VEC * dofs,const FE_SPACE * fe_space,const EL * el)109 static inline EL_DOF_VEC *get_dof_indices(EL_DOF_VEC *dofs,
110 const FE_SPACE *fe_space,
111 const EL *el)
112 {
113 const BAS_FCTS *bas_fcts = fe_space->bas_fcts;
114 const FE_SPACE *fe_chain;
115
116 if (dofs) {
117 CHAIN_DO(fe_space, const FE_SPACE) {
118 GET_DOF_INDICES(fe_space->bas_fcts, el, fe_space->admin, dofs->vec);
119 dofs->n_components = fe_space->bas_fcts->n_bas_fcts;
120 dofs = CHAIN_NEXT(dofs, EL_DOF_VEC);
121 } CHAIN_WHILE(fe_space, const FE_SPACE);
122 } else {
123 EL_DOF_VEC *chain_dofs;
124
125 dofs = (EL_DOF_VEC *)GET_DOF_INDICES(bas_fcts, el, fe_space->admin, NULL);
126 dofs->n_components = bas_fcts->n_bas_fcts;
127 DBL_LIST_INIT(&dofs->chain);
128 CHAIN_FOREACH(fe_chain, fe_space, const FE_SPACE) {
129 chain_dofs = (EL_DOF_VEC *)
130 GET_DOF_INDICES(fe_chain->bas_fcts, el, fe_chain->admin, NULL);
131 chain_dofs->n_components = fe_chain->bas_fcts->n_bas_fcts;
132 CHAIN_ADD_TAIL(dofs, chain_dofs);
133 }
134 }
135 return dofs;
136 }
137
138 /* >>> */
139
140 /* <<< get_bound() */
141
get_bound(EL_BNDRY_VEC * bndry,const BAS_FCTS * bas_fcts,const EL_INFO * el_info)142 static inline EL_BNDRY_VEC *get_bound(EL_BNDRY_VEC *bndry,
143 const BAS_FCTS *bas_fcts,
144 const EL_INFO *el_info)
145
146 {
147 const BAS_FCTS *bfcts_chain;
148
149 if (bndry) {
150 CHAIN_DO(bas_fcts, const BAS_FCTS) {
151 GET_BOUND(bas_fcts, el_info, bndry->vec);
152 bndry->n_components = bas_fcts->n_bas_fcts;
153 bndry = CHAIN_NEXT(bndry, EL_BNDRY_VEC);
154 } CHAIN_WHILE(bas_fcts, const BAS_FCTS);
155 } else {
156 EL_BNDRY_VEC *chain_bndry;
157
158 bndry = (EL_BNDRY_VEC *)GET_BOUND(bas_fcts, el_info, NULL);
159 bndry->n_components = bas_fcts->n_bas_fcts;
160 DBL_LIST_INIT(&bndry->chain);
161 CHAIN_FOREACH(bfcts_chain, bas_fcts, const BAS_FCTS) {
162 chain_bndry = (EL_BNDRY_VEC *)GET_BOUND(bfcts_chain, el_info, NULL);
163 chain_bndry->n_components = bfcts_chain->n_bas_fcts;
164 CHAIN_ADD_TAIL(bndry, chain_bndry);
165 }
166 }
167 return bndry;
168 }
169
170 /* >>> */
171
172 /* <<< interpol() */
173
174 static inline
el_interpol(EL_REAL_VEC * coeff,const EL_INFO * el_info,int wall,const EL_INT_VEC * indices,LOC_FCT_AT_QP f,void * ud,const BAS_FCTS * bas_fcts)175 void el_interpol(EL_REAL_VEC *coeff,
176 const EL_INFO *el_info, int wall,
177 const EL_INT_VEC *indices,
178 LOC_FCT_AT_QP f, void *ud,
179 const BAS_FCTS *bas_fcts)
180 {
181 const BAS_FCTS *bfcts_chain;
182 const int *ind = NULL;
183 int n_ind = -1;
184
185 if (indices) {
186 ind = indices->vec;
187 n_ind = indices->n_components;
188 }
189 INTERPOL(bas_fcts, coeff, el_info, wall, n_ind, ind, f, ud);
190 CHAIN_FOREACH(bfcts_chain, bas_fcts, const BAS_FCTS) {
191 if (indices) {
192 indices = CHAIN_NEXT(indices, EL_INT_VEC);
193 ind = indices->vec;
194 n_ind = indices->n_components;
195 }
196 coeff = CHAIN_NEXT(coeff, EL_REAL_VEC);
197 INTERPOL(bfcts_chain, coeff, el_info, wall, n_ind, ind, f, ud);
198 }
199 }
200
201 /* >>> */
202
203 /* <<< interpol_dow() */
204
205 static inline
el_interpol_dow(EL_REAL_VEC_D * coeff,const EL_INFO * el_info,int wall,const EL_INT_VEC * indices,LOC_FCT_D_AT_QP f,void * f_data,const BAS_FCTS * bas_fcts)206 void el_interpol_dow(EL_REAL_VEC_D *coeff,
207 const EL_INFO *el_info, int wall,
208 const EL_INT_VEC *indices,
209 LOC_FCT_D_AT_QP f, void *f_data,
210 const BAS_FCTS *bas_fcts)
211 {
212 const BAS_FCTS *bfcts_chain;
213 const int *ind = NULL;
214 int n_ind = -1;
215
216 if (indices) {
217 ind = indices->vec;
218 n_ind = indices->n_components;
219 }
220 INTERPOL_DOW(bas_fcts, coeff, el_info, wall, n_ind, ind, f, f_data);
221 CHAIN_FOREACH(bfcts_chain, bas_fcts, const BAS_FCTS) {
222 if (indices) {
223 indices = CHAIN_NEXT(indices, EL_INT_VEC);
224 ind = indices->vec;
225 n_ind = indices->n_components;
226 }
227 coeff = CHAIN_NEXT(coeff, EL_REAL_VEC_D);
228 INTERPOL_DOW(bfcts_chain, coeff, el_info, wall, n_ind, ind, f, f_data);
229 }
230 }
231
232 /* >>> */
233
234 /* <<< dirichlet_map() */
235
__dirichlet_map(S_CHAR * bound,const BNDRY_FLAGS * bndry_bits,int n_components,const BNDRY_FLAGS mask)236 static inline void __dirichlet_map(S_CHAR *bound,
237 const BNDRY_FLAGS *bndry_bits,
238 int n_components,
239 const BNDRY_FLAGS mask)
240 {
241 int i;
242 BNDRY_FLAGS all;
243
244 if (mask == NULL) {
245 BNDRY_FLAGS_ALL(all);
246 mask = all;
247 }
248
249 for (i = 0; i < n_components; i++) {
250 if (BNDRY_FLAGS_IS_INTERIOR(bndry_bits[i])) {
251 bound[i] = INTERIOR;
252 continue;
253 }
254 if (BNDRY_FLAGS_IS_PARTOF(bndry_bits[i], mask)) {
255 bound[i] = DIRICHLET;
256 } else {
257 bound[i] = INTERIOR;
258 }
259 }
260 }
261
dirichlet_map_single(EL_SCHAR_VEC * bound,const EL_BNDRY_VEC * bndry_bits,const BNDRY_FLAGS mask)262 static inline void dirichlet_map_single(EL_SCHAR_VEC *bound,
263 const EL_BNDRY_VEC *bndry_bits,
264 const BNDRY_FLAGS mask)
265 {
266 bound->n_components = bndry_bits->n_components;
267
268 __dirichlet_map(bound->vec, bndry_bits->vec, bound->n_components, mask);
269 }
270
dirichlet_map(EL_SCHAR_VEC * bound,const EL_BNDRY_VEC * bndry_bits,const BNDRY_FLAGS mask)271 static inline void dirichlet_map(EL_SCHAR_VEC *bound,
272 const EL_BNDRY_VEC *bndry_bits,
273 const BNDRY_FLAGS mask)
274 {
275 const EL_BNDRY_VEC *bndry_chain;
276
277 dirichlet_map_single(bound, bndry_bits, mask);
278 CHAIN_FOREACH(bndry_chain, bndry_bits, EL_BNDRY_VEC) {
279 bound = CHAIN_NEXT(bound, EL_SCHAR_VEC);
280 dirichlet_map_single(bound, bndry_chain, mask);
281 }
282 }
283
284 /* >>> */
285
286 /* <<< filling el-vecs with data */
287
288 #define DEFUN_FILL_EL_VEC(VECNAME, vecname) \
289 static inline const EL_##VECNAME##_VEC * \
290 fill_el_##vecname##_vec(EL_##VECNAME##_VEC *el_vec, \
291 EL *el, \
292 const DOF_##VECNAME##_VEC *dof_vec) \
293 { \
294 const FE_SPACE *fe_space = dof_vec->fe_space; \
295 const FE_SPACE *fe_chain; \
296 \
297 if (el_vec) { \
298 CHAIN_DO(fe_space, const FE_SPACE) { \
299 fe_space->bas_fcts->get_##vecname##_vec(el_vec->vec, el, dof_vec); \
300 el_vec->n_components = fe_space->bas_fcts->n_bas_fcts; \
301 el_vec = CHAIN_NEXT(el_vec, EL_##VECNAME##_VEC); \
302 dof_vec = CHAIN_NEXT(dof_vec, DOF_##VECNAME##_VEC); \
303 } CHAIN_WHILE(fe_space, const FE_SPACE); \
304 } else { \
305 EL_##VECNAME##_VEC *chain_vec; \
306 \
307 el_vec = (EL_##VECNAME##_VEC *) \
308 fe_space->bas_fcts->get_##vecname##_vec(NULL, el, dof_vec); \
309 el_vec->n_components = fe_space->bas_fcts->n_bas_fcts; \
310 DBL_LIST_INIT(&el_vec->chain); \
311 CHAIN_FOREACH(fe_chain, fe_space, const FE_SPACE) { \
312 dof_vec = CHAIN_NEXT(dof_vec, DOF_##VECNAME##_VEC); \
313 chain_vec = (EL_##VECNAME##_VEC *) \
314 fe_chain->bas_fcts->get_##vecname##_vec(NULL, el, dof_vec); \
315 chain_vec->n_components = fe_chain->bas_fcts->n_bas_fcts; \
316 CHAIN_ADD_TAIL(el_vec, chain_vec); \
317 } \
318 } \
319 return el_vec; \
320 } \
321 struct _AI_semicolon_dummy
322
323 DEFUN_FILL_EL_VEC(INT, int);
324 DEFUN_FILL_EL_VEC(REAL, real);
325 DEFUN_FILL_EL_VEC(REAL_D, real_d);
326 DEFUN_FILL_EL_VEC(REAL_DD, real_dd);
327 DEFUN_FILL_EL_VEC(UCHAR, uchar);
328 DEFUN_FILL_EL_VEC(SCHAR, schar);
329
330 static inline
331 const EL_REAL_VEC_D *
fill_el_real_vec_d(EL_REAL_VEC_D * el_vec,EL * el,const DOF_REAL_VEC_D * dof_vec)332 fill_el_real_vec_d(EL_REAL_VEC_D *el_vec, EL *el, const DOF_REAL_VEC_D *dof_vec)
333 {
334 const FE_SPACE *fe_space = dof_vec->fe_space;
335 const FE_SPACE *fe_chain;
336
337 if (el_vec) {
338 CHAIN_DO(fe_space, const FE_SPACE) {
339 fe_space->bas_fcts->get_real_vec_d(el_vec->vec, el, dof_vec);
340 el_vec->n_components = fe_space->bas_fcts->n_bas_fcts;
341 el_vec = CHAIN_NEXT(el_vec, EL_REAL_VEC_D);
342 dof_vec = CHAIN_NEXT(dof_vec, DOF_REAL_VEC_D);
343 } CHAIN_WHILE(fe_space, const FE_SPACE);
344 } else {
345 EL_REAL_VEC_D *el_vec_chain;
346
347 el_vec = (EL_REAL_VEC_D *)
348 fe_space->bas_fcts->get_real_vec_d(NULL, el, dof_vec);
349 el_vec->n_components = fe_space->bas_fcts->n_bas_fcts;
350 DBL_LIST_INIT(&el_vec->chain);
351 CHAIN_FOREACH(fe_chain, fe_space, const FE_SPACE) {
352 dof_vec = CHAIN_NEXT(dof_vec, DOF_REAL_VEC_D);
353 el_vec_chain =
354 (EL_REAL_VEC_D *)fe_chain->bas_fcts->get_real_vec_d(NULL, el, dof_vec);
355 el_vec_chain->n_components = fe_chain->bas_fcts->n_bas_fcts;
356 CHAIN_ADD_TAIL(el_vec, el_vec_chain);
357 }
358 }
359
360 return el_vec;
361 }
362
363 /* >>> */
364
365 /* <<< f = c * f + (a A + b B) u, result is an element-vector */
366
367 /* We have two kinds of functions: those starting with two underscores
368 * are for internal use, they assume fixed data-types and ignore any
369 * chain attached to the respective objects.
370 *
371 * The naming scheme rather treats the beasts as local operators; the
372 * different kinds are distinguished by RangeDim/DomainDim
373 * combinations.
374 *
375 */
376
377 /* <<< SCAL x SCAL (RANGE x DOMAIN) */
378
__el_bi_mat_vec(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_VEC * f_h)379 static inline void __el_bi_mat_vec(REAL a, const EL_MATRIX *A,
380 REAL b, const EL_MATRIX *B,
381 const EL_REAL_VEC *u_h,
382 REAL c, EL_REAL_VEC *f_h)
383 {
384 int i, j;
385 REAL sum;
386
387 if (A && B) {
388 for (i = 0; i < A->n_row; i++) {
389 sum = 0.0;
390 for (j = 0; j < A->n_col; j++) {
391 sum += (a * A->data.real[i][j] + b * B->data.real[i][j]) * u_h->vec[j];
392 }
393 f_h->vec[i] = c * f_h->vec[i] + sum;
394 }
395 } else {
396 if (B) {
397 A = B;
398 a = b;
399 B = NULL;
400 b = 0.0;
401 }
402 for (i = 0; i < A->n_row; i++) {
403 sum = 0.0;
404 for (j = 0; j < A->n_col; j++) {
405 sum += a * A->data.real[i][j] * u_h->vec[j];
406 }
407 f_h->vec[i] = c * f_h->vec[i] + sum;
408 }
409 }
410 }
411
412 __FLATTEN_ATTRIBUTE__
413 static inline
el_bi_mat_vec(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_VEC * f_h)414 EL_REAL_VEC *el_bi_mat_vec(REAL a,
415 const EL_MATRIX *A,
416 REAL b,
417 const EL_MATRIX *B,
418 const EL_REAL_VEC *u_h,
419 REAL c,
420 EL_REAL_VEC *f_h)
421 {
422 const EL_MATRIX *A_chain;
423
424 if (A == NULL) {
425 A = B;
426 a = b;
427 B = NULL;
428 b = 0.0;
429 }
430 COL_CHAIN_DO(A, const EL_MATRIX) {
431 __el_bi_mat_vec(a, A, b, B, u_h, c, f_h);
432 ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
433 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
434 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
435 __el_bi_mat_vec(a, A_chain, b, B, u_h, 1.0, f_h);
436 }
437 /* roll-over to start of this row */
438 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
439 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
440 /* next row */
441 f_h = CHAIN_NEXT(f_h, EL_REAL_VEC);
442 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
443 } COL_CHAIN_WHILE(A, const EL_MATRIX);
444
445 return f_h;
446 }
447
448 /* >>> */
449
450 /* <<< SCAL x DOW (RANGE x DOMAIN) */
451
452 /* <<< REAL x REAL_D, fixed stride */
453
__el_bi_mat_vec_rrd(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,EL_REAL_VEC * f_h)454 static inline void __el_bi_mat_vec_rrd(REAL a, const EL_MATRIX *A,
455 REAL b, const EL_MATRIX *B,
456 const EL_REAL_D_VEC *u_h,
457 REAL c, EL_REAL_VEC *f_h)
458 {
459 int i, j;
460 REAL sum;
461
462 /* A and B must be either REAL or REAL_D matrices */
463 if (A && B) {
464 if (A->type == MATENT_REAL) {
465 if (B->type == MATENT_REAL) {
466 for (j = 0; j < A->n_col; j++) {
467 sum = SUM_DOW(u_h->vec[j]);
468 for (i = 0; i < A->n_row; i++) {
469 f_h->vec[i] =
470 c * f_h->vec[i]
471 +
472 (a * A->data.real[i][j] + b * B->data.real[i][j]) * sum;
473 }
474 }
475 return;
476 } else {
477 for (j = 0; j < A->n_col; j++) {
478 REAL u_sum = SUM_DOW(u_h->vec[j]);
479 for (i = 0; i < A->n_row; i++) {
480 f_h->vec[i] =
481 c * f_h->vec[i]
482 +
483 a * A->data.real[i][j] * u_sum
484 +
485 b * SCP_DOW(B->data.real_d[i][j], u_h->vec[j]);
486 }
487 }
488 return;
489 }
490 } else {
491 if (B->type == MATENT_REAL) {
492 __el_bi_mat_vec_rrd(b, B, a, A, u_h, c, f_h);
493 } else {
494 for (j = 0; j < A->n_col; j++) {
495 for (i = 0; i < A->n_row; i++) {
496 f_h->vec[i] =
497 c * f_h->vec[i]
498 +
499 a * SCP_DOW(A->data.real_d[i][j], u_h->vec[j])
500 +
501 b * SCP_DOW(B->data.real_d[i][j], u_h->vec[j]);
502 }
503 }
504 }
505 return;
506 }
507 } else if (A) {
508 if (A->type == MATENT_REAL) {
509 for (j = 0; j < A->n_col; j++) {
510 sum = SUM_DOW(u_h->vec[j]);
511 for (i = 0; i < A->n_row; i++) {
512 f_h->vec[i] = c * f_h->vec[i] + a * A->data.real[i][j] * sum;
513 }
514 }
515 } else {
516 for (i = 0; i < A->n_row; i++) {
517 for (j = 0; j < A->n_col; j++) {
518 f_h->vec[i] =
519 c * f_h->vec[i] + a * SCP_DOW(A->data.real_d[i][j], u_h->vec[j]);
520 }
521 }
522 }
523 return;
524 }
525 }
526
527 __FLATTEN_ATTRIBUTE__
528 static inline
el_bi_mat_vec_rrd(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,EL_REAL_VEC * f_h)529 EL_REAL_VEC *el_bi_mat_vec_rrd(REAL a,
530 const EL_MATRIX *A,
531 REAL b,
532 const EL_MATRIX *B,
533 const EL_REAL_D_VEC *u_h,
534 REAL c,
535 EL_REAL_VEC *f_h)
536 {
537 const EL_MATRIX *A_chain;
538
539 if (A == NULL) {
540 A = B;
541 a = b;
542 B = NULL;
543 b = 0.0;
544 }
545 COL_CHAIN_DO(A, const EL_MATRIX) {
546 __el_bi_mat_vec_rrd(a, A, b, B, u_h, c, f_h);
547 ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
548 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
549 u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
550 __el_bi_mat_vec_rrd(a, A_chain, b, B, u_h, 1.0, f_h);
551 }
552 /* roll over to start of this row */
553 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
554 u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
555 /* next row */
556 f_h = CHAIN_NEXT(f_h, EL_REAL_VEC);
557 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
558 } COL_CHAIN_WHILE(A, const EL_MATRIX);
559
560 return f_h;
561 }
562
563 /* >>> */
564
565 /* <<< REAL x REAL_D, vaiable stride */
566
567 static inline
__el_bi_mat_vec_scl_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,EL_REAL_VEC * f_h)568 void __el_bi_mat_vec_scl_dow(REAL a,
569 const EL_MATRIX *A,
570 REAL b,
571 const EL_MATRIX *B,
572 const EL_REAL_VEC_D *u_h,
573 REAL c,
574 EL_REAL_VEC *f_h)
575 {
576 if (u_h->stride != 1) {
577 __el_bi_mat_vec_rrd(a, A, b, B, (const EL_REAL_D_VEC *)u_h, c, f_h);
578 } else {
579 __el_bi_mat_vec(a, A, b, B, (const EL_REAL_VEC *)u_h, c, f_h);
580 }
581 }
582
583 __FLATTEN_ATTRIBUTE__
584 static inline
el_bi_mat_vec_scl_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,EL_REAL_VEC * f_h)585 EL_REAL_VEC *el_bi_mat_vec_scl_dow(REAL a,
586 const EL_MATRIX *A,
587 REAL b,
588 const EL_MATRIX *B,
589 const EL_REAL_VEC_D *u_h,
590 REAL c,
591 EL_REAL_VEC *f_h)
592 {
593 const EL_MATRIX *A_chain;
594 if (A == NULL) {
595 A = B;
596 a = b;
597 B = NULL;
598 b = 0.0;
599 }
600 COL_CHAIN_DO(A, const EL_MATRIX) {
601 __el_bi_mat_vec_scl_dow(a, A, b, B, u_h, c, f_h);
602 ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
603 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
604 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
605 __el_bi_mat_vec_scl_dow(a, A_chain, b, B, u_h, 1.0, f_h);
606 }
607 /* roll-over to start of row */
608 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
609 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
610 /* next row */
611 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
612 f_h = CHAIN_NEXT(f_h, EL_REAL_VEC);
613 } COL_CHAIN_WHILE(A, const EL_MATRIX);
614
615 return f_h;
616 }
617
618 /* >>> */
619
620 /* >>> */
621
622 /* <<< DOW x SCAL (RANGE x DOMAIN) */
623
624 /* <<< READ_D x REAL, fixed stride */
625
626 static inline void
__el_bi_mat_vec_rdr(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_D_VEC * f_h)627 __el_bi_mat_vec_rdr(REAL a, const EL_MATRIX *A,
628 REAL b, const EL_MATRIX *B,
629 const EL_REAL_VEC *u_h,
630 REAL c, EL_REAL_D_VEC *f_h)
631 {
632 int i, j;
633 REAL val;
634
635 if (A && B) {
636 if (A->type == MATENT_REAL) {
637 if (B->type == MATENT_REAL) {
638 for (i = 0; i < A->n_row; i++) {
639 val = 0.0;
640 for (j = 0; j < A->n_col; j++) {
641 val +=
642 (a * A->data.real[i][j] + b * B->data.real[i][j]) * u_h->vec[j];
643 }
644 for (j = 0; j < DIM_OF_WORLD; j++) {
645 f_h->vec[i][j] = c * f_h->vec[i][j] + val;
646 }
647 }
648 return;
649 } else {
650 for (i = 0; i < A->n_row; i++) {
651 val = 0.0;
652 for (j = 0; j < A->n_col; j++) {
653 val += a * A->data.real[i][j] * u_h->vec[j];
654 }
655 for (j = 0; j < DIM_OF_WORLD; j++) {
656 f_h->vec[i][j] = c * f_h->vec[i][j] + val;
657 }
658 for (j = 0; j < A->n_col; j++) {
659 AXPY_DOW(b * u_h->vec[j], B->data.real_d[i][j], f_h->vec[i]);
660 }
661 }
662 return;
663 }
664 } else {
665 if (B->type == MATENT_REAL) {
666 __el_bi_mat_vec_rdr(b, B, a, A, u_h, c, f_h);
667 } else {
668 for (i = 0; i < A->n_row; i++) {
669 for (j = 0; j < DIM_OF_WORLD; j++) {
670 f_h->vec[i][j] = c * f_h->vec[i][j];
671 }
672 for (j = 0; j < A->n_col; j++) {
673 AXPY_DOW(a * u_h->vec[j], A->data.real_d[i][j], f_h->vec[i]);
674 AXPY_DOW(b * u_h->vec[j], B->data.real_d[i][j], f_h->vec[i]);
675 }
676 }
677 }
678 return;
679 }
680 } else if (A) {
681 if (A->type == MATENT_REAL) {
682 for (i = 0; i < A->n_row; i++) {
683 val = 0.0;
684 for (j = 0; j < A->n_col; j++) {
685 val += a * A->data.real[i][j] * u_h->vec[j];
686 }
687 for (j = 0; j < DIM_OF_WORLD; j++) {
688 f_h->vec[i][j] = c * f_h->vec[i][j] + val;
689 }
690 }
691 } else {
692 for (i = 0; i < A->n_row; i++) {
693 for (j = 0; j < A->n_col; j++) {
694 AXPBY_DOW(a * u_h->vec[j], A->data.real_d[i][j], c, f_h->vec[i],
695 f_h->vec[i]);
696 }
697 }
698 }
699 return;
700 }
701 }
702
703 __FLATTEN_ATTRIBUTE__
704 static inline
el_bi_mat_vec_rdr(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_D_VEC * f_h)705 EL_REAL_D_VEC *el_bi_mat_vec_rdr(REAL a,
706 const EL_MATRIX *A,
707 REAL b,
708 const EL_MATRIX *B,
709 const EL_REAL_VEC *u_h,
710 REAL c,
711 EL_REAL_D_VEC *f_h)
712 {
713 const EL_MATRIX *A_chain;
714
715 if (A == NULL) {
716 A = B;
717 a = b;
718 B = NULL;
719 b = 0.0;
720 }
721 COL_CHAIN_DO(A, const EL_MATRIX) {
722 __el_bi_mat_vec_rdr(a, A, b, B, u_h, c, f_h);
723 ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
724 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
725 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
726 __el_bi_mat_vec_rdr(a, A, b, B, u_h, 1.0, f_h);
727 }
728 /* roll-over to start of row */
729 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
730 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
731 /* next row */
732 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
733 f_h = CHAIN_NEXT(f_h, EL_REAL_D_VEC);
734 } COL_CHAIN_WHILE(A, const EL_MATRIX);
735
736 return f_h;
737 }
738
739 /* >>> */
740
741 /* <<< REAL_D x REAL, variable stride */
742
743 static inline
__el_bi_mat_vec_dow_scl(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_VEC_D * f_h)744 void __el_bi_mat_vec_dow_scl(REAL a,
745 const EL_MATRIX *A,
746 REAL b,
747 const EL_MATRIX *B,
748 const EL_REAL_VEC *u_h,
749 REAL c,
750 EL_REAL_VEC_D *f_h)
751 {
752 if (f_h->stride != 1) {
753 __el_bi_mat_vec_rdr(a, A, b, B, u_h, c, (EL_REAL_D_VEC *)f_h);
754 } else {
755 __el_bi_mat_vec(a, A, b, B, u_h, c, (EL_REAL_VEC *)f_h);
756 }
757 }
758
759 __FLATTEN_ATTRIBUTE__
760 static inline
el_bi_mat_vec_dow_scl(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,EL_REAL_VEC_D * f_h)761 EL_REAL_VEC_D *el_bi_mat_vec_dow_scl(REAL a,
762 const EL_MATRIX *A,
763 REAL b,
764 const EL_MATRIX *B,
765 const EL_REAL_VEC *u_h,
766 REAL c,
767 EL_REAL_VEC_D *f_h)
768 {
769 const EL_MATRIX *A_chain;
770
771 if (A == NULL) {
772 A = B;
773 a = b;
774 B = NULL;
775 b = 0.0;
776 }
777 COL_CHAIN_DO(A, const EL_MATRIX) {
778 __el_bi_mat_vec_dow_scl(a, A, b, B, u_h, c, f_h);
779 ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
780 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
781 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
782 __el_bi_mat_vec_dow_scl(a, A, b, B, u_h, 1.0, f_h);
783 }
784 /* roll-over to start of row */
785 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
786 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
787 /* next row */
788 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
789 f_h = CHAIN_NEXT(f_h, EL_REAL_VEC_D);
790 } COL_CHAIN_WHILE(A, const EL_MATRIX);
791
792 return f_h;
793 }
794
795 /* >>> */
796
797 /* >>> */
798
799 /* <<< DOW x DOW (RANGE x DOMAIN) */
800
801 /* <<< fixed stride */
802
803 /* Multiply an element matrix by an element vector and store the
804 * result inside an element vector.
805 */
806 static inline
__el_bi_mat_vec_d(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,EL_REAL_D_VEC * f_h)807 void __el_bi_mat_vec_d(REAL a, const EL_MATRIX *A,
808 REAL b, const EL_MATRIX *B,
809 const EL_REAL_D_VEC *u_h,
810 REAL c, EL_REAL_D_VEC *f_h)
811 {
812 int i, j;
813 REAL *sum;
814
815 if (A && B) {
816 #undef MAT_BI_BODY
817 #define MAT_BI_BODY(PFX1, CC1, C1, S1, TYPE1, \
818 PFX2, CC2, C2, S2, TYPE2) \
819 for (i = 0; i < A->n_row; i++) { \
820 sum = f_h->vec[i]; \
821 for (j = 0; j < A->n_col; j++) { \
822 PFX1##PFX2##BIMV_DOW(a, CC1 A->data.S1[i][j], \
823 b, CC2 B->data.S2[i][j], \
824 u_h->vec[j], \
825 c, sum); \
826 } \
827 }
828 MAT_EMIT_BI_BODY_SWITCH(A->type, B->type);
829 } else {
830 if (A == NULL) {
831 A = B;
832 a = b;
833 B = NULL;
834 b = 0.0;
835 }
836 #undef MAT_BODY
837 #define MAT_BODY(PFX, CC, C, S, TYPE) \
838 for (i = 0; i < A->n_row; i++) { \
839 sum = f_h->vec[i]; \
840 for (j = 0; j < A->n_col; j++) { \
841 PFX##GEMV_DOW(a, CC A->data.S[i][j], u_h->vec[j], c, sum); \
842 } \
843 }
844 MAT_EMIT_BODY_SWITCH(A->type);
845 }
846 }
847
848 __FLATTEN_ATTRIBUTE__
849 static inline
el_bi_mat_vec_d(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,EL_REAL_D_VEC * f_h)850 EL_REAL_D_VEC *el_bi_mat_vec_d(REAL a,
851 const EL_MATRIX *A,
852 REAL b,
853 const EL_MATRIX *B,
854 const EL_REAL_D_VEC *u_h,
855 REAL c,
856 EL_REAL_D_VEC *f_h)
857 {
858 const EL_MATRIX *A_chain;
859
860 if (A == NULL) {
861 A = B;
862 a = b;
863 B = NULL;
864 b = 0.0;
865 }
866 COL_CHAIN_DO(A, const EL_MATRIX) {
867 __el_bi_mat_vec_d(a, A, b, B, u_h, c, f_h);
868 ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
869 /* next column */
870 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
871 u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
872 __el_bi_mat_vec_d(a, A, b, B, u_h, 1.0, f_h);
873 }
874 /* roll-over to start of row */
875 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
876 u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
877 /* next row */
878 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
879 f_h = CHAIN_NEXT(f_h, EL_REAL_D_VEC);
880 } COL_CHAIN_WHILE(A, const EL_MATRIX);
881
882 return f_h;
883 }
884
885 /* >>> */
886
887 /* <<< REAL_D x REAL_D, variable stride */
888
889 static inline
__el_bi_mat_vec_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,EL_REAL_VEC_D * f_h)890 void __el_bi_mat_vec_dow(REAL a,
891 const EL_MATRIX *A,
892 REAL b,
893 const EL_MATRIX *B,
894 const EL_REAL_VEC_D *u_h,
895 REAL c,
896 EL_REAL_VEC_D *f_h)
897 {
898 if (f_h->stride == 1) {
899 __el_bi_mat_vec_scl_dow(a, A, b, B, u_h, c, (EL_REAL_VEC *)f_h);
900 } else if (u_h->stride == 1) {
901 __el_bi_mat_vec_dow_scl(a, A, b, B, (const EL_REAL_VEC *)u_h, c, f_h);
902 } else {
903 __el_bi_mat_vec_d(a, A, b, B, (const EL_REAL_D_VEC *)u_h,
904 c, (EL_REAL_D_VEC *)f_h);
905 }
906 }
907
908 __FLATTEN_ATTRIBUTE__
909 static inline
el_bi_mat_vec_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,EL_REAL_VEC_D * f_h)910 EL_REAL_VEC_D *el_bi_mat_vec_dow(REAL a,
911 const EL_MATRIX *A,
912 REAL b,
913 const EL_MATRIX *B,
914 const EL_REAL_VEC_D *u_h,
915 REAL c,
916 EL_REAL_VEC_D *f_h)
917 {
918 const EL_MATRIX *A_chain;
919
920 if (A == NULL) {
921 A = B;
922 a = b;
923 B = NULL;
924 b = 0.0;
925 }
926 COL_CHAIN_DO(A, const EL_MATRIX) {
927 __el_bi_mat_vec_dow(a, A, b, B, u_h, c, f_h);
928 ROW_CHAIN_FOREACH(A_chain, A, const EL_MATRIX) {
929 /* next column */
930 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
931 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
932 __el_bi_mat_vec_dow(a, A, b, B, u_h, 1.0, f_h);
933 }
934 /* roll-over to start of row */
935 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
936 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
937 /* next row */
938 f_h = CHAIN_NEXT(f_h, EL_REAL_VEC_D);
939 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
940 } COL_CHAIN_WHILE(A, const EL_MATRIX);
941
942 return f_h;
943 }
944
945 /* >>> */
946
947 /* >>> */
948
949 /* <<< f = c * f + a A u, f += a A u versions */
950
951 /* <<< SCL x SCL */
952
953 static inline EL_REAL_VEC *
el_gen_mat_vec(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,EL_REAL_VEC * f_h)954 el_gen_mat_vec(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
955 REAL b, EL_REAL_VEC *f_h)
956 {
957 return el_bi_mat_vec(a, A, 0.0, NULL, u_h, b, f_h);
958 }
959
960 static inline EL_REAL_VEC *
el_mat_vec(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,EL_REAL_VEC * f_h)961 el_mat_vec(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h, EL_REAL_VEC *f_h)
962 {
963 return el_gen_mat_vec(a, A, u_h, 1.0, f_h);
964 }
965
966 /* >>> */
967
968 /* <<< SCL x DOW */
969
970 static inline EL_REAL_VEC *
el_gen_mat_vec_rrd(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,REAL b,EL_REAL_VEC * f_h)971 el_gen_mat_vec_rrd(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
972 REAL b, EL_REAL_VEC *f_h)
973 {
974 return el_bi_mat_vec_rrd(a, A, 0.0, NULL, u_h, b, f_h);
975 }
976
977 static inline EL_REAL_VEC *
el_mat_vec_rrd(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,EL_REAL_VEC * f_h)978 el_mat_vec_rrd(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
979 EL_REAL_VEC *f_h)
980 {
981 return el_gen_mat_vec_rrd(a, A, u_h, 1.0, f_h);
982 }
983
984 static inline EL_REAL_VEC *
el_gen_mat_vec_scl_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,REAL b,EL_REAL_VEC * f_h)985 el_gen_mat_vec_scl_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
986 REAL b, EL_REAL_VEC *f_h)
987 {
988 return el_bi_mat_vec_scl_dow(a, A, 0.0, NULL, u_h, b, f_h);
989 }
990
991 static inline EL_REAL_VEC *
el_mat_vec_scl_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,EL_REAL_VEC * f_h)992 el_mat_vec_scl_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
993 EL_REAL_VEC *f_h)
994 {
995 return el_gen_mat_vec_scl_dow(a, A, u_h, 1.0, f_h);
996 }
997
998 /* >>> */
999
1000 /* <<< DOW x SCL */
1001
1002 static inline EL_REAL_D_VEC *
el_gen_mat_vec_rdr(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,EL_REAL_D_VEC * f_h)1003 el_gen_mat_vec_rdr(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1004 REAL b, EL_REAL_D_VEC *f_h)
1005 {
1006 return el_bi_mat_vec_rdr(a, A, 0.0, NULL, u_h, b, f_h);
1007 }
1008
1009 static inline EL_REAL_D_VEC *
el_mat_vec_rdr(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,EL_REAL_D_VEC * f_h)1010 el_mat_vec_rdr(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1011 EL_REAL_D_VEC *f_h)
1012 {
1013 return el_gen_mat_vec_rdr(a, A, u_h, 1.0, f_h);
1014 }
1015
1016 static inline EL_REAL_VEC_D *
el_gen_mat_vec_dow_scl(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,EL_REAL_VEC_D * f_h)1017 el_gen_mat_vec_dow_scl(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1018 REAL b, EL_REAL_VEC_D *f_h)
1019 {
1020 return el_bi_mat_vec_dow_scl(a, A, 0.0, NULL, u_h, b, f_h);
1021 }
1022
1023 static inline EL_REAL_VEC_D *
el_mat_vec_dow_scl(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,EL_REAL_VEC_D * f_h)1024 el_mat_vec_dow_scl(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1025 EL_REAL_VEC_D *f_h)
1026 {
1027 return el_gen_mat_vec_dow_scl(a, A, u_h, 1.0, f_h);
1028 }
1029
1030 /* >>> */
1031
1032 /* <<< DOW x DOW */
1033
1034 static inline EL_REAL_D_VEC *
el_gen_mat_vec_d(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,REAL b,EL_REAL_D_VEC * f_h)1035 el_gen_mat_vec_d(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1036 REAL b, EL_REAL_D_VEC *f_h)
1037 {
1038 return el_bi_mat_vec_d(a, A, 0.0, NULL, u_h, b, f_h);
1039 }
1040
1041 static inline EL_REAL_D_VEC *
el_mat_vec_d(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,EL_REAL_D_VEC * f_h)1042 el_mat_vec_d(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1043 EL_REAL_D_VEC *f_h)
1044 {
1045 return el_gen_mat_vec_d(a, A, u_h, 1.0, f_h);
1046 }
1047
1048 static inline EL_REAL_VEC_D *
el_gen_mat_vec_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,REAL b,EL_REAL_VEC_D * f_h)1049 el_gen_mat_vec_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1050 REAL b, EL_REAL_VEC_D *f_h)
1051 {
1052 return el_bi_mat_vec_dow(a, A, 0.0, NULL, u_h, b, f_h);
1053 }
1054
1055 static inline EL_REAL_VEC_D *
el_mat_vec_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,EL_REAL_VEC_D * f_h)1056 el_mat_vec_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1057 EL_REAL_VEC_D *f_h)
1058 {
1059 return el_gen_mat_vec_dow(a, A, u_h, 1.0, f_h);
1060 }
1061
1062 /* >>> */
1063
1064 /* >>> */
1065
1066 /* >>> */
1067
1068 /* <<< f = c * f + (a A + b B) u, result is a global DOF-vector */
1069
1070 /* <<< SCAL-SCAL */
1071
1072 static inline
__bi_mat_el_vec(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1073 void __bi_mat_el_vec(REAL a, const EL_MATRIX *A,
1074 REAL b, const EL_MATRIX *B,
1075 const EL_REAL_VEC *u_h,
1076 REAL c, DOF_REAL_VEC *f_h,
1077 const EL_DOF_VEC *dof,
1078 const EL_SCHAR_VEC *bnd)
1079 {
1080 int i, j;
1081 REAL sum;
1082
1083 if (A && B) {
1084 for (i = 0; i < A->n_row; i++) {
1085 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1086 sum = 0.0;
1087 for (j = 0; j < A->n_col; j++) {
1088 sum +=
1089 (a * A->data.real[i][j] + b * B->data.real[i][j]) * u_h->vec[j];
1090 }
1091 f_h->vec[dof->vec[i]] = c * f_h->vec[dof->vec[i]] + sum;
1092 }
1093 }
1094 } else {
1095 if (B) {
1096 A = B;
1097 a = b;
1098 B = NULL;
1099 b = 0.0;
1100 }
1101 for (i = 0; i < A->n_row; i++) {
1102 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1103 sum = 0.0;
1104 for (j = 0; j < A->n_col; j++) {
1105 sum += a * A->data.real[i][j] * u_h->vec[j];
1106 }
1107 f_h->vec[dof->vec[i]] = c * f_h->vec[dof->vec[i]] + sum;
1108 }
1109 }
1110 }
1111 }
1112
1113 __FLATTEN_ATTRIBUTE__
1114 static inline
bi_mat_el_vec(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1115 void bi_mat_el_vec(REAL a,
1116 const EL_MATRIX *A,
1117 REAL b,
1118 const EL_MATRIX *B,
1119 const EL_REAL_VEC *u_h,
1120 REAL c,
1121 DOF_REAL_VEC *f_h,
1122 const EL_DOF_VEC *dof,
1123 const EL_SCHAR_VEC *bnd)
1124 {
1125 if (A == NULL) {
1126 A = B;
1127 a = b;
1128 B = NULL;
1129 b = 0.0;
1130 }
1131 COL_CHAIN_DO(A, const EL_MATRIX) {
1132 ROW_CHAIN_DO(A, const EL_MATRIX) {
1133
1134 __bi_mat_el_vec(a, A, b, B, u_h, c, f_h, dof, bnd);
1135
1136 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1137 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
1138 } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1139 f_h = CHAIN_NEXT(f_h, DOF_REAL_VEC);
1140 dof = CHAIN_NEXT(dof, EL_DOF_VEC);
1141 bnd = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1142 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1143 } COL_CHAIN_WHILE(A, const EL_MATRIX);
1144 }
1145
1146 /* >>> */
1147
1148 /* <<< SCAL-DOW version */
1149
1150 /* <<< fixed stride */
1151
1152 static inline
__bi_mat_el_vec_rrd(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1153 void __bi_mat_el_vec_rrd(REAL a, const EL_MATRIX *A,
1154 REAL b, const EL_MATRIX *B,
1155 const EL_REAL_D_VEC *u_h,
1156 REAL c, DOF_REAL_VEC *f_h,
1157 const EL_DOF_VEC *dof,
1158 const EL_SCHAR_VEC *bnd)
1159 {
1160 int i, j;
1161 REAL sum;
1162
1163 /* A and B must be either REAL or REAL_D matrices */
1164 if (A && B) {
1165 if (A->type == MATENT_REAL) {
1166 if (B->type == MATENT_REAL) {
1167 for (j = 0; j < A->n_col; j++) {
1168 sum = SUM_DOW(u_h->vec[j]);
1169 for (i = 0; i < A->n_row; i++) {
1170 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1171 f_h->vec[dof->vec[i]] =
1172 c * f_h->vec[dof->vec[i]]
1173 +
1174 (a * A->data.real[i][j] + b * B->data.real[i][j]) * sum;
1175 }
1176 }
1177 }
1178 return;
1179 } else {
1180 for (j = 0; j < A->n_col; j++) {
1181 REAL u_sum = SUM_DOW(u_h->vec[j]);
1182 for (i = 0; i < A->n_row; i++) {
1183 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1184 f_h->vec[dof->vec[i]] =
1185 c * f_h->vec[dof->vec[i]]
1186 +
1187 a * A->data.real[i][j] * u_sum
1188 +
1189 b * SCP_DOW(B->data.real_d[i][j], u_h->vec[j]);
1190 }
1191 }
1192 }
1193 return;
1194 }
1195 } else {
1196 if (B->type == MATENT_REAL) {
1197 __bi_mat_el_vec_rrd(b, B, a, A, u_h, c, f_h, dof, bnd);
1198 } else {
1199 for (j = 0; j < A->n_col; j++) {
1200 for (i = 0; i < A->n_row; i++) {
1201 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1202 f_h->vec[dof->vec[i]] =
1203 c * f_h->vec[dof->vec[i]]
1204 +
1205 a * SCP_DOW(A->data.real_d[i][j], u_h->vec[j])
1206 +
1207 b * SCP_DOW(B->data.real_d[i][j], u_h->vec[j]);
1208 }
1209 }
1210 }
1211 }
1212 return;
1213 }
1214 } else if (A) {
1215 if (A->type == MATENT_REAL) {
1216 for (j = 0; j < A->n_col; j++) {
1217 sum = SUM_DOW(u_h->vec[j]);
1218 for (i = 0; i < A->n_row; i++) {
1219 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1220 f_h->vec[dof->vec[i]] =
1221 c * f_h->vec[dof->vec[i]] + + a * A->data.real[i][j] * sum;
1222 }
1223 }
1224 }
1225 } else {
1226 for (i = 0; i < A->n_row; i++) {
1227 for (j = 0; j < A->n_col; j++) {
1228 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1229 f_h->vec[dof->vec[i]] =
1230 c * f_h->vec[dof->vec[i]]
1231 +
1232 a * SCP_DOW(A->data.real_d[i][j], u_h->vec[j]);
1233 }
1234 }
1235 }
1236 }
1237 return;
1238 }
1239 }
1240
1241 __FLATTEN_ATTRIBUTE__
1242 static inline
bi_mat_el_vec_rrd(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1243 void bi_mat_el_vec_rrd(REAL a,
1244 const EL_MATRIX *A,
1245 REAL b,
1246 const EL_MATRIX *B,
1247 const EL_REAL_D_VEC *u_h,
1248 REAL c,
1249 DOF_REAL_VEC *f_h,
1250 const EL_DOF_VEC *dof,
1251 const EL_SCHAR_VEC *bnd)
1252 {
1253 if (A == NULL) {
1254 A = B;
1255 a = b;
1256 B = NULL;
1257 b = 0.0;
1258 }
1259 COL_CHAIN_DO(A, const EL_MATRIX) {
1260 ROW_CHAIN_DO(A, const EL_MATRIX) {
1261
1262 __bi_mat_el_vec_rrd(a, A, b, B, u_h, c, f_h, dof, bnd);
1263
1264 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1265 u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
1266 } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1267 f_h = CHAIN_NEXT(f_h, DOF_REAL_VEC);
1268 dof = CHAIN_NEXT(dof, EL_DOF_VEC);
1269 bnd = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1270 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1271 } COL_CHAIN_WHILE(A, const EL_MATRIX);
1272 }
1273
1274 /* >>> */
1275
1276 /* <<< variable stride version */
1277
1278 static inline
__bi_mat_el_vec_scl_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1279 void __bi_mat_el_vec_scl_dow(REAL a, const EL_MATRIX *A,
1280 REAL b, const EL_MATRIX *B,
1281 const EL_REAL_VEC_D *u_h,
1282 REAL c, DOF_REAL_VEC *f_h,
1283 const EL_DOF_VEC *dof,
1284 const EL_SCHAR_VEC *bnd)
1285 {
1286 if (u_h->stride != 1) {
1287 __bi_mat_el_vec_rrd(a, A, b, B, (const EL_REAL_D_VEC *)u_h,
1288 c, f_h, dof, bnd);
1289 } else {
1290 __bi_mat_el_vec(a, A, b, B, (const EL_REAL_VEC *)u_h,
1291 c, f_h, dof, bnd);
1292 }
1293 }
1294
1295 __FLATTEN_ATTRIBUTE__
1296 static inline
bi_mat_el_vec_scl_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1297 void bi_mat_el_vec_scl_dow(REAL a,
1298 const EL_MATRIX *A,
1299 REAL b,
1300 const EL_MATRIX *B,
1301 const EL_REAL_VEC_D *u_h,
1302 REAL c,
1303 DOF_REAL_VEC *f_h,
1304 const EL_DOF_VEC *dof,
1305 const EL_SCHAR_VEC *bnd)
1306 {
1307 if (A == NULL) {
1308 A = B;
1309 a = b;
1310 B = NULL;
1311 b = 0.0;
1312 }
1313 COL_CHAIN_DO(A, const EL_MATRIX) {
1314 ROW_CHAIN_DO(A, const EL_MATRIX) {
1315
1316 __bi_mat_el_vec_scl_dow(a, A, b, B, u_h, c, f_h, dof, bnd);
1317
1318 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1319 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
1320 } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1321 f_h = CHAIN_NEXT(f_h, DOF_REAL_VEC);
1322 dof = CHAIN_NEXT(dof, EL_DOF_VEC);
1323 bnd = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1324 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1325 } COL_CHAIN_WHILE(A, const EL_MATRIX);
1326 }
1327
1328 /* >>> */
1329
1330 /* >>> */
1331
1332 /* <<< DOW-SCAL version */
1333
1334 /* <<< fixed stride */
1335
1336 static inline
__bi_mat_el_vec_rdr(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1337 void __bi_mat_el_vec_rdr(REAL a, const EL_MATRIX *A,
1338 REAL b, const EL_MATRIX *B,
1339 const EL_REAL_VEC *u_h,
1340 REAL c, DOF_REAL_D_VEC *f_h,
1341 const EL_DOF_VEC *dof,
1342 const EL_SCHAR_VEC *bnd)
1343 {
1344 int i, j;
1345 REAL val;
1346
1347 if (A && B) {
1348 if (A->type == MATENT_REAL) {
1349 if (B->type == MATENT_REAL) {
1350 for (i = 0; i < A->n_row; i++) {
1351 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1352 val = 0.0;
1353 for (j = 0; j < A->n_col; j++) {
1354 val +=
1355 (a * A->data.real[i][j] + b * B->data.real[i][j]) * u_h->vec[j];
1356 }
1357 for (j = 0; j < DIM_OF_WORLD; j++) {
1358 f_h->vec[dof->vec[i]][j] = c * f_h->vec[dof->vec[i]][j] + val;
1359 }
1360 }
1361 }
1362 return;
1363 } else {
1364 for (i = 0; i < A->n_row; i++) {
1365 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1366 val = 0.0;
1367 for (j = 0; j < A->n_col; j++) {
1368 val += a * A->data.real[i][j] * u_h->vec[j];
1369 }
1370 for (j = 0; j < DIM_OF_WORLD; j++) {
1371 f_h->vec[dof->vec[i]][j] = c * f_h->vec[dof->vec[i]][j] + val;
1372 }
1373 for (j = 0; j < A->n_col; j++) {
1374 AXPY_DOW(b * u_h->vec[j], B->data.real_d[i][j],
1375 f_h->vec[dof->vec[i]]);
1376 }
1377 }
1378 }
1379 return;
1380 }
1381 } else {
1382 if (B->type == MATENT_REAL) {
1383 __bi_mat_el_vec_rdr(b, B, a, A, u_h, c, f_h, dof, bnd);
1384 } else {
1385 for (i = 0; i < A->n_row; i++) {
1386 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1387 for (j = 0; j < DIM_OF_WORLD; j++) {
1388 f_h->vec[dof->vec[i]][j] = c * f_h->vec[dof->vec[i]][j];
1389 }
1390 for (j = 0; j < A->n_col; j++) {
1391 AXPY_DOW(a * u_h->vec[j], A->data.real_d[i][j],
1392 f_h->vec[dof->vec[i]]);
1393 AXPY_DOW(b * u_h->vec[j], B->data.real_d[i][j],
1394 f_h->vec[dof->vec[i]]);
1395 }
1396 }
1397 }
1398 }
1399 }
1400 } else if (A) {
1401 if (A->type == MATENT_REAL) {
1402 for (i = 0; i < A->n_row; i++) {
1403 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1404 val = 0.0;
1405 for (j = 0; j < A->n_col; j++) {
1406 val += a * A->data.real[i][j] * u_h->vec[j];
1407 }
1408 for (j = 0; j < DIM_OF_WORLD; j++) {
1409 f_h->vec[dof->vec[i]][j] = c * f_h->vec[dof->vec[i]][j] + val;
1410 }
1411 }
1412 }
1413 } else {
1414 for (i = 0; i < A->n_row; i++) {
1415 if (bnd == NULL || bnd->vec[i] < DIRICHLET) {
1416 for (j = 0; j < A->n_col; j++) {
1417 AXPBY_DOW(a * u_h->vec[j], A->data.real_d[i][j],
1418 c, f_h->vec[dof->vec[i]],
1419 f_h->vec[dof->vec[i]]);
1420 }
1421 }
1422 }
1423 }
1424 return;
1425 }
1426 }
1427
bi_mat_el_vec_rdr(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1428 static inline void bi_mat_el_vec_rdr(REAL a,
1429 const EL_MATRIX *A,
1430 REAL b,
1431 const EL_MATRIX *B,
1432 const EL_REAL_VEC *u_h,
1433 REAL c,
1434 DOF_REAL_D_VEC *f_h,
1435 const EL_DOF_VEC *dof,
1436 const EL_SCHAR_VEC *bnd)
1437 {
1438 if (A == NULL) {
1439 A = B;
1440 a = b;
1441 B = NULL;
1442 b = 0.0;
1443 }
1444 COL_CHAIN_DO(A, const EL_MATRIX) {
1445 ROW_CHAIN_DO(A, const EL_MATRIX) {
1446
1447 __bi_mat_el_vec_rdr(a, A, b, B, u_h, c, f_h, dof, bnd);
1448
1449 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1450 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
1451 } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1452 f_h = CHAIN_NEXT(f_h, DOF_REAL_D_VEC);
1453 dof = CHAIN_NEXT(dof, EL_DOF_VEC);
1454 bnd = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1455 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1456 } COL_CHAIN_WHILE(A, const EL_MATRIX);
1457 }
1458
1459 /* >>> */
1460
1461 /* <<< variable stride version */
1462
1463 static inline
__bi_mat_el_vec_dow_scl(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1464 void __bi_mat_el_vec_dow_scl(REAL a, const EL_MATRIX *A,
1465 REAL b, const EL_MATRIX *B,
1466 const EL_REAL_VEC *u_h,
1467 REAL c, DOF_REAL_VEC_D *f_h,
1468 const EL_DOF_VEC *dof,
1469 const EL_SCHAR_VEC *bnd)
1470 {
1471 if (f_h->stride != 1) {
1472 __bi_mat_el_vec_rdr(a, A, b, B, u_h, c, (DOF_REAL_D_VEC *)f_h, dof, bnd);
1473 } else {
1474 __bi_mat_el_vec(a, A, b, B, u_h, c, (DOF_REAL_VEC *)f_h, dof, bnd);
1475 }
1476 }
1477
1478 __FLATTEN_ATTRIBUTE__
1479 static inline
bi_mat_el_vec_dow_scl(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC * u_h,REAL c,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1480 void bi_mat_el_vec_dow_scl(REAL a,
1481 const EL_MATRIX *A,
1482 REAL b,
1483 const EL_MATRIX *B,
1484 const EL_REAL_VEC *u_h,
1485 REAL c,
1486 DOF_REAL_VEC_D *f_h,
1487 const EL_DOF_VEC *dof,
1488 const EL_SCHAR_VEC *bnd)
1489 {
1490 if (A == NULL) {
1491 A = B;
1492 a = b;
1493 B = NULL;
1494 b = 0.0;
1495 }
1496 COL_CHAIN_DO(A, const EL_MATRIX) {
1497 ROW_CHAIN_DO(A, const EL_MATRIX) {
1498
1499 __bi_mat_el_vec_dow_scl(a, A, b, B, u_h, c, f_h, dof, bnd);
1500
1501 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1502 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC);
1503 } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1504 f_h = CHAIN_NEXT(f_h, DOF_REAL_VEC_D);
1505 dof = CHAIN_NEXT(dof, EL_DOF_VEC);
1506 bnd = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1507 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1508 } COL_CHAIN_WHILE(A, const EL_MATRIX);
1509 }
1510
1511 /* >>> */
1512
1513 /* >>> */
1514
1515 /* <<< DOW-DOW version */
1516
1517 /* <<< fixed stride */
1518
1519 /* Multiply an element matrix by an element vector and store the
1520 * result inside an element vector.
1521 */
1522 static inline
__bi_mat_el_vec_d(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1523 void __bi_mat_el_vec_d(REAL a, const EL_MATRIX *A,
1524 REAL b, const EL_MATRIX *B,
1525 const EL_REAL_D_VEC *u_h,
1526 REAL c, DOF_REAL_D_VEC *f_h,
1527 const EL_DOF_VEC *dof,
1528 const EL_SCHAR_VEC *bnd)
1529 {
1530 int i, j;
1531 REAL *sum;
1532
1533 if (A && B) {
1534 #undef MAT_BI_BODY
1535 #define MAT_BI_BODY(PFX1, CC1, C1, S1, TYPE1, \
1536 PFX2, CC2, C2, S2, TYPE2) \
1537 for (i = 0; i < A->n_row; i++) { \
1538 if (bnd == NULL || bnd->vec[i] < DIRICHLET) { \
1539 sum = f_h->vec[dof->vec[i]]; \
1540 for (j = 0; j < A->n_col; j++) { \
1541 PFX1##PFX2##BIMV_DOW(a, CC1 A->data.S1[i][j], \
1542 b, CC2 B->data.S2[i][j], \
1543 u_h->vec[j], \
1544 c, sum); \
1545 } \
1546 } \
1547 }
1548 MAT_EMIT_BI_BODY_SWITCH(A->type, B->type);
1549 } else {
1550 if (A == NULL) {
1551 A = B;
1552 a = b;
1553 B = NULL;
1554 b = 0.0;
1555 }
1556 #undef MAT_BODY
1557 #define MAT_BODY(PFX, CC, C, S, TYPE) \
1558 for (i = 0; i < A->n_row; i++) { \
1559 if (bnd == NULL || bnd->vec[i] < DIRICHLET) { \
1560 sum = f_h->vec[dof->vec[i]]; \
1561 for (j = 0; j < A->n_col; j++) { \
1562 PFX##GEMV_DOW(a, CC A->data.S[i][j], u_h->vec[j], c, sum); \
1563 } \
1564 } \
1565 }
1566 MAT_EMIT_BODY_SWITCH(A->type);
1567 }
1568 }
1569
1570 __FLATTEN_ATTRIBUTE__
1571 static inline
bi_mat_el_vec_d(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_D_VEC * u_h,REAL c,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1572 void bi_mat_el_vec_d(REAL a,
1573 const EL_MATRIX *A,
1574 REAL b,
1575 const EL_MATRIX *B,
1576 const EL_REAL_D_VEC *u_h,
1577 REAL c,
1578 DOF_REAL_D_VEC *f_h,
1579 const EL_DOF_VEC *dof,
1580 const EL_SCHAR_VEC *bnd)
1581 {
1582 if (A == NULL) {
1583 A = B;
1584 a = b;
1585 B = NULL;
1586 b = 0.0;
1587 }
1588 COL_CHAIN_DO(A, const EL_MATRIX) {
1589 ROW_CHAIN_DO(A, const EL_MATRIX) {
1590
1591 __bi_mat_el_vec_d(a, A, b, B, u_h, c, f_h, dof, bnd);
1592
1593 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1594 u_h = CHAIN_NEXT(u_h, const EL_REAL_D_VEC);
1595 } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1596 f_h = CHAIN_NEXT(f_h, DOF_REAL_D_VEC);
1597 dof = CHAIN_NEXT(dof, EL_DOF_VEC);
1598 bnd = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1599 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1600 } COL_CHAIN_WHILE(A, const EL_MATRIX);
1601 }
1602
1603 /* >>> */
1604
1605 /* <<< variable stride version */
1606
1607 static inline
__bi_mat_el_vec_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1608 void __bi_mat_el_vec_dow(REAL a, const EL_MATRIX *A,
1609 REAL b, const EL_MATRIX *B,
1610 const EL_REAL_VEC_D *u_h,
1611 REAL c, DOF_REAL_VEC_D *f_h,
1612 const EL_DOF_VEC *dof,
1613 const EL_SCHAR_VEC *bnd)
1614 {
1615 if (f_h->stride == 1) {
1616 __bi_mat_el_vec_scl_dow(a, A, b, B, u_h,
1617 c, (DOF_REAL_VEC *)f_h, dof, bnd);
1618 } else if (u_h->stride == 1) {
1619 __bi_mat_el_vec_dow_scl(a, A, b, B, (const EL_REAL_VEC *)u_h,
1620 c, f_h, dof, bnd);
1621 } else {
1622 __bi_mat_el_vec_d(a, A, b, B, (const EL_REAL_D_VEC *)u_h,
1623 c, (DOF_REAL_D_VEC *)f_h, dof, bnd);
1624 }
1625 }
1626
1627 __FLATTEN_ATTRIBUTE__
1628 static inline
bi_mat_el_vec_dow(REAL a,const EL_MATRIX * A,REAL b,const EL_MATRIX * B,const EL_REAL_VEC_D * u_h,REAL c,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1629 void bi_mat_el_vec_dow(REAL a,
1630 const EL_MATRIX *A,
1631 REAL b,
1632 const EL_MATRIX *B,
1633 const EL_REAL_VEC_D *u_h,
1634 REAL c,
1635 DOF_REAL_VEC_D *f_h,
1636 const EL_DOF_VEC *dof,
1637 const EL_SCHAR_VEC *bnd)
1638 {
1639 if (A == NULL) {
1640 A = B;
1641 a = b;
1642 B = NULL;
1643 b = 0.0;
1644 }
1645 COL_CHAIN_DO(A, const EL_MATRIX) {
1646 ROW_CHAIN_DO(A, const EL_MATRIX) {
1647
1648 __bi_mat_el_vec_dow(a, A, b, B, u_h, c, f_h, dof, bnd);
1649
1650 B = B ? ROW_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1651 u_h = CHAIN_NEXT(u_h, const EL_REAL_VEC_D);
1652 } ROW_CHAIN_WHILE(A, const EL_MATRIX);
1653 f_h = CHAIN_NEXT(f_h, DOF_REAL_VEC_D);
1654 dof = CHAIN_NEXT(dof, EL_DOF_VEC);
1655 bnd = bnd ? CHAIN_NEXT(bnd, EL_SCHAR_VEC) : NULL;
1656 B = B ? COL_CHAIN_NEXT(B, const EL_MATRIX) : NULL;
1657 } COL_CHAIN_WHILE(A, const EL_MATRIX);
1658 }
1659
1660 /* >>> */
1661
1662 /* >>> */
1663
1664 /* <<< f = c * f + a A u, f += a A u versions */
1665
1666 /* <<< SCL x SCL */
1667
1668 static inline void
gen_mat_el_vec(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1669 gen_mat_el_vec(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1670 REAL b, DOF_REAL_VEC *f_h,
1671 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1672 {
1673 bi_mat_el_vec(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1674 }
1675
1676 static inline void
mat_el_vec(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1677 mat_el_vec(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1678 DOF_REAL_VEC *f_h, const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1679 {
1680 gen_mat_el_vec(a, A, u_h, 1.0, f_h, dof, bnd);
1681 }
1682
1683 /* >>> */
1684
1685 /* <<< DOW x DOW */
1686
1687 static inline void
gen_mat_el_vec_d(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,REAL b,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1688 gen_mat_el_vec_d(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1689 REAL b, DOF_REAL_D_VEC *f_h,
1690 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1691 {
1692 bi_mat_el_vec_d(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1693 }
1694
1695 static inline void
mat_el_vec_d(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1696 mat_el_vec_d(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1697 DOF_REAL_D_VEC *f_h,
1698 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1699 {
1700 gen_mat_el_vec_d(a, A, u_h, 1.0, f_h, dof, bnd);
1701 }
1702
1703 static inline void
gen_mat_el_vec_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,REAL b,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1704 gen_mat_el_vec_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1705 REAL b, DOF_REAL_VEC_D *f_h,
1706 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1707 {
1708 bi_mat_el_vec_dow(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1709 }
1710
1711 static inline void
mat_el_vec_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1712 mat_el_vec_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1713 DOF_REAL_VEC_D *f_h,
1714 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1715 {
1716 gen_mat_el_vec_dow(a, A, u_h, 1.0, f_h, dof, bnd);
1717 }
1718
1719 /* >>> */
1720
1721 /* <<< SCL x DOW */
1722
1723 static inline void
gen_mat_el_vec_rrd(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,REAL b,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1724 gen_mat_el_vec_rrd(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1725 REAL b, DOF_REAL_VEC *f_h,
1726 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1727 {
1728 bi_mat_el_vec_rrd(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1729 }
1730
1731 static inline void
mat_el_vec_rrd(REAL a,const EL_MATRIX * A,const EL_REAL_D_VEC * u_h,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1732 mat_el_vec_rrd(REAL a, const EL_MATRIX *A, const EL_REAL_D_VEC *u_h,
1733 DOF_REAL_VEC *f_h,
1734 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1735 {
1736 gen_mat_el_vec_rrd(a, A, u_h, 1.0, f_h, dof, bnd);
1737 }
1738
1739 static inline void
gen_mat_el_vec_scl_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,REAL b,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1740 gen_mat_el_vec_scl_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1741 REAL b, DOF_REAL_VEC *f_h,
1742 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1743 {
1744 bi_mat_el_vec_scl_dow(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1745 }
1746
1747 static inline void
mat_el_vec_scl_dow(REAL a,const EL_MATRIX * A,const EL_REAL_VEC_D * u_h,DOF_REAL_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1748 mat_el_vec_scl_dow(REAL a, const EL_MATRIX *A, const EL_REAL_VEC_D *u_h,
1749 DOF_REAL_VEC *f_h,
1750 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1751 {
1752 gen_mat_el_vec_scl_dow(a, A, u_h, 1.0, f_h, dof, bnd);
1753 }
1754
1755 /* >>> */
1756
1757 /* <<< DOW x SCL */
1758
1759 static inline void
gen_mat_el_vec_rdr(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1760 gen_mat_el_vec_rdr(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1761 REAL b, DOF_REAL_D_VEC *f_h,
1762 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1763 {
1764 bi_mat_el_vec_rdr(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1765 }
1766
1767 static inline void
mat_el_vec_rdr(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,DOF_REAL_D_VEC * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1768 mat_el_vec_rdr(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1769 DOF_REAL_D_VEC *f_h,
1770 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1771 {
1772 gen_mat_el_vec_rdr(a, A, u_h, 1.0, f_h, dof, bnd);
1773 }
1774
1775 static inline void
gen_mat_el_vec_dow_scl(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,REAL b,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1776 gen_mat_el_vec_dow_scl(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1777 REAL b, DOF_REAL_VEC_D *f_h,
1778 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1779 {
1780 bi_mat_el_vec_dow_scl(a, A, 0.0, NULL, u_h, b, f_h, dof, bnd);
1781 }
1782
1783 static inline void
mat_el_vec_dow_scl(REAL a,const EL_MATRIX * A,const EL_REAL_VEC * u_h,DOF_REAL_VEC_D * f_h,const EL_DOF_VEC * dof,const EL_SCHAR_VEC * bnd)1784 mat_el_vec_dow_scl(REAL a, const EL_MATRIX *A, const EL_REAL_VEC *u_h,
1785 DOF_REAL_VEC_D *f_h,
1786 const EL_DOF_VEC *dof, const EL_SCHAR_VEC *bnd)
1787 {
1788 gen_mat_el_vec_dow_scl(a, A, u_h, 1.0, f_h, dof, bnd);
1789 }
1790
1791 /* >>> */
1792
1793 /* >>> */
1794
1795 /* >>> */
1796
1797 /* <<< some blas functions (incomplete) */
1798
1799 /* <<< axpy, axey, set for EL_MATRIX */
1800
1801 static inline
__el_mat_set(REAL a,EL_MATRIX * res)1802 EL_MATRIX *__el_mat_set(REAL a, EL_MATRIX *res)
1803 {
1804 int i, j;
1805
1806 #undef MAT_BODY
1807 #define MAT_BODY(PFX, CC, C, S, TYPE) \
1808 for (i = 0; i < res->n_row; i++) { \
1809 for (j = 0; j < res->n_col; j++) { \
1810 PFX##SET_DOW(a, res->data.S[i][j]); \
1811 } \
1812 }
1813 MAT_EMIT_BODY_SWITCH(res->type);
1814
1815 return res;
1816 }
1817
el_mat_set(REAL a,EL_MATRIX * res)1818 static inline EL_MATRIX *el_mat_set(REAL a, EL_MATRIX *res)
1819 {
1820 COL_CHAIN_DO(res, EL_MATRIX) {
1821 ROW_CHAIN_DO(res, EL_MATRIX) {
1822 __el_mat_set(a, res);
1823 } ROW_CHAIN_WHILE(res, EL_MATRIX);
1824 } COL_CHAIN_WHILE(res, EL_MATRIX);
1825
1826 return res;
1827 }
1828
1829 static inline
__el_mat_axey(REAL a,const EL_MATRIX * A,EL_MATRIX * res)1830 EL_MATRIX *__el_mat_axey(REAL a, const EL_MATRIX *A, EL_MATRIX *res)
1831 {
1832 int i, j;
1833
1834 #undef MAT_TRI_BODY
1835 #define MAT_TRI_BODY(PFX1, CC1, C1, S1, TYPE1, \
1836 PFX2, CC2, C2, S2, TYPE2) \
1837 for (i = 0; i < A->n_row; i++) { \
1838 for (j = 0; j < A->n_col; j++) { \
1839 PFX1##PFX2##AXEY_DOW(a, CC2 A->data.S2[i][j], res->data.S1[i][j]); \
1840 } \
1841 }
1842 MAT_EMIT_TRI_BODY_SWITCH(A->type, res->type);
1843
1844 return res;
1845 }
1846
1847 static inline EL_MATRIX *
el_mat_axey(REAL a,const EL_MATRIX * A,EL_MATRIX * res)1848 el_mat_axey(REAL a, const EL_MATRIX *A, EL_MATRIX *res)
1849 {
1850 COL_CHAIN_DO(res, EL_MATRIX) {
1851 ROW_CHAIN_DO(res, EL_MATRIX) {
1852 __el_mat_axey(a, A, res);
1853 A = ROW_CHAIN_NEXT(A, const EL_MATRIX);
1854 } ROW_CHAIN_WHILE(res, EL_MATRIX);
1855 A = COL_CHAIN_NEXT(A, const EL_MATRIX);
1856 } COL_CHAIN_WHILE(res, EL_MATRIX);
1857
1858 return res;
1859 }
1860
1861 static inline
__el_mat_axpy(REAL a,const EL_MATRIX * A,EL_MATRIX * res)1862 EL_MATRIX *__el_mat_axpy(REAL a, const EL_MATRIX *A, EL_MATRIX *res)
1863 {
1864 int i, j;
1865
1866 #undef MAT_TRI_BODY
1867 #define MAT_TRI_BODY(PFX1, CC1, C1, S1, TYPE1, \
1868 PFX2, CC2, C2, S2, TYPE2) \
1869 for (i = 0; i < A->n_row; i++) { \
1870 for (j = 0; j < A->n_col; j++) { \
1871 PFX1##PFX2##AXPY_DOW(a, CC2 A->data.S2[i][j], res->data.S1[i][j]); \
1872 } \
1873 }
1874 MAT_EMIT_TRI_BODY_SWITCH(res->type, A->type);
1875
1876 return res;
1877 }
1878
1879 static inline EL_MATRIX *
el_mat_axpy(REAL a,const EL_MATRIX * A,EL_MATRIX * res)1880 el_mat_axpy(REAL a, const EL_MATRIX *A, EL_MATRIX *res)
1881 {
1882 COL_CHAIN_DO(res, EL_MATRIX) {
1883 ROW_CHAIN_DO(res, EL_MATRIX) {
1884 __el_mat_axpy(a, A, res);
1885 A = ROW_CHAIN_NEXT(A, const EL_MATRIX);
1886 } ROW_CHAIN_WHILE(res, EL_MATRIX);
1887 A = COL_CHAIN_NEXT(A, const EL_MATRIX);
1888 } COL_CHAIN_WHILE(res, EL_MATRIX);
1889
1890 return res;
1891 }
1892
1893 static inline
M_el_mat_axpby(REAL a,const EL_MATRIX * X,REAL b,const EL_MATRIX * Y,EL_MATRIX * res)1894 EL_MATRIX *M_el_mat_axpby(REAL a, const EL_MATRIX *X,
1895 REAL b, const EL_MATRIX *Y, EL_MATRIX *res)
1896 {
1897
1898 int i, j;
1899
1900 switch (X->type) {
1901 case MATENT_REAL_DD:
1902 switch (Y->type) {
1903 case MATENT_REAL_DD:
1904 for (i = 0; i < res->n_row; i++) {
1905 for (j = 0; j < res->n_col; j++) {
1906 MMMAXPBY_DOW(a, (const REAL_D *) X->data.real_dd[i][j],
1907 b, (const REAL_D *) Y->data.real_dd[i][j],
1908 res->data.real_dd[i][j]);
1909 }
1910 }
1911 break;
1912 case MATENT_REAL_D:
1913 for (i = 0; i < res->n_row; i++) {
1914 for (j = 0; j < res->n_col; j++) {
1915 MMDMAXPBY_DOW(a, (const REAL_D *) X->data.real_dd[i][j],
1916 b, Y->data.real_d[i][j], res->data.real_dd[i][j]);
1917 }
1918 }
1919 break;
1920 case MATENT_REAL:
1921 for (i = 0; i < res->n_row; i++) {
1922 for (j = 0; j < res->n_col; j++) {
1923 MMSCMAXPBY_DOW(a, (const REAL_D *) X->data.real_dd[i][j],
1924 b, Y->data.real[i][j], res->data.real_dd[i][j]);
1925 }
1926 }
1927 break;
1928 default:
1929 ERROR_EXIT("!!! unknown MATENT_TYPE: %d", Y->type);
1930 break;
1931 }
1932 break;
1933 case MATENT_REAL_D:
1934 if (Y->type == MATENT_REAL_D) {
1935 for (i = 0; i < res->n_row; i++) {
1936 for (j = 0; j < res->n_col; j++) {
1937 MDMDMAXPBY_DOW(a, X->data.real_d[i][j],
1938 b, Y->data.real_d[i][j], res->data.real_dd[i][j]);
1939 }
1940 }
1941 } else if (Y->type == MATENT_REAL) {
1942 for (i = 0; i < res->n_row; i++) {
1943 for (j = 0; j < res->n_col; j++) {
1944 MDMSCMAXPBY_DOW(a, X->data.real_d[i][j],
1945 b, Y->data.real[i][j], res->data.real_dd[i][j]);
1946 }
1947 }
1948 }
1949 break;
1950 case MATENT_REAL:
1951 if (res->type == MATENT_REAL) {
1952 for (i = 0; i < res->n_row; i++) {
1953 for (j = 0; j < res->n_col; j++) {
1954 MSCMSCMAXPBY_DOW(a, X->data.real[i][j],
1955 b, Y->data.real[i][j], res->data.real_dd[i][j]);
1956 }
1957 }
1958 }
1959 break;
1960 default:
1961 ERROR_EXIT("!!! unknown MATENT_TYPE: %d", X->type);
1962 break;
1963 }
1964
1965 return res;
1966 }
1967
1968 static inline
DM_el_mat_axpby(REAL a,const EL_MATRIX * X,REAL b,const EL_MATRIX * Y,EL_MATRIX * res)1969 EL_MATRIX *DM_el_mat_axpby(REAL a, const EL_MATRIX *X,
1970 REAL b, const EL_MATRIX *Y, EL_MATRIX *res)
1971 {
1972
1973 int i, j;
1974
1975 switch (X->type) {
1976
1977 case MATENT_REAL_D:
1978 if (Y->type == MATENT_REAL_D) {
1979 for (i = 0; i < res->n_row; i++) {
1980 for (j = 0; j < res->n_col; j++) {
1981 DMDMDMAXPBY_DOW(a, X->data.real_d[i][j],
1982 b, Y->data.real_d[i][j], res->data.real_d[i][j]);
1983 }
1984 }
1985 } else if (Y->type == MATENT_REAL) {
1986 for (i = 0; i < res->n_row; i++) {
1987 for (j = 0; j < res->n_col; j++) {
1988 DMDMSCMAXPBY_DOW(a, X->data.real_d[i][j],
1989 b, Y->data.real[i][j], res->data.real_d[i][j]);
1990 }
1991 }
1992 }
1993 break;
1994 case MATENT_REAL:
1995 if (Y->type == MATENT_REAL) {
1996 for (i = 0; i < res->n_row; i++) {
1997 for (j = 0; j < res->n_col; j++) {
1998 DMSCMSCMAXPBY_DOW(a, X->data.real[i][j],
1999 b, Y->data.real[i][j], res->data.real_d[i][j]);
2000 }
2001 }
2002 }
2003 break;
2004 default:
2005 ERROR_EXIT("!!! unknown MATENT_TYPE: %d", X->type);
2006 break;
2007 }
2008
2009 return res;
2010 }
2011
2012 static inline
SCM_el_mat_axpby(REAL a,const EL_MATRIX * X,REAL b,const EL_MATRIX * Y,EL_MATRIX * res)2013 EL_MATRIX *SCM_el_mat_axpby(REAL a, const EL_MATRIX *X,
2014 REAL b, const EL_MATRIX *Y, EL_MATRIX *res)
2015 {
2016 int i, j;
2017
2018 for (i = 0; i < res->n_row; i++) {
2019 for (j = 0; j < res->n_col; j++) {
2020 SCMSCMSCMAXPBY_DOW(a, X->data.real[i][j],
2021 b, Y->data.real[i][j], res->data.real[i][j]);
2022 }
2023 }
2024
2025 return res;
2026 }
2027
2028 static inline
__el_mat_axpby(REAL a,const EL_MATRIX * X,REAL b,const EL_MATRIX * Y,EL_MATRIX * res)2029 EL_MATRIX *__el_mat_axpby(REAL a, const EL_MATRIX *X,
2030 REAL b, const EL_MATRIX *Y, EL_MATRIX *res)
2031 {
2032 FUNCNAME("el_mat_axpby");
2033
2034
2035 REAL tmp;
2036 const EL_MATRIX *tmp_mat;
2037
2038
2039 TEST_EXIT(res,
2040 "please get_el_matrix(result,...) bevore using el_mat_axpby!!!\n");
2041 TEST_EXIT(res->type >= X->type,
2042 "MATENT_TYPE of EL_MATRIX *result have to be equal "
2043 "or greater then MATENT_TYPE of EL_MATRIX *X!!!\n");
2044 TEST_EXIT(res->type >= Y->type,
2045 "MATENT_TYPE of EL_MATRIX *result have to be equal "
2046 "or greater then MATENT_TYPE of EL_MATRIX *Y!!!\n");
2047
2048
2049 if (Y->type > X->type) {
2050 tmp_mat = X;
2051 tmp = a;
2052 X = Y;
2053 a = b;
2054 Y = tmp_mat;
2055 b = tmp;
2056 }
2057
2058 switch (res->type) {
2059 case MATENT_REAL_DD:
2060 res = M_el_mat_axpby(a, X, b, Y, res);
2061 break;
2062 case MATENT_REAL_D:
2063 res = DM_el_mat_axpby(a, X, b, Y, res);
2064 break;
2065 case MATENT_REAL:
2066 res = SCM_el_mat_axpby(a, X, b, Y, res);
2067 break;
2068 default:
2069 ERROR_EXIT("!!! unknown MATENT_TYPE: %d", res->type);
2070 break;
2071 }
2072
2073 return (res);
2074
2075 }
2076
2077 static inline EL_MATRIX *
el_mat_axpby(REAL a,const EL_MATRIX * X,REAL b,const EL_MATRIX * Y,EL_MATRIX * res)2078 el_mat_axpby(REAL a, const EL_MATRIX *X,
2079 REAL b, const EL_MATRIX *Y, EL_MATRIX *res)
2080 {
2081 COL_CHAIN_DO(res, EL_MATRIX) {
2082 ROW_CHAIN_DO(res, EL_MATRIX) {
2083 __el_mat_axpby(a, X, b, Y, res);
2084 X = ROW_CHAIN_NEXT(X, const EL_MATRIX);
2085 Y = ROW_CHAIN_NEXT(Y, const EL_MATRIX);
2086 } ROW_CHAIN_WHILE(res, EL_MATRIX);
2087 X = COL_CHAIN_NEXT(X, const EL_MATRIX);
2088 Y = COL_CHAIN_NEXT(Y, const EL_MATRIX);
2089 } COL_CHAIN_WHILE(res, EL_MATRIX);
2090
2091 return res;
2092 }
2093
2094 /* >>> */
2095
2096 /* <<< axpy() for element vectors */
2097
2098 static inline
__el_real_vec_axpy(REAL a,const EL_REAL_VEC * x,EL_REAL_VEC * y)2099 void __el_real_vec_axpy(REAL a, const EL_REAL_VEC *x, EL_REAL_VEC *y)
2100 {
2101 FUNCNAME("__el_real_vec_axpy");
2102 int i;
2103 DEBUG_TEST_EXIT(x->n_components == y->n_components,
2104 "element vector dimensions do not match, x: %d, y: %d.\n",
2105 x->n_components, y->n_components);
2106 for (i = 0; i < x->n_components; i++) {
2107 y->vec[i] += a * x->vec[i];
2108 }
2109 }
2110
2111 __FLATTEN_ATTRIBUTE__
2112 static inline
el_real_vec_axpy(REAL a,const EL_REAL_VEC * x,EL_REAL_VEC * y)2113 void el_real_vec_axpy(REAL a, const EL_REAL_VEC *x, EL_REAL_VEC *y)
2114 {
2115 CHAIN_DO(x, const EL_REAL_VEC) {
2116 __el_real_vec_axpy(a, x, y);
2117 y = CHAIN_NEXT(y, EL_REAL_VEC);
2118 } CHAIN_WHILE(x, const EL_REAL_VEC);
2119 }
2120
2121 static inline
__el_real_d_vec_axpy(REAL a,const EL_REAL_D_VEC * x,EL_REAL_D_VEC * y)2122 void __el_real_d_vec_axpy(REAL a, const EL_REAL_D_VEC *x, EL_REAL_D_VEC *y)
2123 {
2124 FUNCNAME("__el_real_d_vec_axpy");
2125 int i;
2126 DEBUG_TEST_EXIT(x->n_components == y->n_components,
2127 "element vector dimensions do not match, x: %d, y: %d.\n",
2128 x->n_components, y->n_components);
2129 for (i = 0; i < x->n_components; i++) {
2130 AXPY_DOW(a, x->vec[i], y->vec[i]);
2131 }
2132 }
2133
2134 __FLATTEN_ATTRIBUTE__
2135 static inline
el_real_d_vec_axpy(REAL a,const EL_REAL_D_VEC * x,EL_REAL_D_VEC * y)2136 void el_real_d_vec_axpy(REAL a, const EL_REAL_D_VEC *x, EL_REAL_D_VEC *y)
2137 {
2138 CHAIN_DO(x, const EL_REAL_D_VEC) {
2139 __el_real_d_vec_axpy(a, x, y);
2140 y = CHAIN_NEXT(y, EL_REAL_D_VEC);
2141 } CHAIN_WHILE(x, const EL_REAL_D_VEC);
2142 }
2143
2144 __FLATTEN_ATTRIBUTE__
2145 static inline
el_real_vec_d_axpy(REAL a,const EL_REAL_VEC_D * x,EL_REAL_VEC_D * y)2146 void el_real_vec_d_axpy(REAL a, const EL_REAL_VEC_D *x, EL_REAL_VEC_D *y)
2147 {
2148 CHAIN_DO(x, const EL_REAL_VEC_D) {
2149 if (x->stride == 1) {
2150 __el_real_vec_axpy(a, (const EL_REAL_VEC *)x, (EL_REAL_VEC *)y);
2151 } else {
2152 __el_real_d_vec_axpy(a, (const EL_REAL_D_VEC *)x, (EL_REAL_D_VEC *)y);
2153 }
2154 y = CHAIN_NEXT(y, EL_REAL_VEC_D);
2155 } CHAIN_WHILE(x, const EL_REAL_VEC_D);
2156 }
2157
2158 /* >>> */
2159
2160 /* <<< axey() for element vectors */
2161
2162 static inline
__el_real_vec_axey(REAL a,const EL_REAL_VEC * x,EL_REAL_VEC * y)2163 void __el_real_vec_axey(REAL a, const EL_REAL_VEC *x, EL_REAL_VEC *y)
2164 {
2165 FUNCNAME("__el_real_vec_axey");
2166 int i;
2167 DEBUG_TEST_EXIT(x->n_components == y->n_components,
2168 "element vector dimensions do not match, x: %d, y: %d.\n",
2169 x->n_components, y->n_components);
2170 for (i = 0; i < x->n_components; i++) {
2171 y->vec[i] = a * x->vec[i];
2172 }
2173 }
2174
2175 __FLATTEN_ATTRIBUTE__
2176 static inline
el_real_vec_axey(REAL a,const EL_REAL_VEC * x,EL_REAL_VEC * y)2177 void el_real_vec_axey(REAL a, const EL_REAL_VEC *x, EL_REAL_VEC *y)
2178 {
2179 CHAIN_DO(x, const EL_REAL_VEC) {
2180 __el_real_vec_axey(a, x, y);
2181 y = CHAIN_NEXT(y, EL_REAL_VEC);
2182 } CHAIN_WHILE(x, const EL_REAL_VEC);
2183 }
2184
2185 static inline
__el_real_d_vec_axey(REAL a,const EL_REAL_D_VEC * x,EL_REAL_D_VEC * y)2186 void __el_real_d_vec_axey(REAL a, const EL_REAL_D_VEC *x, EL_REAL_D_VEC *y)
2187 {
2188 FUNCNAME("__el_real_d_vec_axey");
2189 int i;
2190 DEBUG_TEST_EXIT(x->n_components == y->n_components,
2191 "element vector dimensions do not match, x: %d, y: %d.\n",
2192 x->n_components, y->n_components);
2193 for (i = 0; i < x->n_components; i++) {
2194 AXEY_DOW(a, x->vec[i], y->vec[i]);
2195 }
2196 }
2197
2198 __FLATTEN_ATTRIBUTE__
2199 static inline
el_real_d_vec_axey(REAL a,const EL_REAL_D_VEC * x,EL_REAL_D_VEC * y)2200 void el_real_d_vec_axey(REAL a, const EL_REAL_D_VEC *x, EL_REAL_D_VEC *y)
2201 {
2202 CHAIN_DO(x, const EL_REAL_D_VEC) {
2203 __el_real_d_vec_axey(a, x, y);
2204 y = CHAIN_NEXT(y, EL_REAL_D_VEC);
2205 } CHAIN_WHILE(x, const EL_REAL_D_VEC);
2206 }
2207
2208 __FLATTEN_ATTRIBUTE__
2209 static inline
el_real_vec_d_axey(REAL a,const EL_REAL_VEC_D * x,EL_REAL_VEC_D * y)2210 void el_real_vec_d_axey(REAL a, const EL_REAL_VEC_D *x, EL_REAL_VEC_D *y)
2211 {
2212 CHAIN_DO(x, const EL_REAL_VEC_D) {
2213 if (x->stride == 1) {
2214 __el_real_vec_axey(a, (const EL_REAL_VEC *)x, (EL_REAL_VEC *)y);
2215 } else {
2216 __el_real_d_vec_axey(a, (const EL_REAL_D_VEC *)x, (EL_REAL_D_VEC *)y);
2217 }
2218 y = CHAIN_NEXT(y, EL_REAL_VEC_D);
2219 } CHAIN_WHILE(x, const EL_REAL_VEC_D);
2220 }
2221
2222 /* >>> */
2223
2224 /* <<< set() for element vectors */
2225
2226 static inline
__el_real_vec_set(REAL a,EL_REAL_VEC * y)2227 void __el_real_vec_set(REAL a, EL_REAL_VEC *y)
2228 {
2229 int i;
2230 for (i = 0; i < y->n_components; i++) {
2231 y->vec[i] = a;
2232 }
2233 }
2234
2235 __FLATTEN_ATTRIBUTE__
2236 static inline
el_real_vec_set(REAL a,EL_REAL_VEC * y)2237 void el_real_vec_set(REAL a, EL_REAL_VEC *y)
2238 {
2239 CHAIN_DO(y, EL_REAL_VEC) {
2240 __el_real_vec_set(a, y);
2241 } CHAIN_WHILE(y, EL_REAL_VEC);
2242 }
2243
2244 static inline
__el_real_d_vec_set(REAL a,EL_REAL_D_VEC * y)2245 void __el_real_d_vec_set(REAL a, EL_REAL_D_VEC *y)
2246 {
2247 int i;
2248 for (i = 0; i < y->n_components; i++) {
2249 SET_DOW(a, y->vec[i]);
2250 }
2251 }
2252
2253 __FLATTEN_ATTRIBUTE__
2254 static inline
el_real_d_vec_set(REAL a,EL_REAL_D_VEC * y)2255 void el_real_d_vec_set(REAL a, EL_REAL_D_VEC *y)
2256 {
2257 CHAIN_DO(y, EL_REAL_D_VEC) {
2258 __el_real_d_vec_set(a, y);
2259 } CHAIN_WHILE(y, EL_REAL_D_VEC);
2260 }
2261
2262 __FLATTEN_ATTRIBUTE__
2263 static inline
el_real_vec_d_set(REAL a,EL_REAL_VEC_D * y)2264 void el_real_vec_d_set(REAL a, EL_REAL_VEC_D *y)
2265 {
2266 CHAIN_DO(y, EL_REAL_VEC_D) {
2267 if (y->stride == 1) {
2268 __el_real_vec_set(a, (EL_REAL_VEC *)y);
2269 } else {
2270 __el_real_d_vec_set(a, (EL_REAL_D_VEC *)y);
2271 }
2272 } CHAIN_WHILE(y, EL_REAL_VEC_D);
2273 }
2274
2275 /* >>> */
2276
2277 /* >>> */
2278
2279 #endif /* _ALBERTA_EL_VEC_H_ */
2280