1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "_hypre_parcsr_ls.h"
9 #include "Common.h"
10 #include "_hypre_lapack.h"
11
12 /* -------------------------------------------------------------------------
13 dof_domain: for each dof defines neighborhood to build interpolation,
14 using
15 domain_diagmat (for cut--off scaling) and
16 i_domain_dof, j_dof_domain (for extracting the block of A);
17
18 domain_matrixinverse: contains the inverse of subdomain matrix;
19
20 B can be used to define strength matrix;
21 ----------------------------------------------------------------------- */
22
23 /*--------------------------------------------------------------------------
24 * hypre_AMGNodalSchwarzSmoother: (Not used currently)
25 *--------------------------------------------------------------------------*/
26
27 HYPRE_Int
hypre_AMGNodalSchwarzSmoother(hypre_CSRMatrix * A,HYPRE_Int num_functions,HYPRE_Int option,hypre_CSRMatrix ** domain_structure_pointer)28 hypre_AMGNodalSchwarzSmoother( hypre_CSRMatrix *A,
29 HYPRE_Int num_functions,
30 HYPRE_Int option,
31 hypre_CSRMatrix **domain_structure_pointer)
32 {
33 /* option = 0: nodal symGS;
34 1: next to nodal symGS (overlapping Schwarz) */
35
36 HYPRE_Int *i_domain_dof, *j_domain_dof;
37 HYPRE_Real *domain_matrixinverse;
38 HYPRE_Int num_domains;
39 hypre_CSRMatrix *domain_structure;
40
41 HYPRE_Int *i_dof_node, *j_dof_node;
42 HYPRE_Int *i_node_dof, *j_node_dof;
43
44 HYPRE_Int *i_node_dof_dof, *j_node_dof_dof;
45
46 HYPRE_Int *i_node_node, *j_node_node;
47
48 HYPRE_Int num_nodes;
49
50 HYPRE_Int *i_dof_dof = hypre_CSRMatrixI(A);
51 HYPRE_Int *j_dof_dof = hypre_CSRMatrixJ(A);
52 HYPRE_Real *a_dof_dof = hypre_CSRMatrixData(A);
53 HYPRE_Int num_dofs = hypre_CSRMatrixNumRows(A);
54
55
56 HYPRE_Int ierr = 0;
57 HYPRE_Int i,j,k, l_loc, i_loc, j_loc;
58 HYPRE_Int i_dof, j_dof;
59 HYPRE_Int *i_local_to_global;
60 HYPRE_Int *i_global_to_local;
61
62 HYPRE_Int *i_int;
63 HYPRE_Int *i_int_to_local;
64
65 HYPRE_Int int_dof_counter, local_dof_counter, max_local_dof_counter=0;
66
67 HYPRE_Int domain_dof_counter = 0, domain_matrixinverse_counter = 0;
68
69 HYPRE_Real *AE;
70
71 char uplo = 'L';
72
73 HYPRE_Int cnt;
74
75 /* PCG arrays: ---------------------------------------------------
76 HYPRE_Real *x, *rhs, *v, *w, *d, *aux;
77
78 HYPRE_Int max_iter;
79
80 ------------------------------------------------------------------ */
81
82 /* build dof_node graph: ----------------------------------------- */
83
84 num_nodes = num_dofs / num_functions;
85
86 /*hypre_printf("\nnum_nodes: %d, num_dofs: %d = %d x %d\n", num_nodes, num_dofs,
87 num_nodes, num_functions);*/
88
89 i_dof_node = hypre_CTAlloc(HYPRE_Int, num_dofs+1, HYPRE_MEMORY_HOST);
90 j_dof_node = hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
91
92 for (i=0; i < num_dofs+1; i++)
93 i_dof_node[i] = i;
94
95 for (j = 0; j < num_nodes; j++)
96 for (k = 0; k < num_functions; k++)
97 j_dof_node[j*num_functions+k] = j;
98
99 /* build node_dof graph: ----------------------------------------- */
100
101 ierr = transpose_matrix_create(&i_node_dof, &j_node_dof,
102 i_dof_node, j_dof_node,
103 num_dofs, num_nodes);
104
105
106 /* build node_node graph: ----------------------------------------- */
107
108 ierr = matrix_matrix_product(&i_node_dof_dof,
109 &j_node_dof_dof,
110 i_node_dof, j_node_dof,
111 i_dof_dof, j_dof_dof,
112 num_nodes, num_dofs, num_dofs);
113
114 ierr = matrix_matrix_product(&i_node_node,
115 &j_node_node,
116 i_node_dof_dof,
117 j_node_dof_dof,
118 i_dof_node, j_dof_node,
119 num_nodes, num_dofs, num_nodes);
120
121 hypre_TFree(i_node_dof_dof, HYPRE_MEMORY_HOST);
122 hypre_TFree(j_node_dof_dof, HYPRE_MEMORY_HOST);
123
124 /* compute for each node the local information: -------------------- */
125
126 i_global_to_local = i_dof_node;
127
128 for (i_dof =0; i_dof < num_dofs; i_dof++)
129 i_global_to_local[i_dof] = -1;
130
131 domain_matrixinverse_counter = 0;
132 domain_dof_counter = 0;
133 for (i=0; i < num_nodes; i++)
134 {
135 local_dof_counter = 0;
136
137 for (j=i_node_node[i]; j < i_node_node[i+1]; j++)
138 for (k=i_node_dof[j_node_node[j]];
139 k<i_node_dof[j_node_node[j]+1]; k++)
140 {
141 j_dof = j_node_dof[k];
142
143 if (i_global_to_local[j_dof] < 0)
144 {
145 i_global_to_local[j_dof] = local_dof_counter;
146 local_dof_counter++;
147 }
148
149 }
150 domain_matrixinverse_counter += local_dof_counter*local_dof_counter;
151 domain_dof_counter += local_dof_counter;
152
153 if (local_dof_counter > max_local_dof_counter)
154 max_local_dof_counter = local_dof_counter;
155
156 for (j=i_node_node[i]; j < i_node_node[i+1]; j++)
157 for (k=i_node_dof[j_node_node[j]];
158 k<i_node_dof[j_node_node[j]+1]; k++)
159 {
160 j_dof = j_node_dof[k];
161 i_global_to_local[j_dof] = -1;
162 }
163 }
164
165 num_domains = num_nodes;
166
167 i_domain_dof = hypre_CTAlloc(HYPRE_Int, num_domains+1, HYPRE_MEMORY_HOST);
168
169 if (option == 1)
170 j_domain_dof = hypre_CTAlloc(HYPRE_Int, domain_dof_counter, HYPRE_MEMORY_HOST);
171 else
172 j_domain_dof = hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
173
174 if (option == 1)
175 domain_matrixinverse = hypre_CTAlloc(HYPRE_Real, domain_matrixinverse_counter, HYPRE_MEMORY_HOST);
176 else
177 domain_matrixinverse = hypre_CTAlloc(HYPRE_Real, num_dofs * num_functions, HYPRE_MEMORY_HOST);
178
179 i_local_to_global = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter, HYPRE_MEMORY_HOST);
180
181 i_int_to_local = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter, HYPRE_MEMORY_HOST);
182 i_int = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter, HYPRE_MEMORY_HOST);
183
184 for (l_loc=0; l_loc < max_local_dof_counter; l_loc++)
185 i_int[l_loc] = -1;
186
187 domain_dof_counter = 0;
188 domain_matrixinverse_counter = 0;
189 for (i=0; i < num_nodes; i++)
190 {
191 i_domain_dof[i] = domain_dof_counter;
192 local_dof_counter = 0;
193
194 for (j=i_node_node[i]; j < i_node_node[i+1]; j++)
195 for (k=i_node_dof[j_node_node[j]];
196 k<i_node_dof[j_node_node[j]+1]; k++)
197 {
198 j_dof = j_node_dof[k];
199
200 if (i_global_to_local[j_dof] < 0)
201 {
202 i_global_to_local[j_dof] = local_dof_counter;
203 i_local_to_global[local_dof_counter] = j_dof;
204 local_dof_counter++;
205 }
206
207 }
208
209 for (j=i_node_dof[i]; j < i_node_dof[i+1]; j++)
210 for (k=i_dof_dof[j_node_dof[j]]; k < i_dof_dof[j_node_dof[j]+1]; k++)
211 if (i_global_to_local[j_dof_dof[k]] < 0)
212 hypre_error_w_msg(HYPRE_ERROR_GENERIC,"WRONG local indexing: ====================== \n");
213
214 int_dof_counter = 0;
215 for (k=i_node_dof[i]; k < i_node_dof[i+1]; k++)
216 {
217 i_dof = j_node_dof[k];
218 i_loc = i_global_to_local[i_dof];
219 i_int[i_loc] = int_dof_counter;
220 i_int_to_local[int_dof_counter] = i_loc;
221 int_dof_counter++;
222 }
223
224 /* get local matrix AE: ======================================== */
225
226 if (option == 1)
227 {
228 AE = &domain_matrixinverse[domain_matrixinverse_counter];
229 cnt = 0;
230 for (i_loc=0; i_loc < local_dof_counter; i_loc++)
231 for (j_loc=0; j_loc < local_dof_counter; j_loc++)
232 AE[cnt++] = 0.e0;
233
234 for (i_loc=0; i_loc < local_dof_counter; i_loc++)
235 {
236 i_dof = i_local_to_global[i_loc];
237 for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++)
238 {
239 j_loc = i_global_to_local[j_dof_dof[j]];
240 if (j_loc >=0)
241 AE[i_loc + j_loc * local_dof_counter] = a_dof_dof[j];
242 }
243 }
244
245 /* get block for Schwarz smoother: ============================= */
246 /* ierr = hypre_matinv(XE, AE, local_dof_counter); */
247 /* hypre_printf("ierr_AE_inv: %d\n", ierr); */
248 hypre_dpotrf(&uplo, &local_dof_counter, AE,
249 &local_dof_counter, &ierr);
250 if (ierr == 1) hypre_error_w_msg(HYPRE_ERROR_GENERIC,"Error! Matrix not SPD\n");
251
252 for (i_loc=0; i_loc < local_dof_counter; i_loc++)
253 j_domain_dof[domain_dof_counter+i_loc]
254 = i_local_to_global[i_loc];
255 }
256
257 if (option == 0)
258 {
259 AE = &domain_matrixinverse[domain_matrixinverse_counter];
260 for (i_loc=0; i_loc < int_dof_counter; i_loc++)
261 for (j_loc=0; j_loc < int_dof_counter; j_loc++)
262 AE[i_loc + j_loc * int_dof_counter] = 0.e0;
263
264 for (l_loc=0; l_loc < int_dof_counter; l_loc++)
265 {
266 i_loc = i_int_to_local[l_loc];
267 i_dof = i_local_to_global[i_loc];
268 for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++)
269 {
270 j_loc = i_global_to_local[j_dof_dof[j]];
271 if (j_loc >=0)
272 if (i_int[j_loc] >=0)
273 AE[i_loc + i_int[j_loc] * int_dof_counter]
274 = a_dof_dof[j];
275 }
276 }
277
278 /* ierr = hypre_matinv(XE, AE, int_dof_counter); */
279 hypre_dpotrf(&uplo, &local_dof_counter, AE,
280 &local_dof_counter, &ierr);
281
282 if (ierr) hypre_error_w_msg(HYPRE_ERROR_GENERIC," error in dpotrf !!!\n");
283
284 for (i_loc=0; i_loc < int_dof_counter; i_loc++)
285 {
286 j_domain_dof[domain_dof_counter + i_loc] =
287 i_local_to_global[i_int_to_local[i_loc]];
288
289 for (j_loc=0; j_loc < int_dof_counter; j_loc++)
290 domain_matrixinverse[domain_matrixinverse_counter
291 + i_loc + j_loc * int_dof_counter]
292 = AE[i_loc + j_loc * int_dof_counter];
293 }
294
295 domain_dof_counter+=int_dof_counter;
296 domain_matrixinverse_counter+=int_dof_counter*int_dof_counter;
297 }
298 else
299 {
300 domain_dof_counter+=local_dof_counter;
301 domain_matrixinverse_counter+=local_dof_counter*local_dof_counter;
302 }
303
304 for (l_loc=0; l_loc < local_dof_counter; l_loc++)
305 {
306 i_int[l_loc] = -1;
307 i_global_to_local[i_local_to_global[l_loc]] = -1;
308 }
309 }
310
311 i_domain_dof[num_nodes] = domain_dof_counter;
312
313 hypre_TFree(i_dof_node, HYPRE_MEMORY_HOST);
314 hypre_TFree(j_dof_node, HYPRE_MEMORY_HOST);
315
316 hypre_TFree(i_node_dof, HYPRE_MEMORY_HOST);
317 hypre_TFree(j_node_dof, HYPRE_MEMORY_HOST);
318 hypre_TFree(i_node_node, HYPRE_MEMORY_HOST);
319 hypre_TFree(j_node_node, HYPRE_MEMORY_HOST);
320
321 hypre_TFree(i_int, HYPRE_MEMORY_HOST);
322 hypre_TFree(i_int_to_local, HYPRE_MEMORY_HOST);
323
324 hypre_TFree(i_local_to_global, HYPRE_MEMORY_HOST);
325
326 domain_structure = hypre_CSRMatrixCreate(num_domains, max_local_dof_counter,
327 i_domain_dof[num_domains]);
328 hypre_CSRMatrixI(domain_structure) = i_domain_dof;
329 hypre_CSRMatrixJ(domain_structure) = j_domain_dof;
330 hypre_CSRMatrixData(domain_structure) = domain_matrixinverse;
331
332 *domain_structure_pointer = domain_structure;
333
334 return hypre_error_flag;
335 }
336
hypre_ParMPSchwarzSolve(hypre_ParCSRMatrix * par_A,hypre_CSRMatrix * A_boundary,hypre_ParVector * rhs_vector,hypre_CSRMatrix * domain_structure,hypre_ParVector * par_x,HYPRE_Real relax_wt,HYPRE_Real * scale,hypre_ParVector * Vtemp,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)337 HYPRE_Int hypre_ParMPSchwarzSolve(hypre_ParCSRMatrix *par_A,
338 hypre_CSRMatrix *A_boundary,
339 hypre_ParVector *rhs_vector,
340 hypre_CSRMatrix *domain_structure,
341 hypre_ParVector *par_x,
342 HYPRE_Real relax_wt,
343 HYPRE_Real *scale,
344 hypre_ParVector *Vtemp, HYPRE_Int *pivots,
345 HYPRE_Int use_nonsymm)
346 {
347 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(par_A);
348 HYPRE_Int num_sends = 0;
349 HYPRE_Int *send_map_starts;
350 HYPRE_Int *send_map_elmts;
351
352 hypre_ParCSRCommHandle *comm_handle;
353
354 HYPRE_Int ierr = 0;
355 /* HYPRE_Int num_dofs; */
356 hypre_CSRMatrix *A_diag;
357 HYPRE_Int *A_diag_i;
358 HYPRE_Int *A_diag_j;
359 HYPRE_Real *A_diag_data;
360 hypre_CSRMatrix *A_offd;
361 HYPRE_Int *A_offd_i;
362 HYPRE_Int *A_offd_j;
363 HYPRE_Real *A_offd_data;
364 HYPRE_Real *x;
365 HYPRE_Real *x_ext;
366 HYPRE_Real *x_ext_old;
367 HYPRE_Real *rhs;
368 HYPRE_Real *rhs_ext;
369 HYPRE_Real *vtemp_data;
370 HYPRE_Real *aux;
371 HYPRE_Real *buf_data;
372 /*hypre_Vector *x_vector;*/
373 MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
374 HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
375 HYPRE_Int max_domain_size = hypre_CSRMatrixNumCols(domain_structure);
376 HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
377 HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
378 HYPRE_Real *domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
379 HYPRE_Int *A_boundary_i;
380 HYPRE_Int *A_boundary_j;
381 HYPRE_Real *A_boundary_data;
382 HYPRE_Int num_variables;
383 HYPRE_Int num_cols_offd;
384
385 HYPRE_Int piv_counter = 0;
386 HYPRE_Int one = 1;
387 char uplo = 'L';
388
389 HYPRE_Int jj,i,j,k, j_loc, k_loc;
390 HYPRE_Int index;
391
392 HYPRE_Int matrix_size, matrix_size_counter = 0;
393
394 HYPRE_Int num_procs;
395
396 hypre_MPI_Comm_size(comm,&num_procs);
397
398 /* initiate: ----------------------------------------------- */
399 /* num_dofs = hypre_CSRMatrixNumRows(A); */
400
401 A_diag = hypre_ParCSRMatrixDiag(par_A);
402 A_offd = hypre_ParCSRMatrixOffd(par_A);
403 num_variables = hypre_CSRMatrixNumRows(A_diag);
404 num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
405 x = hypre_VectorData(hypre_ParVectorLocalVector(par_x));
406 vtemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Vtemp));
407 rhs = hypre_VectorData(hypre_ParVectorLocalVector(rhs_vector));
408
409 if (use_nonsymm)
410 uplo = 'N';
411
412 /*x_vector = hypre_ParVectorLocalVector(par_x);*/
413 A_diag_i = hypre_CSRMatrixI(A_diag);
414 A_diag_j = hypre_CSRMatrixJ(A_diag);
415 A_diag_data = hypre_CSRMatrixData(A_diag);
416 A_offd_i = hypre_CSRMatrixI(A_offd);
417 if (num_cols_offd)
418 {
419 A_offd_j = hypre_CSRMatrixJ(A_offd);
420 A_offd_data = hypre_CSRMatrixData(A_offd);
421 A_boundary_i = hypre_CSRMatrixI(A_boundary);
422 A_boundary_j = hypre_CSRMatrixJ(A_boundary);
423 A_boundary_data = hypre_CSRMatrixData(A_boundary);
424 }
425 aux = hypre_CTAlloc(HYPRE_Real, max_domain_size, HYPRE_MEMORY_HOST);
426
427 hypre_ParVectorCopy(rhs_vector,Vtemp);
428 hypre_ParCSRMatrixMatvec(-1.0,par_A,par_x,1.0,Vtemp);
429
430 if (comm_pkg)
431 {
432 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
433 send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
434 send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
435
436 buf_data = hypre_CTAlloc(HYPRE_Real, send_map_starts[num_sends], HYPRE_MEMORY_HOST);
437 x_ext = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
438 x_ext_old = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
439 rhs_ext = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
440
441 index = 0;
442 for (i=0; i < num_sends; i++)
443 {
444 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
445 buf_data[index++] = vtemp_data[send_map_elmts[j]];
446 }
447
448 comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,
449 rhs_ext);
450 hypre_ParCSRCommHandleDestroy(comm_handle);
451 comm_handle = NULL;
452
453 index = 0;
454 for (i=0; i < num_sends; i++)
455 {
456 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
457 buf_data[index++] = x[send_map_elmts[j]];
458 }
459
460 comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,x_ext);
461 hypre_ParCSRCommHandleDestroy(comm_handle);
462 comm_handle = NULL;
463 }
464
465 /* correction of residual for exterior points to be updated locally */
466 for (i=0; i < num_cols_offd; i++)
467 {
468 x_ext_old[i] = x_ext[i];
469 for (j = A_boundary_i[i]; j < A_boundary_i[i+1]; j++)
470 {
471 k_loc = A_boundary_j[j];
472 if (k_loc < num_variables)
473 rhs_ext[i] += A_boundary_data[j]*x[k_loc];
474 else
475 rhs_ext[i] += A_boundary_data[j]*x_ext[k_loc-num_variables];
476 }
477 }
478 /* forward solve: ----------------------------------------------- */
479
480 matrix_size_counter = 0;
481 for (i=0; i < num_domains; i++)
482 {
483 matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
484
485 /* compute residual: ---------------------------------------- */
486
487 jj = 0;
488 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
489 {
490 j_loc = j_domain_dof[j];
491 if (j_loc < num_variables)
492 {
493 aux[jj] = rhs[j_loc];
494 for (k=A_diag_i[j_loc]; k<A_diag_i[j_loc+1]; k++)
495 aux[jj] -= A_diag_data[k] * x[A_diag_j[k]];
496 for (k=A_offd_i[j_loc]; k<A_offd_i[j_loc+1]; k++)
497 aux[jj] -= A_offd_data[k] * x_ext[A_offd_j[k]];
498 }
499 else
500 {
501 j_loc -= num_variables;
502 aux[jj] = rhs_ext[j_loc];
503 for (k=A_boundary_i[j_loc]; k<A_boundary_i[j_loc+1]; k++)
504 k_loc = A_boundary_j[k];
505 if (k_loc < num_variables)
506 aux[jj] -= A_boundary_data[k] * x[k_loc];
507 else
508 aux[jj] -= A_boundary_data[k] * x_ext[k_loc-num_variables];
509 }
510 jj++;
511 }
512 /* solve for correction: ------------------------------------- */
513 if (use_nonsymm)
514 {
515 hypre_dgetrs(&uplo, &matrix_size, &one,
516 &domain_matrixinverse[matrix_size_counter],
517 &matrix_size, &pivots[piv_counter], aux,
518 &matrix_size, &ierr);
519 }
520 else
521 {
522 hypre_dpotrs(&uplo, &matrix_size, &one,
523 &domain_matrixinverse[matrix_size_counter],
524 &matrix_size, aux,
525 &matrix_size, &ierr);
526 }
527 if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
528 jj = 0;
529 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
530 {
531 j_loc = j_domain_dof[j];
532 if (j_loc < num_variables)
533 x[j_loc] += relax_wt*aux[jj++];
534 else
535 x_ext[j_loc-num_variables] += relax_wt*aux[jj++];
536 }
537 matrix_size_counter += matrix_size * matrix_size;
538 piv_counter += matrix_size;
539
540
541 }
542 /*
543 for (i=0; i < num_cols_offd; i++)
544 x_ext[i] -= x_ext_old[i];
545
546 if (comm_pkg)
547 {
548 comm_handle=hypre_ParCSRCommHandleCreate (2,comm_pkg,x_ext,buf_data);
549
550 hypre_ParCSRCommHandleDestroy(comm_handle);
551 comm_handle = NULL;
552
553 index = 0;
554 for (i=0; i < num_sends; i++)
555 {
556 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
557 x[send_map_elmts[j]] += buf_data[index++];
558 }
559 }
560 for (i=0; i < num_variables; i++)
561 x[i] *= scale[i];
562
563 hypre_ParVectorCopy(rhs_vector,Vtemp);
564 hypre_ParCSRMatrixMatvec(-1.0,par_A,par_x,1.0,Vtemp);
565
566 if (comm_pkg)
567 {
568 index = 0;
569 for (i=0; i < num_sends; i++)
570 {
571 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
572 buf_data[index++] = vtemp_data[send_map_elmts[j]];
573 }
574
575 comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,
576 rhs_ext);
577 hypre_ParCSRCommHandleDestroy(comm_handle);
578 comm_handle = NULL;
579
580 index = 0;
581 for (i=0; i < num_sends; i++)
582 {
583 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
584 buf_data[index++] = x[send_map_elmts[j]];
585 }
586
587 comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,x_ext);
588 hypre_ParCSRCommHandleDestroy(comm_handle);
589 comm_handle = NULL;
590 }
591 */
592 /* correction of residual for exterior points to be updated locally */
593 /* for (i=0; i < num_cols_offd; i++)
594 {
595 x_ext_old[i] = x_ext[i];
596 for (j = A_boundary_i[i]; j < A_boundary_i[i+1]; j++)
597 {
598 k_loc = A_boundary_j[j];
599 if (k_loc < num_variables)
600 rhs_ext[i] += A_boundary_i[k]*x[k_loc];
601 else
602 rhs_ext[i] += A_boundary_i[k]*x_ext[k_loc-num_variables];
603 }
604 }
605 */
606 /* backward solve: ------------------------------------------------ */
607 for (i=num_domains-1; i > -1; i--)
608 {
609 matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
610 matrix_size_counter -= matrix_size * matrix_size;
611 piv_counter -= matrix_size;
612
613 /* compute residual: ---------------------------------------- */
614 jj = 0;
615 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
616 {
617 j_loc = j_domain_dof[j];
618 if (j_loc < num_variables)
619 {
620 aux[jj] = rhs[j_loc];
621 for (k=A_diag_i[j_loc]; k<A_diag_i[j_loc+1]; k++)
622 aux[jj] -= A_diag_data[k] * x[A_diag_j[k]];
623 for (k=A_offd_i[j_loc]; k<A_offd_i[j_loc+1]; k++)
624 aux[jj] -= A_offd_data[k] * x_ext[A_offd_j[k]];
625 }
626 else
627 {
628 j_loc -= num_variables;
629 aux[jj] = rhs_ext[j_loc];
630 for (k=A_boundary_i[j_loc]; k<A_boundary_i[j_loc+1]; k++)
631 k_loc = A_boundary_j[k];
632 if (k_loc < num_variables)
633 aux[jj] -= A_boundary_data[k] * x[k_loc];
634 else
635 aux[jj] -= A_boundary_data[k] * x_ext[k_loc-num_variables];
636 }
637 jj++;
638 }
639
640 /* solve for correction: ------------------------------------- */
641 if (use_nonsymm)
642 {
643 hypre_dgetrs(&uplo, &matrix_size, &one,
644 &domain_matrixinverse[matrix_size_counter],
645 &matrix_size, &pivots[piv_counter], aux,
646 &matrix_size, &ierr);
647 }
648 else
649 {
650 hypre_dpotrs(&uplo, &matrix_size, &one,
651 &domain_matrixinverse[matrix_size_counter],
652 &matrix_size, aux,
653 &matrix_size, &ierr);
654 }
655
656 if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
657 jj = 0;
658 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
659 {
660 j_loc = j_domain_dof[j];
661 if (j_loc < num_variables)
662 x[j_loc] += relax_wt*aux[jj++];
663 else
664 x_ext[j_loc-num_variables] += relax_wt*aux[jj++];
665 }
666 }
667
668 for (i=0; i < num_cols_offd; i++)
669 x_ext[i] -= x_ext_old[i];
670
671 if (comm_pkg)
672 {
673 comm_handle=hypre_ParCSRCommHandleCreate (2,comm_pkg,x_ext,buf_data);
674
675 hypre_ParCSRCommHandleDestroy(comm_handle);
676 comm_handle = NULL;
677
678 index = 0;
679 for (i=0; i < num_sends; i++)
680 {
681 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
682 x[send_map_elmts[j]] += buf_data[index++];
683 }
684
685 hypre_TFree(buf_data, HYPRE_MEMORY_HOST);
686 hypre_TFree(x_ext, HYPRE_MEMORY_HOST);
687 hypre_TFree(x_ext_old, HYPRE_MEMORY_HOST);
688 hypre_TFree(rhs_ext, HYPRE_MEMORY_HOST);
689 }
690 for (i=0; i < num_variables; i++)
691 x[i] *= scale[i];
692
693 hypre_TFree(aux, HYPRE_MEMORY_HOST);
694
695 return hypre_error_flag;
696
697 }
698
hypre_MPSchwarzSolve(hypre_ParCSRMatrix * par_A,hypre_Vector * rhs_vector,hypre_CSRMatrix * domain_structure,hypre_ParVector * par_x,HYPRE_Real relax_wt,hypre_Vector * aux_vector,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)699 HYPRE_Int hypre_MPSchwarzSolve(hypre_ParCSRMatrix *par_A,
700 hypre_Vector *rhs_vector,
701 hypre_CSRMatrix *domain_structure,
702 hypre_ParVector *par_x,
703 HYPRE_Real relax_wt,
704 hypre_Vector *aux_vector, HYPRE_Int *pivots,
705 HYPRE_Int use_nonsymm)
706 {
707 HYPRE_Int ierr = 0;
708 /* HYPRE_Int num_dofs; */
709 HYPRE_Int *i_dof_dof;
710 HYPRE_Int *j_dof_dof;
711 HYPRE_Real *a_dof_dof;
712 HYPRE_Real *x;
713 hypre_Vector *rhs;
714 HYPRE_Real *aux;
715 hypre_CSRMatrix *A;
716 hypre_Vector *x_vector;
717 MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
718 HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
719 HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
720 HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
721 HYPRE_Real *domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
722
723 HYPRE_Int piv_counter = 0;
724 HYPRE_Int one = 1;
725 char uplo = 'L';
726
727 HYPRE_Int jj,i,j,k; /*, j_loc, k_loc;*/
728
729
730 HYPRE_Int matrix_size, matrix_size_counter = 0;
731
732 HYPRE_Int num_procs;
733
734 hypre_MPI_Comm_size(comm,&num_procs);
735
736 /* initiate: ----------------------------------------------- */
737 /* num_dofs = hypre_CSRMatrixNumRows(A); */
738 x_vector = hypre_ParVectorLocalVector(par_x);
739 A = hypre_ParCSRMatrixDiag(par_A);
740 i_dof_dof = hypre_CSRMatrixI(A);
741 j_dof_dof = hypre_CSRMatrixJ(A);
742 a_dof_dof = hypre_CSRMatrixData(A);
743 x = hypre_VectorData(x_vector);
744 aux = hypre_VectorData(aux_vector);
745 /* for (i=0; i < num_dofs; i++)
746 x[i] = 0.e0; */
747
748 if (use_nonsymm)
749 uplo = 'N';
750
751
752 if (num_procs > 1)
753 {
754 hypre_parCorrRes(par_A, par_x, rhs_vector, &rhs);
755 }
756 else
757 {
758 rhs = rhs_vector;
759 }
760
761 /* forward solve: ----------------------------------------------- */
762
763 matrix_size_counter = 0;
764 for (i=0; i < num_domains; i++)
765 {
766 matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
767
768 /* compute residual: ---------------------------------------- */
769
770 jj = 0;
771 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
772 {
773 aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
774 for (k=i_dof_dof[j_domain_dof[j]];
775 k<i_dof_dof[j_domain_dof[j]+1]; k++)
776 aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
777 jj++;
778 }
779 /* solve for correction: ------------------------------------- */
780 if (use_nonsymm)
781 {
782 hypre_dgetrs(&uplo, &matrix_size, &one,
783 &domain_matrixinverse[matrix_size_counter],
784 &matrix_size, &pivots[piv_counter], aux,
785 &matrix_size, &ierr);
786 }
787 else
788 {
789 hypre_dpotrs(&uplo, &matrix_size, &one,
790 &domain_matrixinverse[matrix_size_counter],
791 &matrix_size, aux,
792 &matrix_size, &ierr);
793 }
794
795 if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
796 jj = 0;
797 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
798 {
799 x[j_domain_dof[j]]+= relax_wt * aux[jj++];
800 }
801 matrix_size_counter += matrix_size * matrix_size;
802 piv_counter += matrix_size;
803 }
804
805 /* backward solve: ------------------------------------------------ */
806 for (i=num_domains-1; i > -1; i--)
807 {
808 matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
809 matrix_size_counter -= matrix_size * matrix_size;
810 piv_counter -= matrix_size;
811
812 /* compute residual: ---------------------------------------- */
813
814 jj = 0;
815 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
816 {
817 aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
818 for (k=i_dof_dof[j_domain_dof[j]];
819 k<i_dof_dof[j_domain_dof[j]+1]; k++)
820 aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
821 jj++;
822 }
823
824 /* solve for correction: ------------------------------------- */
825
826 if (use_nonsymm)
827 {
828 hypre_dgetrs(&uplo, &matrix_size, &one,
829 &domain_matrixinverse[matrix_size_counter],
830 &matrix_size, &pivots[piv_counter], aux,
831 &matrix_size, &ierr);
832 }
833 else
834 {
835 hypre_dpotrs(&uplo, &matrix_size, &one,
836 &domain_matrixinverse[matrix_size_counter],
837 &matrix_size, aux,
838 &matrix_size, &ierr);
839 }
840
841 if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
842 jj = 0;
843 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
844 {
845 x[j_domain_dof[j]] += relax_wt*aux[jj++];
846 }
847 }
848
849 if (num_procs > 1)
850 {
851 hypre_SeqVectorDestroy(rhs);
852 }
853
854 return hypre_error_flag;
855 }
856
hypre_MPSchwarzCFSolve(hypre_ParCSRMatrix * par_A,hypre_Vector * rhs_vector,hypre_CSRMatrix * domain_structure,hypre_ParVector * par_x,HYPRE_Real relax_wt,hypre_Vector * aux_vector,HYPRE_Int * CF_marker,HYPRE_Int rlx_pt,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)857 HYPRE_Int hypre_MPSchwarzCFSolve(hypre_ParCSRMatrix *par_A,
858 hypre_Vector *rhs_vector,
859 hypre_CSRMatrix *domain_structure,
860 hypre_ParVector *par_x,
861 HYPRE_Real relax_wt,
862 hypre_Vector *aux_vector,
863 HYPRE_Int *CF_marker,
864 HYPRE_Int rlx_pt, HYPRE_Int *pivots,
865 HYPRE_Int use_nonsymm)
866 {
867 HYPRE_Int ierr = 0;
868 /* HYPRE_Int num_dofs; */
869 HYPRE_Int *i_dof_dof;
870 HYPRE_Int *j_dof_dof;
871 HYPRE_Real *a_dof_dof;
872 HYPRE_Real *x;
873 hypre_Vector *rhs;
874 HYPRE_Real *aux;
875 hypre_CSRMatrix *A;
876 hypre_Vector *x_vector;
877 MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
878 HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
879 HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
880 HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
881 HYPRE_Real *domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
882
883 HYPRE_Int piv_counter = 0;
884 HYPRE_Int one = 1;
885 char uplo = 'L';
886 HYPRE_Int jj,i,j,k; /*, j_loc, k_loc;*/
887
888 HYPRE_Int matrix_size, matrix_size_counter = 0;
889
890 HYPRE_Int num_procs;
891
892 hypre_MPI_Comm_size(comm,&num_procs);
893
894 /* initiate: ----------------------------------------------- */
895 /* num_dofs = hypre_CSRMatrixNumRows(A); */
896 x_vector = hypre_ParVectorLocalVector(par_x);
897 A = hypre_ParCSRMatrixDiag(par_A);
898 i_dof_dof = hypre_CSRMatrixI(A);
899 j_dof_dof = hypre_CSRMatrixJ(A);
900 a_dof_dof = hypre_CSRMatrixData(A);
901 x = hypre_VectorData(x_vector);
902 aux = hypre_VectorData(aux_vector);
903 /* for (i=0; i < num_dofs; i++)
904 x[i] = 0.e0; */
905
906 if (use_nonsymm)
907 uplo = 'N';
908
909 if (num_procs > 1)
910 {
911 hypre_parCorrRes(par_A, par_x, rhs_vector, &rhs);
912 }
913 else
914 {
915 rhs = rhs_vector;
916 }
917
918 /* forward solve: ----------------------------------------------- */
919
920 matrix_size_counter = 0;
921 for (i=0; i < num_domains; i++)
922 {
923 if (CF_marker[i] == rlx_pt)
924 {
925 matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
926
927 /* compute residual: ---------------------------------------- */
928
929 jj = 0;
930 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
931 {
932 aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
933 if (CF_marker[j_domain_dof[j]] == rlx_pt)
934 {
935 for (k=i_dof_dof[j_domain_dof[j]];
936 k<i_dof_dof[j_domain_dof[j]+1]; k++)
937 aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
938 }
939 jj++;
940 }
941 /* solve for correction: ------------------------------------- */
942 if (use_nonsymm)
943 {
944 hypre_dgetrs(&uplo, &matrix_size, &one,
945 &domain_matrixinverse[matrix_size_counter],
946 &matrix_size, &pivots[piv_counter], aux,
947 &matrix_size, &ierr);
948 }
949 else
950 {
951 hypre_dpotrs(&uplo, &matrix_size, &one,
952 &domain_matrixinverse[matrix_size_counter],
953 &matrix_size, aux,
954 &matrix_size, &ierr);
955 }
956
957 if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
958 jj = 0;
959 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
960 {
961 x[j_domain_dof[j]]+= relax_wt * aux[jj++];
962 }
963 matrix_size_counter += matrix_size * matrix_size;
964 piv_counter += matrix_size;
965 }
966 }
967
968 /* backward solve: ------------------------------------------------ */
969 for (i=num_domains-1; i > -1; i--)
970 {
971 if (CF_marker[i] == rlx_pt)
972 {
973 matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
974 matrix_size_counter -= matrix_size * matrix_size;
975 piv_counter -= matrix_size;
976
977 /* compute residual: ---------------------------------------- */
978 jj = 0;
979 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
980 {
981 aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
982 if (CF_marker[j_domain_dof[j]] == rlx_pt)
983 {
984 for (k=i_dof_dof[j_domain_dof[j]];
985 k<i_dof_dof[j_domain_dof[j]+1]; k++)
986 aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
987 }
988 jj++;
989 }
990
991 /* solve for correction: ------------------------------------- */
992 if (use_nonsymm)
993 {
994 hypre_dgetrs(&uplo, &matrix_size, &one,
995 &domain_matrixinverse[matrix_size_counter],
996 &matrix_size, &pivots[piv_counter], aux,
997 &matrix_size, &ierr);
998 }
999 else
1000 {
1001 hypre_dpotrs(&uplo, &matrix_size, &one,
1002 &domain_matrixinverse[matrix_size_counter],
1003 &matrix_size, aux,
1004 &matrix_size, &ierr);
1005 }
1006
1007 if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
1008 jj = 0;
1009 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1010 {
1011 x[j_domain_dof[j]]+= relax_wt*aux[jj++];
1012 }
1013 }
1014 }
1015
1016 if (num_procs > 1)
1017 {
1018 hypre_SeqVectorDestroy(rhs);
1019 }
1020
1021 return hypre_error_flag;
1022 }
1023
hypre_MPSchwarzFWSolve(hypre_ParCSRMatrix * par_A,hypre_Vector * rhs_vector,hypre_CSRMatrix * domain_structure,hypre_ParVector * par_x,HYPRE_Real relax_wt,hypre_Vector * aux_vector,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)1024 HYPRE_Int hypre_MPSchwarzFWSolve(hypre_ParCSRMatrix *par_A,
1025 hypre_Vector *rhs_vector,
1026 hypre_CSRMatrix *domain_structure,
1027 hypre_ParVector *par_x,
1028 HYPRE_Real relax_wt,
1029 hypre_Vector *aux_vector, HYPRE_Int *pivots,
1030 HYPRE_Int use_nonsymm)
1031 {
1032 HYPRE_Int ierr = 0;
1033 /* HYPRE_Int num_dofs; */
1034 HYPRE_Int *i_dof_dof;
1035 HYPRE_Int *j_dof_dof;
1036 HYPRE_Real *a_dof_dof;
1037 HYPRE_Real *x;
1038 hypre_Vector *rhs;
1039 HYPRE_Real *aux;
1040 hypre_CSRMatrix *A;
1041 hypre_Vector *x_vector;
1042 MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
1043 HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
1044 HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
1045 HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
1046 HYPRE_Real *domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
1047
1048
1049 HYPRE_Int piv_counter = 0;
1050 HYPRE_Int one = 1;
1051 char uplo = 'L';
1052 HYPRE_Int jj,i,j,k; /*, j_loc, k_loc;*/
1053
1054
1055 HYPRE_Int matrix_size, matrix_size_counter = 0;
1056
1057 HYPRE_Int num_procs;
1058
1059 hypre_MPI_Comm_size(comm,&num_procs);
1060
1061 /* initiate: ----------------------------------------------- */
1062 /* num_dofs = hypre_CSRMatrixNumRows(A); */
1063 x_vector = hypre_ParVectorLocalVector(par_x);
1064 A = hypre_ParCSRMatrixDiag(par_A);
1065 i_dof_dof = hypre_CSRMatrixI(A);
1066 j_dof_dof = hypre_CSRMatrixJ(A);
1067 a_dof_dof = hypre_CSRMatrixData(A);
1068 x = hypre_VectorData(x_vector);
1069 aux = hypre_VectorData(aux_vector);
1070 /* for (i=0; i < num_dofs; i++)
1071 x[i] = 0.e0; */
1072
1073 if (num_procs > 1)
1074 {
1075 hypre_parCorrRes(par_A, par_x, rhs_vector, &rhs);
1076 }
1077 else
1078 {
1079 rhs = rhs_vector;
1080 }
1081
1082 /* forward solve: ----------------------------------------------- */
1083
1084 matrix_size_counter = 0;
1085 for (i=0; i < num_domains; i++)
1086 {
1087 matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
1088
1089 /* compute residual: ---------------------------------------- */
1090
1091 jj = 0;
1092 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1093 {
1094 aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
1095 for (k=i_dof_dof[j_domain_dof[j]];
1096 k<i_dof_dof[j_domain_dof[j]+1]; k++)
1097 aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
1098 jj++;
1099 }
1100 /* solve for correction: ------------------------------------- */
1101 if (use_nonsymm)
1102 {
1103 hypre_dgetrs(&uplo, &matrix_size, &one,
1104 &domain_matrixinverse[matrix_size_counter],
1105 &matrix_size, &pivots[piv_counter], aux,
1106 &matrix_size, &ierr);
1107 }
1108 else
1109 {
1110 hypre_dpotrs(&uplo, &matrix_size, &one,
1111 &domain_matrixinverse[matrix_size_counter],
1112 &matrix_size, aux,
1113 &matrix_size, &ierr);
1114 }
1115
1116
1117 if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
1118 jj = 0;
1119 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1120 {
1121 x[j_domain_dof[j]]+= relax_wt * aux[jj++];
1122 }
1123 matrix_size_counter += matrix_size * matrix_size;
1124 piv_counter += matrix_size;
1125 }
1126
1127 if (num_procs > 1)
1128 {
1129 hypre_SeqVectorDestroy(rhs);
1130 }
1131
1132 return hypre_error_flag;
1133 }
1134
hypre_MPSchwarzCFFWSolve(hypre_ParCSRMatrix * par_A,hypre_Vector * rhs_vector,hypre_CSRMatrix * domain_structure,hypre_ParVector * par_x,HYPRE_Real relax_wt,hypre_Vector * aux_vector,HYPRE_Int * CF_marker,HYPRE_Int rlx_pt,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)1135 HYPRE_Int hypre_MPSchwarzCFFWSolve(hypre_ParCSRMatrix *par_A,
1136 hypre_Vector *rhs_vector,
1137 hypre_CSRMatrix *domain_structure,
1138 hypre_ParVector *par_x,
1139 HYPRE_Real relax_wt,
1140 hypre_Vector *aux_vector,
1141 HYPRE_Int *CF_marker,
1142 HYPRE_Int rlx_pt, HYPRE_Int *pivots,
1143 HYPRE_Int use_nonsymm)
1144 {
1145 HYPRE_Int ierr = 0;
1146 /* HYPRE_Int num_dofs; */
1147 HYPRE_Int *i_dof_dof;
1148 HYPRE_Int *j_dof_dof;
1149 HYPRE_Real *a_dof_dof;
1150 HYPRE_Real *x;
1151 hypre_Vector *rhs;
1152 HYPRE_Real *aux;
1153 hypre_CSRMatrix *A;
1154 hypre_Vector *x_vector;
1155 MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
1156 HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
1157 HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
1158 HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
1159 HYPRE_Real *domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
1160
1161 HYPRE_Int piv_counter = 0;
1162 HYPRE_Int one = 1;
1163
1164 char uplo = 'L';
1165 HYPRE_Int jj,i,j,k; /*, j_loc, k_loc;*/
1166
1167
1168 HYPRE_Int matrix_size, matrix_size_counter = 0;
1169
1170 HYPRE_Int num_procs;
1171
1172 hypre_MPI_Comm_size(comm,&num_procs);
1173
1174 /* initiate: ----------------------------------------------- */
1175 /* num_dofs = hypre_CSRMatrixNumRows(A); */
1176 x_vector = hypre_ParVectorLocalVector(par_x);
1177 A = hypre_ParCSRMatrixDiag(par_A);
1178 i_dof_dof = hypre_CSRMatrixI(A);
1179 j_dof_dof = hypre_CSRMatrixJ(A);
1180 a_dof_dof = hypre_CSRMatrixData(A);
1181 x = hypre_VectorData(x_vector);
1182 aux = hypre_VectorData(aux_vector);
1183 /* for (i=0; i < num_dofs; i++)
1184 x[i] = 0.e0; */
1185
1186 if (use_nonsymm)
1187 uplo = 'N';
1188
1189 if (num_procs > 1)
1190 {
1191 hypre_parCorrRes(par_A, par_x, rhs_vector, &rhs);
1192 }
1193 else
1194 {
1195 rhs = rhs_vector;
1196 }
1197
1198 /* forward solve: ----------------------------------------------- */
1199
1200 matrix_size_counter = 0;
1201 for (i=0; i < num_domains; i++)
1202 {
1203 if (CF_marker[i] == rlx_pt)
1204 {
1205 matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
1206
1207 /* compute residual: ---------------------------------------- */
1208
1209 jj = 0;
1210 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1211 {
1212 aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
1213 if (CF_marker[j_domain_dof[j]] == rlx_pt)
1214 {
1215 for (k=i_dof_dof[j_domain_dof[j]];
1216 k<i_dof_dof[j_domain_dof[j]+1]; k++)
1217 aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
1218 }
1219 jj++;
1220 }
1221 /* solve for correction: ------------------------------------- */
1222 if (use_nonsymm)
1223 {
1224 hypre_dgetrs(&uplo, &matrix_size, &one,
1225 &domain_matrixinverse[matrix_size_counter],
1226 &matrix_size, &pivots[piv_counter], aux,
1227 &matrix_size, &ierr);
1228 }
1229
1230 else
1231 {
1232 hypre_dpotrs(&uplo, &matrix_size, &one,
1233 &domain_matrixinverse[matrix_size_counter],
1234 &matrix_size, aux,
1235 &matrix_size, &ierr);
1236 }
1237
1238 if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
1239 jj = 0;
1240 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1241 {
1242 x[j_domain_dof[j]]+= relax_wt * aux[jj++];
1243 }
1244 matrix_size_counter += matrix_size * matrix_size;
1245 piv_counter += matrix_size;
1246 }
1247 }
1248
1249 if (num_procs > 1)
1250 {
1251 hypre_SeqVectorDestroy(rhs);
1252 }
1253
1254 return hypre_error_flag;
1255 }
1256
1257 HYPRE_Int
transpose_matrix_create(HYPRE_Int ** i_face_element_pointer,HYPRE_Int ** j_face_element_pointer,HYPRE_Int * i_element_face,HYPRE_Int * j_element_face,HYPRE_Int num_elements,HYPRE_Int num_faces)1258 transpose_matrix_create( HYPRE_Int **i_face_element_pointer,
1259 HYPRE_Int **j_face_element_pointer,
1260 HYPRE_Int *i_element_face, HYPRE_Int *j_element_face,
1261 HYPRE_Int num_elements, HYPRE_Int num_faces)
1262 {
1263 /* FILE *f; */
1264 HYPRE_Int i, j;
1265
1266 HYPRE_Int *i_face_element, *j_face_element;
1267
1268 /* ======================================================================
1269 first create face_element graph: -------------------------------------
1270 ====================================================================== */
1271
1272 i_face_element = hypre_TAlloc(HYPRE_Int, (num_faces+1) , HYPRE_MEMORY_HOST);
1273 j_face_element = hypre_TAlloc(HYPRE_Int, i_element_face[num_elements] , HYPRE_MEMORY_HOST);
1274
1275
1276 for (i=0; i < num_faces; i++)
1277 i_face_element[i] = 0;
1278
1279 for (i=0; i < num_elements; i++)
1280 for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
1281 i_face_element[j_element_face[j]]++;
1282
1283 i_face_element[num_faces] = i_element_face[num_elements];
1284
1285 for (i=num_faces-1; i > -1; i--)
1286 i_face_element[i] = i_face_element[i+1] - i_face_element[i];
1287
1288 for (i=0; i < num_elements; i++)
1289 for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
1290 {
1291 j_face_element[i_face_element[j_element_face[j]]] = i;
1292 i_face_element[j_element_face[j]]++;
1293 }
1294
1295 for (i=num_faces-1; i > -1; i--)
1296 i_face_element[i+1] = i_face_element[i];
1297
1298 i_face_element[0] = 0;
1299
1300 /* hypre_printf("end building face--element graph: ++++++++++++++++++\n"); */
1301
1302 /* END building face_element graph; ================================ */
1303
1304 *i_face_element_pointer = i_face_element;
1305 *j_face_element_pointer = j_face_element;
1306
1307 return 0;
1308
1309 }
1310 HYPRE_Int
matrix_matrix_product(HYPRE_Int ** i_element_edge_pointer,HYPRE_Int ** j_element_edge_pointer,HYPRE_Int * i_element_face,HYPRE_Int * j_element_face,HYPRE_Int * i_face_edge,HYPRE_Int * j_face_edge,HYPRE_Int num_elements,HYPRE_Int num_faces,HYPRE_Int num_edges)1311 matrix_matrix_product( HYPRE_Int **i_element_edge_pointer,
1312 HYPRE_Int **j_element_edge_pointer,
1313 HYPRE_Int *i_element_face, HYPRE_Int *j_element_face,
1314 HYPRE_Int *i_face_edge, HYPRE_Int *j_face_edge,
1315 HYPRE_Int num_elements, HYPRE_Int num_faces, HYPRE_Int num_edges)
1316 {
1317 /* FILE *f; */
1318 HYPRE_Int i, j, k, l, m;
1319
1320 HYPRE_Int i_edge_on_local_list, i_edge_on_list;
1321 HYPRE_Int local_element_edge_counter = 0, element_edge_counter = 0;
1322 HYPRE_Int *j_local_element_edge;
1323
1324 HYPRE_Int *i_element_edge, *j_element_edge;
1325
1326 j_local_element_edge = hypre_TAlloc(HYPRE_Int, (num_edges+1) , HYPRE_MEMORY_HOST);
1327
1328 i_element_edge = hypre_TAlloc(HYPRE_Int, (num_elements+1) , HYPRE_MEMORY_HOST);
1329
1330 for (i=0; i < num_elements+1; i++)
1331 i_element_edge[i] = 0;
1332
1333 for (i=0; i < num_elements; i++)
1334 {
1335 local_element_edge_counter = 0;
1336 for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
1337 {
1338 k = j_element_face[j];
1339
1340 for (l=i_face_edge[k]; l < i_face_edge[k+1]; l++)
1341 {
1342 /* element i and edge j_face_edge[l] are connected */
1343
1344 /* hypre_printf("element %d contains edge %d;\n",
1345 i, j_face_edge[l]); */
1346
1347 i_edge_on_local_list = -1;
1348 for (m=0; m < local_element_edge_counter; m++)
1349 if (j_local_element_edge[m] == j_face_edge[l])
1350 {
1351 i_edge_on_local_list++;
1352 break;
1353 }
1354
1355 if (i_edge_on_local_list == -1)
1356 {
1357 i_element_edge[i]++;
1358 j_local_element_edge[local_element_edge_counter]=
1359 j_face_edge[l];
1360 local_element_edge_counter++;
1361 }
1362 }
1363 }
1364 }
1365
1366 hypre_TFree(j_local_element_edge, HYPRE_MEMORY_HOST);
1367
1368 for (i=0; i < num_elements; i++)
1369 i_element_edge[i+1] += i_element_edge[i];
1370
1371 for (i=num_elements; i>0; i--)
1372 i_element_edge[i] = i_element_edge[i-1];
1373
1374 i_element_edge[0] = 0;
1375
1376 j_element_edge = hypre_TAlloc(HYPRE_Int, i_element_edge[num_elements]
1377 , HYPRE_MEMORY_HOST);
1378
1379 /* fill--in the actual j_element_edge array: --------------------- */
1380
1381 element_edge_counter = 0;
1382 for (i=0; i < num_elements; i++)
1383 {
1384 i_element_edge[i] = element_edge_counter;
1385 for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
1386 {
1387 for (k=i_face_edge[j_element_face[j]];
1388 k<i_face_edge[j_element_face[j]+1];k++)
1389 {
1390 /* check if edge j_face_edge[k] is already on list ***/
1391
1392 i_edge_on_list = -1;
1393 for (l=i_element_edge[i];
1394 l<element_edge_counter; l++)
1395 if (j_element_edge[l] == j_face_edge[k])
1396 {
1397 i_edge_on_list++;
1398 break;
1399 }
1400
1401 if (i_edge_on_list == -1)
1402 {
1403 if (element_edge_counter >=
1404 i_element_edge[num_elements])
1405 {
1406 hypre_error_w_msg(HYPRE_ERROR_GENERIC,"error in j_element_edge size: \n");
1407 break;
1408 }
1409
1410 j_element_edge[element_edge_counter] =
1411 j_face_edge[k];
1412 element_edge_counter++;
1413 }
1414 }
1415 }
1416
1417 }
1418
1419 i_element_edge[num_elements] = element_edge_counter;
1420
1421 /*------------------------------------------------------------------
1422 f = fopen("element_edge", "w");
1423 for (i=0; i < num_elements; i++)
1424 {
1425 hypre_printf("\nelement: %d has edges:\n", i);
1426 for (j=i_element_edge[i]; j < i_element_edge[i+1]; j++)
1427 {
1428 hypre_printf("%d ", j_element_edge[j]);
1429 hypre_fprintf(f, "%d %d\n", i, j_element_edge[j]);
1430 }
1431
1432 hypre_printf("\n");
1433 }
1434
1435 fclose(f);
1436 */
1437
1438 /* hypre_printf("end element_edge computation: ++++++++++++++++++++++++ \n");*/
1439
1440 *i_element_edge_pointer = i_element_edge;
1441 *j_element_edge_pointer = j_element_edge;
1442
1443 return hypre_error_flag;
1444 }
1445
1446 /*--------------------------------------------------------------------------
1447 * hypre_AMGCreateDomainDof:
1448 *--------------------------------------------------------------------------*/
1449
1450 /*****************************************************************************
1451 *
1452 * Routine for constructing graph domain_dof with minimal overlap
1453 * and computing the respective matrix inverses to be
1454 * used in an overlapping Schwarz procedure (like smoother
1455 * in AMG);
1456 *
1457 *****************************************************************************/
1458 HYPRE_Int
hypre_AMGCreateDomainDof(hypre_CSRMatrix * A,HYPRE_Int domain_type,HYPRE_Int overlap,HYPRE_Int num_functions,HYPRE_Int * dof_func,hypre_CSRMatrix ** domain_structure_pointer,HYPRE_Int ** piv_pointer,HYPRE_Int use_nonsymm)1459 hypre_AMGCreateDomainDof(hypre_CSRMatrix *A,
1460 HYPRE_Int domain_type, HYPRE_Int overlap,
1461 HYPRE_Int num_functions, HYPRE_Int *dof_func,
1462 hypre_CSRMatrix **domain_structure_pointer,
1463 HYPRE_Int **piv_pointer, HYPRE_Int use_nonsymm)
1464 {
1465
1466 HYPRE_Int *i_domain_dof, *j_domain_dof;
1467 HYPRE_Real *domain_matrixinverse;
1468 HYPRE_Int num_domains;
1469 hypre_CSRMatrix *domain_structure = NULL;
1470
1471 HYPRE_Int *i_dof_dof = hypre_CSRMatrixI(A);
1472 HYPRE_Int *j_dof_dof = hypre_CSRMatrixJ(A);
1473 HYPRE_Real *a_dof_dof = hypre_CSRMatrixData(A);
1474 HYPRE_Int num_dofs = hypre_CSRMatrixNumRows(A);
1475
1476 /* HYPRE_Int *i_dof_to_accept_weight; */
1477 HYPRE_Int *i_dof_to_prefer_weight,
1478 *w_dof_dof, *i_dof_weight;
1479 HYPRE_Int *i_dof_to_aggregate, *i_aggregate_dof, *j_aggregate_dof;
1480
1481 HYPRE_Int *i_dof_index;
1482
1483 HYPRE_Int ierr = 0;
1484 HYPRE_Int i,j,k, l_loc, i_loc, j_loc;
1485 HYPRE_Int i_dof;
1486 HYPRE_Int *i_local_to_global;
1487 HYPRE_Int *i_global_to_local;
1488
1489 HYPRE_Int local_dof_counter, max_local_dof_counter=0;
1490
1491 HYPRE_Int domain_dof_counter = 0, domain_matrixinverse_counter = 0;
1492 HYPRE_Int nf;
1493
1494 HYPRE_Real *AE;
1495
1496 HYPRE_Int piv_counter = 0;
1497 HYPRE_Int *ipiv;
1498 HYPRE_Int *piv = NULL;
1499 char uplo = 'L';
1500 HYPRE_Int cnt;
1501
1502 /* --------------------------------------------------------------------- */
1503
1504 /*=======================================================================*/
1505 /* create artificial domains by agglomeration; */
1506 /*=======================================================================*/
1507
1508 /*hypre_printf("----------- create artificials domain by agglomeration; ======\n");
1509 */
1510
1511 if (num_dofs == 0)
1512 {
1513 *domain_structure_pointer = domain_structure;
1514
1515 *piv_pointer = piv;
1516
1517 return hypre_error_flag;
1518 }
1519
1520 i_aggregate_dof = hypre_CTAlloc(HYPRE_Int, num_dofs+1, HYPRE_MEMORY_HOST);
1521 j_aggregate_dof= hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
1522
1523 if (domain_type == 2)
1524 {
1525 i_dof_to_prefer_weight = hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
1526 w_dof_dof = hypre_CTAlloc(HYPRE_Int, i_dof_dof[num_dofs], HYPRE_MEMORY_HOST);
1527 i_dof_weight = hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
1528
1529 for (i=0; i<num_dofs; i++)
1530 for (j=i_dof_dof[i]; j< i_dof_dof[i+1]; j++)
1531 {
1532 if (j_dof_dof[j] == i)
1533 w_dof_dof[j]=0;
1534 else
1535 w_dof_dof[j]=1;
1536 }
1537
1538 /*hypre_printf("end computing weights for agglomeration procedure: --------\n");
1539 */
1540 hypre_AMGeAgglomerate(i_aggregate_dof, j_aggregate_dof,
1541 i_dof_dof, j_dof_dof, w_dof_dof,
1542 i_dof_dof, j_dof_dof,
1543 i_dof_dof, j_dof_dof,
1544 i_dof_to_prefer_weight,
1545 i_dof_weight,
1546 num_dofs, num_dofs,
1547 &num_domains);
1548
1549 hypre_TFree(i_dof_to_prefer_weight, HYPRE_MEMORY_HOST);
1550 hypre_TFree(i_dof_weight, HYPRE_MEMORY_HOST);
1551 hypre_TFree(w_dof_dof, HYPRE_MEMORY_HOST);
1552 }
1553 else
1554 {
1555 nf = 1;
1556 if (domain_type == 1) nf = num_functions;
1557
1558 num_domains = num_dofs/nf;
1559 for (i=0; i < num_domains+1; i++)
1560 {
1561 i_aggregate_dof[i] = nf*i;
1562 }
1563 for (i=0; i < num_dofs; i++)
1564 {
1565 j_aggregate_dof[i] = i;
1566 }
1567 }
1568 /*hypre_printf("num_dofs: %d, num_domains: %d\n", num_dofs, num_domains);*/
1569
1570
1571 /*
1572 hypre_printf("========================================================\n");
1573 hypre_printf("== artificial non--overlapping domains (aggregates): ===\n");
1574 hypre_printf("========================================================\n");
1575
1576
1577 for (i=0; i < num_domains; i++)
1578 {
1579 hypre_printf("\n aggregate %d:\n", i);
1580 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1581 hypre_printf("%d, ", j_aggregate_dof[j]);
1582
1583 hypre_printf("\n");
1584 }
1585 */
1586
1587 /* make domains from aggregates: *********************************/
1588
1589 if (overlap == 1)
1590 {
1591 i_domain_dof = hypre_CTAlloc(HYPRE_Int, num_domains+1, HYPRE_MEMORY_HOST);
1592
1593 i_dof_index = hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
1594
1595 for (i=0; i < num_dofs; i++)
1596 i_dof_index[i] = -1;
1597
1598 i_dof_to_aggregate = hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
1599 for (i=0; i < num_domains; i++)
1600 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1601 i_dof_to_aggregate[j_aggregate_dof[j]] = i;
1602
1603 domain_dof_counter=0;
1604 for (i=0; i < num_domains; i++)
1605 {
1606 i_domain_dof[i] = domain_dof_counter;
1607 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1608 i_dof_index[j_aggregate_dof[j]]=-1;
1609
1610 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1611 for (k=i_dof_dof[j_aggregate_dof[j]];
1612 k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1613 if (i_dof_to_aggregate[j_dof_dof[k]] >= i
1614 && i_dof_index[j_dof_dof[k]]==-1)
1615 {
1616 i_dof_index[j_dof_dof[k]]++;
1617 domain_dof_counter++;
1618 }
1619 }
1620
1621 i_domain_dof[num_domains] = domain_dof_counter;
1622 j_domain_dof = hypre_CTAlloc(HYPRE_Int, domain_dof_counter, HYPRE_MEMORY_HOST);
1623
1624 for (i=0; i < num_dofs; i++)
1625 i_dof_index[i] = -1;
1626
1627 domain_dof_counter=0;
1628 for (i=0; i < num_domains; i++)
1629 {
1630 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1631 i_dof_index[j_aggregate_dof[j]]=-1;
1632
1633 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1634 for (k=i_dof_dof[j_aggregate_dof[j]];
1635 k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1636 if (i_dof_to_aggregate[j_dof_dof[k]] >= i
1637 && i_dof_index[j_dof_dof[k]]==-1)
1638 {
1639 i_dof_index[j_dof_dof[k]]++;
1640 j_domain_dof[domain_dof_counter] = j_dof_dof[k];
1641 domain_dof_counter++;
1642 }
1643 }
1644
1645 hypre_TFree(i_aggregate_dof, HYPRE_MEMORY_HOST);
1646 hypre_TFree(j_aggregate_dof, HYPRE_MEMORY_HOST);
1647 hypre_TFree(i_dof_to_aggregate, HYPRE_MEMORY_HOST);
1648 hypre_TFree(i_dof_index, HYPRE_MEMORY_HOST);
1649 }
1650 else if (overlap == 2)
1651 {
1652 i_domain_dof = hypre_CTAlloc(HYPRE_Int, num_domains+1, HYPRE_MEMORY_HOST);
1653
1654 i_dof_index = hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
1655
1656 for (i=0; i < num_dofs; i++)
1657 i_dof_index[i] = -1;
1658
1659 domain_dof_counter=0;
1660 for (i=0; i < num_domains; i++)
1661 {
1662 i_domain_dof[i] = domain_dof_counter;
1663 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1664 for (k=i_dof_dof[j_aggregate_dof[j]];
1665 k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1666 if (i_dof_index[j_dof_dof[k]]==-1)
1667 {
1668 i_dof_index[j_dof_dof[k]]++;
1669 domain_dof_counter++;
1670 }
1671
1672 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1673 for (k=i_dof_dof[j_aggregate_dof[j]];
1674 k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1675 i_dof_index[j_dof_dof[k]]=-1;
1676
1677 }
1678
1679 for (i=0; i < num_dofs; i++)
1680 i_dof_index[i] = -1;
1681
1682 i_domain_dof[num_domains] = domain_dof_counter;
1683 j_domain_dof = hypre_CTAlloc(HYPRE_Int, domain_dof_counter, HYPRE_MEMORY_HOST);
1684
1685 domain_dof_counter=0;
1686 for (i=0; i < num_domains; i++)
1687 {
1688 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1689 for (k=i_dof_dof[j_aggregate_dof[j]];
1690 k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1691 if ( i_dof_index[j_dof_dof[k]]==-1)
1692 {
1693 i_dof_index[j_dof_dof[k]]++;
1694 j_domain_dof[domain_dof_counter] = j_dof_dof[k];
1695 domain_dof_counter++;
1696 }
1697
1698 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1699 for (k=i_dof_dof[j_aggregate_dof[j]];
1700 k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1701 i_dof_index[j_dof_dof[k]]=-1;
1702 }
1703
1704 hypre_TFree(i_aggregate_dof, HYPRE_MEMORY_HOST);
1705 hypre_TFree(j_aggregate_dof, HYPRE_MEMORY_HOST);
1706 hypre_TFree(i_dof_index, HYPRE_MEMORY_HOST);
1707 }
1708 else
1709 {
1710 i_domain_dof = i_aggregate_dof;
1711 j_domain_dof = j_aggregate_dof;
1712 }
1713
1714 /*hypre_printf("END domain_dof computations: =================================\n");
1715 */
1716 domain_matrixinverse_counter = 0;
1717 local_dof_counter = 0;
1718 piv_counter = 0;
1719
1720 for (i=0; i < num_domains; i++)
1721 {
1722 local_dof_counter = i_domain_dof[i+1]-i_domain_dof[i];
1723 domain_matrixinverse_counter+= local_dof_counter * local_dof_counter;
1724 piv_counter += local_dof_counter;
1725
1726 if (local_dof_counter > max_local_dof_counter)
1727 max_local_dof_counter = local_dof_counter;
1728 }
1729
1730 domain_matrixinverse = hypre_CTAlloc(HYPRE_Real, domain_matrixinverse_counter, HYPRE_MEMORY_HOST);
1731 if (use_nonsymm)
1732 piv = hypre_CTAlloc(HYPRE_Int, piv_counter, HYPRE_MEMORY_HOST);
1733
1734 i_local_to_global = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter, HYPRE_MEMORY_HOST);
1735
1736 i_global_to_local = hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
1737
1738 for (i=0; i < num_dofs; i++)
1739 i_global_to_local[i] = -1;
1740
1741 piv_counter = 0;
1742 domain_matrixinverse_counter = 0;
1743 for (i=0; i < num_domains; i++)
1744 {
1745 local_dof_counter = 0;
1746 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1747 {
1748 i_global_to_local[j_domain_dof[j]] = local_dof_counter;
1749 i_local_to_global[local_dof_counter] = j_domain_dof[j];
1750 local_dof_counter++;
1751 }
1752
1753 /* get local matrix in AE: ======================================== */
1754 cnt = 0;
1755
1756 AE = &domain_matrixinverse[domain_matrixinverse_counter];
1757 ipiv = &piv[piv_counter];
1758 for (i_loc=0; i_loc < local_dof_counter; i_loc++)
1759 for (j_loc=0; j_loc < local_dof_counter; j_loc++)
1760 AE[cnt++] = 0.e0;
1761
1762 for (i_loc=0; i_loc < local_dof_counter; i_loc++)
1763 {
1764 i_dof = i_local_to_global[i_loc];
1765 for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++)
1766 {
1767 j_loc = i_global_to_local[j_dof_dof[j]];
1768 if (j_loc >=0)
1769 AE[i_loc + j_loc * local_dof_counter] = a_dof_dof[j];
1770 }
1771 }
1772
1773 if (use_nonsymm)
1774 {
1775 hypre_dgetrf(&local_dof_counter,
1776 &local_dof_counter, AE,
1777 &local_dof_counter, ipiv, &ierr);
1778 piv_counter +=local_dof_counter;
1779 }
1780
1781 else
1782 {
1783 hypre_dpotrf(&uplo,&local_dof_counter, AE,
1784 &local_dof_counter, &ierr);
1785 }
1786
1787 domain_matrixinverse_counter+=local_dof_counter*local_dof_counter;
1788
1789 for (l_loc=0; l_loc < local_dof_counter; l_loc++)
1790 i_global_to_local[i_local_to_global[l_loc]] = -1;
1791
1792 }
1793
1794 hypre_TFree(i_local_to_global, HYPRE_MEMORY_HOST);
1795 hypre_TFree(i_global_to_local, HYPRE_MEMORY_HOST);
1796
1797 domain_structure = hypre_CSRMatrixCreate(num_domains, max_local_dof_counter,
1798 i_domain_dof[num_domains]);
1799
1800 hypre_CSRMatrixMemoryLocation(domain_structure) = HYPRE_MEMORY_HOST;
1801
1802 hypre_CSRMatrixI(domain_structure) = i_domain_dof;
1803 hypre_CSRMatrixJ(domain_structure) = j_domain_dof;
1804 hypre_CSRMatrixData(domain_structure) = domain_matrixinverse;
1805
1806 *domain_structure_pointer = domain_structure;
1807
1808 *piv_pointer = piv;
1809
1810 return hypre_error_flag;
1811 }
1812
1813 /* unacceptable faces: i_face_to_prefer_weight[] = -1; ------------------*/
1814
hypre_AMGeAgglomerate(HYPRE_Int * i_AE_element,HYPRE_Int * j_AE_element,HYPRE_Int * i_face_face,HYPRE_Int * j_face_face,HYPRE_Int * w_face_face,HYPRE_Int * i_face_element,HYPRE_Int * j_face_element,HYPRE_Int * i_element_face,HYPRE_Int * j_element_face,HYPRE_Int * i_face_to_prefer_weight,HYPRE_Int * i_face_weight,HYPRE_Int num_faces,HYPRE_Int num_elements,HYPRE_Int * num_AEs_pointer)1815 HYPRE_Int hypre_AMGeAgglomerate(HYPRE_Int *i_AE_element, HYPRE_Int *j_AE_element,
1816 HYPRE_Int *i_face_face, HYPRE_Int *j_face_face, HYPRE_Int *w_face_face,
1817 HYPRE_Int *i_face_element, HYPRE_Int *j_face_element,
1818 HYPRE_Int *i_element_face, HYPRE_Int *j_element_face,
1819 HYPRE_Int *i_face_to_prefer_weight,
1820 HYPRE_Int *i_face_weight,
1821 HYPRE_Int num_faces, HYPRE_Int num_elements,
1822 HYPRE_Int *num_AEs_pointer)
1823 {
1824 HYPRE_Int ierr = 0;
1825 HYPRE_Int i, j, k, l;
1826
1827 HYPRE_Int face_to_eliminate;
1828 HYPRE_Int max_weight_old, max_weight;
1829
1830 HYPRE_Int AE_counter=0, AE_element_counter=0;
1831
1832 /* HYPRE_Int i_element_face_counter; */
1833
1834 HYPRE_Int *i_element_to_AE;
1835
1836 HYPRE_Int *previous, *next, *first;
1837 HYPRE_Int head, tail, last;
1838
1839 HYPRE_Int face_max_weight, face_local_max_weight, preferred_weight;
1840
1841 HYPRE_Int weight, weight_max;
1842
1843 max_weight = 1;
1844 for (i=0; i < num_faces; i++)
1845 {
1846 weight = 1;
1847 for (j=i_face_face[i]; j < i_face_face[i+1]; j++)
1848 weight+= w_face_face[j];
1849 if (max_weight < weight) max_weight = weight;
1850 }
1851
1852 first = hypre_CTAlloc(HYPRE_Int, max_weight+1, HYPRE_MEMORY_HOST);
1853
1854 next = hypre_CTAlloc(HYPRE_Int, num_faces, HYPRE_MEMORY_HOST);
1855
1856 previous = hypre_CTAlloc(HYPRE_Int, num_faces+1, HYPRE_MEMORY_HOST);
1857
1858 tail = num_faces;
1859 head = -1;
1860
1861 for (i=0; i < num_faces; i++)
1862 {
1863 next[i] = i+1;
1864 previous[i] = i-1;
1865 }
1866
1867 last = num_faces-1;
1868 previous[tail] = last;
1869
1870 for (weight=1; weight <= max_weight; weight++)
1871 first[weight] = tail;
1872
1873 i_element_to_AE = hypre_CTAlloc(HYPRE_Int, num_elements, HYPRE_MEMORY_HOST);
1874
1875 /*=======================================================================
1876 AGGLOMERATION PROCEDURE:
1877 ======================================================================= */
1878
1879 for (k=0; k < num_elements; k++)
1880 i_element_to_AE[k] = -1;
1881
1882 for (k=0; k < num_faces; k++)
1883 i_face_weight[k] = 1;
1884
1885 first[0] = 0;
1886 first[1] = 0;
1887
1888 last = previous[tail];
1889 weight_max = i_face_weight[last];
1890
1891 k = last;
1892 face_max_weight = -1;
1893 while (k!= head)
1894 {
1895 if (i_face_to_prefer_weight[k] > -1)
1896 face_max_weight = k;
1897
1898 if (face_max_weight > -1) break;
1899
1900 k=previous[k];
1901 }
1902
1903 /* this will be used if the faces have been sorted: *****************
1904 k = last;
1905 face_max_weight = -1;
1906 while (k != head)
1907 {
1908 if (i_face_to_prefer_weight[k] > -1)
1909 face_max_weight = k;
1910
1911
1912 if (face_max_weight > -1)
1913 {
1914 max_weight = i_face_weight[face_max_weight];
1915 l = face_max_weight;
1916
1917 while (previous[l] != head)
1918 {
1919
1920 if (i_face_weight[previous[l]] < max_weight)
1921 break;
1922 else
1923 if (i_face_to_prefer_weight[previous[l]] >
1924 i_face_to_prefer_weight[face_max_weight])
1925 {
1926 l = previous[l];
1927 face_max_weight = l;
1928 }
1929 else
1930 l = previous[l];
1931 }
1932
1933 break;
1934 }
1935
1936 l =previous[k];
1937
1938 weight = i_face_weight[k];
1939 last = previous[tail];
1940 if (last == head)
1941 weight_max = 0;
1942 else
1943 weight_max = i_face_weight[last];
1944
1945 ierr = hypre_remove_entry(weight, &weight_max,
1946 previous, next, first, &last,
1947 head, tail,
1948 k);
1949
1950 k=l;
1951 }
1952 */
1953
1954 if (face_max_weight == -1)
1955 {
1956 hypre_error_w_msg(HYPRE_ERROR_GENERIC,"all faces are unacceptable, i.e., no faces to eliminate !\n");
1957
1958 *num_AEs_pointer = 1;
1959
1960 i_AE_element[0] = 0;
1961 for (i=0; i < num_elements; i++)
1962 {
1963 i_element_to_AE[i] = 0;
1964 j_AE_element[i] = i;
1965 }
1966
1967 i_AE_element[1] = num_elements;
1968
1969 return hypre_error_flag;
1970 }
1971
1972 for (k=0; k < num_faces; k++)
1973 if (i_face_to_prefer_weight[k] > i_face_to_prefer_weight[face_max_weight])
1974 face_max_weight = k;
1975
1976 max_weight = i_face_weight[face_max_weight];
1977
1978 AE_counter=0;
1979 AE_element_counter=0;
1980
1981 i_AE_element[AE_counter] = AE_element_counter;
1982
1983 max_weight_old = -1;
1984
1985 face_local_max_weight = face_max_weight;
1986
1987 eliminate_face:
1988
1989 face_to_eliminate = face_local_max_weight;
1990
1991 max_weight = i_face_weight[face_to_eliminate];
1992
1993 last = previous[tail];
1994 if (last == head)
1995 weight_max = 0;
1996 else
1997 weight_max = i_face_weight[last];
1998
1999 ierr = hypre_remove_entry(max_weight, &weight_max,
2000 previous, next, first, &last,
2001 head, tail,
2002 face_to_eliminate);
2003
2004 i_face_weight[face_to_eliminate] = 0;
2005
2006 /*----------------------------------------------------------
2007 * agglomeration step:
2008 *
2009 * put on AE_element -- list all elements
2010 * that share face "face_to_eliminate";
2011 *----------------------------------------------------------*/
2012
2013 for (k = i_face_element[face_to_eliminate];
2014 k < i_face_element[face_to_eliminate+1]; k++)
2015 {
2016 /* check if element j_face_element[k] is already on the list: */
2017
2018 if (j_face_element[k] < num_elements)
2019 {
2020 if (i_element_to_AE[j_face_element[k]] == -1)
2021 {
2022 j_AE_element[AE_element_counter] = j_face_element[k];
2023 i_element_to_AE[j_face_element[k]] = AE_counter;
2024 AE_element_counter++;
2025 }
2026 }
2027 }
2028
2029 /* local update & search:==================================== */
2030
2031 for (j=i_face_face[face_to_eliminate];
2032 j<i_face_face[face_to_eliminate+1]; j++)
2033 if (i_face_weight[j_face_face[j]] > 0)
2034 {
2035 weight = i_face_weight[j_face_face[j]];
2036
2037
2038 last = previous[tail];
2039 if (last == head)
2040 weight_max = 0;
2041 else
2042 weight_max = i_face_weight[last];
2043
2044 ierr = hypre_move_entry(weight, &weight_max,
2045 previous, next, first, &last,
2046 head, tail,
2047 j_face_face[j]);
2048
2049 i_face_weight[j_face_face[j]]+=w_face_face[j];
2050
2051 weight = i_face_weight[j_face_face[j]];
2052
2053 /* hypre_printf("update entry: %d\n", j_face_face[j]); */
2054
2055 last = previous[tail];
2056 if (last == head)
2057 weight_max = 0;
2058 else
2059 weight_max = i_face_weight[last];
2060
2061 ierr = hypre_update_entry(weight, &weight_max,
2062 previous, next, first, &last,
2063 head, tail,
2064 j_face_face[j]);
2065
2066 last = previous[tail];
2067 if (last == head)
2068 weight_max = 0;
2069 else
2070 weight_max = i_face_weight[last];
2071 }
2072
2073 /* find a face of the elements that have already been agglomerated
2074 with a maximal weight: ====================================== */
2075
2076 max_weight_old = max_weight;
2077
2078 face_local_max_weight = -1;
2079 preferred_weight = -1;
2080
2081 for (l = i_AE_element[AE_counter];
2082 l < AE_element_counter; l++)
2083 {
2084 for (j=i_element_face[j_AE_element[l]];
2085 j<i_element_face[j_AE_element[l]+1]; j++)
2086 {
2087 i = j_element_face[j];
2088
2089 if (max_weight_old > 1 && i_face_weight[i] > 0 &&
2090 i_face_to_prefer_weight[i] > -1)
2091 {
2092 if ( max_weight < i_face_weight[i])
2093 {
2094 face_local_max_weight = i;
2095 max_weight = i_face_weight[i];
2096 preferred_weight = i_face_to_prefer_weight[i];
2097 }
2098
2099 if ( max_weight == i_face_weight[i]
2100 && i_face_to_prefer_weight[i] > preferred_weight)
2101 {
2102 face_local_max_weight = i;
2103 preferred_weight = i_face_to_prefer_weight[i];
2104 }
2105 }
2106 }
2107 }
2108
2109 if (face_local_max_weight > -1) goto eliminate_face;
2110
2111 /* ----------------------------------------------------------------
2112 * eliminate and label with i_face_weight[ ] = -1
2113 * "boundary faces of agglomerated elements";
2114 * those faces will be preferred for the next coarse spaces
2115 * in case multiple coarse spaces are to be built;
2116 * ---------------------------------------------------------------*/
2117
2118 for (k = i_AE_element[AE_counter]; k < AE_element_counter; k++)
2119 {
2120 for (j = i_element_face[j_AE_element[k]];
2121 j < i_element_face[j_AE_element[k]+1]; j++)
2122 {
2123 if (i_face_weight[j_element_face[j]] > 0)
2124 {
2125 weight = i_face_weight[j_element_face[j]];
2126 last = previous[tail];
2127 if (last == head)
2128 weight_max = 0;
2129 else
2130 weight_max = i_face_weight[last];
2131
2132
2133 ierr = hypre_remove_entry(weight, &weight_max,
2134 previous, next, first, &last,
2135 head, tail,
2136 j_element_face[j]);
2137
2138 i_face_weight[j_element_face[j]] = -1;
2139
2140 }
2141 }
2142 }
2143
2144 if (AE_element_counter > i_AE_element[AE_counter])
2145 {
2146 /* hypre_printf("completing agglomerated element: %d\n",
2147 AE_counter); */
2148 AE_counter++;
2149 }
2150
2151 i_AE_element[AE_counter] = AE_element_counter;
2152
2153 /* find a face with maximal weight: ---------------------------*/
2154
2155 last = previous[tail];
2156 if (last == head) goto end_agglomerate;
2157
2158 weight_max = i_face_weight[last];
2159
2160 /* hypre_printf("global search: ======================================\n"); */
2161
2162 face_max_weight = -1;
2163
2164 k = last;
2165 while (k != head)
2166 {
2167 if (i_face_to_prefer_weight[k] > -1)
2168 face_max_weight = k;
2169
2170 if (face_max_weight > -1)
2171 {
2172 max_weight = i_face_weight[face_max_weight];
2173 l = face_max_weight;
2174
2175 while (previous[l] != head)
2176 {
2177
2178 if (i_face_weight[previous[l]] < max_weight)
2179 break;
2180 else
2181 if (i_face_to_prefer_weight[previous[l]] >
2182 i_face_to_prefer_weight[face_max_weight])
2183 {
2184 l = previous[l];
2185 face_max_weight = l;
2186 }
2187 else
2188 l = previous[l];
2189 }
2190
2191 break;
2192 }
2193
2194 l =previous[k];
2195 /* remove face k: ---------------------------------------*/
2196
2197 weight = i_face_weight[k];
2198 last = previous[tail];
2199 if (last == head)
2200 weight_max = 0;
2201 else
2202 weight_max = i_face_weight[last];
2203
2204
2205 ierr = hypre_remove_entry(weight, &weight_max,
2206 previous, next, first, &last,
2207 head, tail,
2208 k);
2209
2210 /* i_face_weight[k] = -1; */
2211
2212 k=l;
2213 }
2214
2215 if (face_max_weight == -1) goto end_agglomerate;
2216
2217 max_weight = i_face_weight[face_max_weight];
2218
2219 face_local_max_weight = face_max_weight;
2220
2221 goto eliminate_face;
2222
2223 end_agglomerate:
2224
2225 /* eliminate isolated elements: ----------------------------------*/
2226
2227 for (i=0; i<num_elements; i++)
2228 {
2229
2230 if (i_element_to_AE[i] == -1)
2231 {
2232 for (j=i_element_face[i]; j < i_element_face[i+1]
2233 && i_element_to_AE[i] == -1; j++)
2234 if (i_face_to_prefer_weight[j_element_face[j]] > -1)
2235 for (k=i_face_element[j_element_face[j]];
2236 k<i_face_element[j_element_face[j]+1]
2237 && i_element_to_AE[i] == -1; k++)
2238 if (i_element_to_AE[j_face_element[k]] != -1)
2239 i_element_to_AE[i] = i_element_to_AE[j_face_element[k]];
2240 }
2241
2242 /*
2243 if (i_element_to_AE[i] == -1)
2244 {
2245 i_element_face_counter = 0;
2246 for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
2247 if (i_face_to_prefer_weight[j_element_face[j]] > -1)
2248 i_element_face_counter++;
2249
2250 if (i_element_face_counter == 1)
2251 {
2252 for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
2253 if (i_face_to_prefer_weight[j_element_face[j]] > -1)
2254 for (k=i_face_element[j_element_face[j]];
2255 k<i_face_element[j_element_face[j]+1]; k++)
2256 if (i_element_to_AE[j_face_element[k]] != -1)
2257 i_element_to_AE[i] = i_element_to_AE[j_face_element[k]];
2258 }
2259 }
2260 */
2261
2262 if (i_element_to_AE[i] == -1)
2263 {
2264 i_element_to_AE[i] = AE_counter;
2265 AE_counter++;
2266 }
2267 }
2268
2269 num_AEs_pointer[0] = AE_counter;
2270
2271 /* compute adjoint graph: -------------------------------------------*/
2272
2273 for (i=0; i < AE_counter; i++)
2274 i_AE_element[i] = 0;
2275
2276 for (i=0; i < num_elements; i++)
2277 i_AE_element[i_element_to_AE[i]]++;
2278
2279 i_AE_element[AE_counter] = num_elements;
2280
2281 for (i=AE_counter-1; i > -1; i--)
2282 i_AE_element[i] = i_AE_element[i+1] - i_AE_element[i];
2283
2284 for (i=0; i < num_elements; i++)
2285 {
2286 j_AE_element[i_AE_element[i_element_to_AE[i]]] = i;
2287 i_AE_element[i_element_to_AE[i]]++;
2288 }
2289
2290 for (i=AE_counter-1; i > -1; i--)
2291 i_AE_element[i+1] = i_AE_element[i];
2292
2293 i_AE_element[0] = 0;
2294
2295 /*--------------------------------------------------------------------*/
2296 for (i=0; i < num_faces; i++)
2297 if (i_face_to_prefer_weight[i] == -1) i_face_weight[i] = -1;
2298
2299
2300 hypre_TFree(i_element_to_AE, HYPRE_MEMORY_HOST);
2301
2302 hypre_TFree(previous, HYPRE_MEMORY_HOST);
2303 hypre_TFree(next, HYPRE_MEMORY_HOST);
2304 hypre_TFree(first, HYPRE_MEMORY_HOST);
2305
2306 return ierr;
2307 }
2308
hypre_update_entry(HYPRE_Int weight,HYPRE_Int * weight_max,HYPRE_Int * previous,HYPRE_Int * next,HYPRE_Int * first,HYPRE_Int * last,HYPRE_Int head,HYPRE_Int tail,HYPRE_Int i)2309 HYPRE_Int hypre_update_entry(HYPRE_Int weight, HYPRE_Int *weight_max,
2310 HYPRE_Int *previous, HYPRE_Int *next, HYPRE_Int *first, HYPRE_Int *last,
2311 HYPRE_Int head, HYPRE_Int tail,
2312 HYPRE_Int i)
2313
2314 {
2315 HYPRE_Int weight0;
2316
2317 if (previous[i] != head) next[previous[i]] = next[i];
2318 previous[next[i]] = previous[i];
2319
2320 if (first[weight] == tail)
2321 {
2322 if (weight <= weight_max[0])
2323 {
2324 hypre_printf("ERROR IN UPDATE_ENTRY: ===================\n");
2325 hypre_printf("weight: %d, weight_max: %d\n",
2326 weight, weight_max[0]);
2327 return -1;
2328 }
2329 for (weight0=weight_max[0]+1; weight0 <= weight; weight0++)
2330 {
2331 first[weight0] = i;
2332 /* hypre_printf("create first[%d] = %d\n", weight0, i); */
2333 }
2334
2335 previous[i] = previous[tail];
2336 next[i] = tail;
2337 if (previous[tail] > head)
2338 next[previous[tail]] = i;
2339 previous[tail] = i;
2340
2341 }
2342 else
2343 /* first[weight] already exists: =====================*/
2344 {
2345 previous[i] = previous[first[weight]];
2346 next[i] = first[weight];
2347
2348 if (previous[first[weight]] != head)
2349 next[previous[first[weight]]] = i;
2350
2351 previous[first[weight]] = i;
2352
2353 for (weight0=1; weight0 <= weight; weight0++)
2354 if (first[weight0] == first[weight])
2355 first[weight0] = i;
2356
2357 }
2358
2359 return 0;
2360 }
2361
hypre_remove_entry(HYPRE_Int weight,HYPRE_Int * weight_max,HYPRE_Int * previous,HYPRE_Int * next,HYPRE_Int * first,HYPRE_Int * last,HYPRE_Int head,HYPRE_Int tail,HYPRE_Int i)2362 HYPRE_Int hypre_remove_entry(HYPRE_Int weight, HYPRE_Int *weight_max,
2363 HYPRE_Int *previous, HYPRE_Int *next, HYPRE_Int *first, HYPRE_Int *last,
2364 HYPRE_Int head, HYPRE_Int tail,
2365 HYPRE_Int i)
2366 {
2367 HYPRE_Int weight0;
2368
2369 if (previous[i] != head) next[previous[i]] = next[i];
2370 previous[next[i]] = previous[i];
2371
2372 for (weight0=1; weight0 <= weight_max[0]; weight0++)
2373 {
2374 /* hypre_printf("first[%d}: %d\n", weight0, first[weight0]); */
2375 if (first[weight0] == i)
2376 {
2377 first[weight0] = next[i];
2378 /* hypre_printf("shift: first[%d]= %d to %d\n",
2379 weight0, i, next[i]);
2380 if (i == last[0])
2381 hypre_printf("i= last[0]: %d\n", i); */
2382 }
2383 }
2384
2385 next[i] = i;
2386 previous[i] = i;
2387
2388 return 0;
2389 }
2390
hypre_move_entry(HYPRE_Int weight,HYPRE_Int * weight_max,HYPRE_Int * previous,HYPRE_Int * next,HYPRE_Int * first,HYPRE_Int * last,HYPRE_Int head,HYPRE_Int tail,HYPRE_Int i)2391 HYPRE_Int hypre_move_entry(HYPRE_Int weight, HYPRE_Int *weight_max,
2392 HYPRE_Int *previous, HYPRE_Int *next, HYPRE_Int *first, HYPRE_Int *last,
2393 HYPRE_Int head, HYPRE_Int tail,
2394 HYPRE_Int i)
2395 {
2396 HYPRE_Int weight0;
2397
2398 if (previous[i] != head) next[previous[i]] = next[i];
2399 previous[next[i]] = previous[i];
2400
2401 for (weight0=1; weight0 <= weight_max[0]; weight0++)
2402 {
2403 if (first[weight0] == i)
2404 first[weight0] = next[i];
2405 }
2406
2407 return 0;
2408 }
2409
2410 /*---------------------------------------------------------------------
2411 hypre_matinv: X <-- A**(-1) ; A IS POSITIVE DEFINITE (non--symmetric);
2412 ---------------------------------------------------------------------*/
2413
hypre_matinv(HYPRE_Real * x,HYPRE_Real * a,HYPRE_Int k)2414 HYPRE_Int hypre_matinv(HYPRE_Real *x, HYPRE_Real *a, HYPRE_Int k)
2415 {
2416 HYPRE_Int i,j,l, ierr =0;
2417
2418 for (i=0; i < k; i++)
2419 {
2420 if (a[i+i*k] <= 0.e0)
2421 {
2422 if (i < k-1)
2423 {
2424 /*********
2425 hypre_printf("indefinite singular matrix in *** matinv ***:\n");
2426 hypre_printf("i:%d; diagonal entry: %e\n", i, a[i+k*i]);
2427 */
2428 ierr = -1;
2429 }
2430
2431 a[i+i*k] = 0.e0;
2432 }
2433 else
2434 a[i+k*i] = 1.0 / a[i+i*k];
2435
2436 for (j=1; j < k-i; j++)
2437 {
2438 for (l=1; l < k-i; l++)
2439 {
2440 a[i+l+k*(i+j)] -= a[i+l+k*i] * a[i+k*i] * a[i+k*(i+j)];
2441 }
2442 }
2443
2444 for (j=1; j < k-i; j++)
2445 {
2446 a[i+j+k*i] = a[i+j+k*i] * a[i+k*i];
2447 a[i+k*(i+j)] = a[i+k*(i+j)] * a[i+k*i];
2448 }
2449 }
2450
2451 /* FULL INVERSION: --------------------------------------------*/
2452
2453 x[k*k-1] = a[k*k-1];
2454 for (i=k-1; i > -1; i--)
2455 {
2456 for (j=1; j < k-i; j++)
2457 {
2458 x[i+j+k*i] =0;
2459 x[i+k*(i+j)] =0;
2460
2461 for (l=1; l< k-i; l++)
2462 {
2463 x[i+j+k*i] -= x[i+j+k*(i+l)] * a[i+l+k*i];
2464 x[i+k*(i+j)] -= a[i+k*(i+l)] * x[i+l+k*(i+j)];
2465 }
2466 }
2467
2468 x[i+k*i] = a[i+k*i];
2469 for (j=1; j<k-i; j++)
2470 {
2471 x[i+k*i] -= x[i+k*(i+j)] * a[i+j+k*i];
2472 }
2473 }
2474
2475 return ierr;
2476 }
2477
2478 HYPRE_Int
hypre_parCorrRes(hypre_ParCSRMatrix * A,hypre_ParVector * x,hypre_Vector * rhs,hypre_Vector ** tmp_ptr)2479 hypre_parCorrRes( hypre_ParCSRMatrix *A,
2480 hypre_ParVector *x,
2481 hypre_Vector *rhs,
2482 hypre_Vector **tmp_ptr )
2483 {
2484 HYPRE_Int i, j, index, start;
2485 HYPRE_Int num_sends, num_cols_offd;
2486 HYPRE_Int local_size;
2487 HYPRE_Real *x_buf_data, *x_tmp_data, *x_local_data;
2488
2489 hypre_ParCSRCommPkg *comm_pkg;
2490 hypre_CSRMatrix *offd;
2491 hypre_Vector *x_local, *x_tmp, *tmp_vector;
2492 hypre_ParCSRCommHandle *comm_handle;
2493
2494 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2495 offd = hypre_ParCSRMatrixOffd(A);
2496 num_cols_offd = hypre_CSRMatrixNumCols(offd);
2497
2498 x_local = hypre_ParVectorLocalVector(x);
2499 x_local_data = hypre_VectorData(x_local);
2500 local_size = hypre_VectorSize(x_local);
2501
2502 if (num_cols_offd)
2503 {
2504 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
2505 x_buf_data = hypre_CTAlloc(HYPRE_Real,
2506 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
2507
2508 index = 0;
2509 for (i = 0; i < num_sends; i++)
2510 {
2511 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
2512 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
2513 x_buf_data[index++]
2514 = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2515 }
2516
2517 x_tmp = hypre_SeqVectorCreate(num_cols_offd);
2518 hypre_SeqVectorInitialize(x_tmp);
2519 x_tmp_data = hypre_VectorData(x_tmp);
2520
2521 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, x_buf_data,
2522 x_tmp_data);
2523
2524 tmp_vector = hypre_SeqVectorCreate(local_size);
2525 hypre_VectorMemoryLocation(tmp_vector) = HYPRE_MEMORY_DEVICE;
2526 hypre_SeqVectorInitialize(tmp_vector);
2527 hypre_SeqVectorCopy(rhs,tmp_vector);
2528
2529 hypre_ParCSRCommHandleDestroy(comm_handle);
2530 comm_handle = NULL;
2531
2532 hypre_CSRMatrixMatvec(-1.0, offd, x_tmp, 1.0, tmp_vector);
2533
2534 hypre_SeqVectorDestroy(x_tmp);
2535 hypre_TFree(x_buf_data, HYPRE_MEMORY_HOST);
2536 }
2537 else
2538 {
2539 tmp_vector = hypre_SeqVectorCreate(local_size);
2540 hypre_VectorMemoryLocation(tmp_vector) = HYPRE_MEMORY_DEVICE;
2541 hypre_SeqVectorInitialize(tmp_vector);
2542 hypre_SeqVectorCopy(rhs,tmp_vector);
2543 }
2544
2545 *tmp_ptr = tmp_vector;
2546
2547 return 0;
2548 }
2549
2550
hypre_AdSchwarzSolve(hypre_ParCSRMatrix * par_A,hypre_ParVector * par_rhs,hypre_CSRMatrix * domain_structure,HYPRE_Real * scale,hypre_ParVector * par_x,hypre_ParVector * par_aux,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)2551 HYPRE_Int hypre_AdSchwarzSolve(hypre_ParCSRMatrix *par_A,
2552 hypre_ParVector *par_rhs,
2553 hypre_CSRMatrix *domain_structure,
2554 HYPRE_Real *scale,
2555 hypre_ParVector *par_x,
2556 hypre_ParVector *par_aux, HYPRE_Int *pivots,
2557 HYPRE_Int use_nonsymm)
2558 {
2559 HYPRE_Int ierr = 0;
2560 HYPRE_Real *x;
2561 HYPRE_Real *aux;
2562 HYPRE_Real *tmp;
2563 hypre_Vector *x_vector;
2564 hypre_Vector *aux_vector;
2565 MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
2566 HYPRE_Int num_domains;
2567 HYPRE_Int max_domain_size;
2568 HYPRE_Int *i_domain_dof;
2569 HYPRE_Int *j_domain_dof;
2570 HYPRE_Real *domain_matrixinverse;
2571
2572 HYPRE_Int piv_counter = 0;
2573 HYPRE_Int one = 1;
2574 char uplo = 'L';
2575
2576 HYPRE_Int jj,i,j; /*, j_loc, k_loc;*/
2577
2578
2579 HYPRE_Int matrix_size, matrix_size_counter = 0;
2580
2581 HYPRE_Int num_procs;
2582
2583 hypre_MPI_Comm_size(comm,&num_procs);
2584
2585 /* initiate: ----------------------------------------------- */
2586 x_vector = hypre_ParVectorLocalVector(par_x);
2587 aux_vector = hypre_ParVectorLocalVector(par_aux);
2588 x = hypre_VectorData(x_vector);
2589 aux = hypre_VectorData(aux_vector);
2590 num_domains = hypre_CSRMatrixNumRows(domain_structure);
2591 max_domain_size = hypre_CSRMatrixNumCols(domain_structure);
2592 i_domain_dof = hypre_CSRMatrixI(domain_structure);
2593 j_domain_dof = hypre_CSRMatrixJ(domain_structure);
2594 domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
2595
2596 if (use_nonsymm)
2597 uplo = 'N';
2598
2599 hypre_ParVectorCopy(par_rhs,par_aux);
2600 hypre_ParCSRMatrixMatvec(-1.0,par_A,par_x,1.0,par_aux);
2601 tmp = hypre_CTAlloc(HYPRE_Real, max_domain_size, HYPRE_MEMORY_HOST);
2602
2603 /* forward solve: ----------------------------------------------- */
2604
2605 matrix_size_counter = 0;
2606 for (i=0; i < num_domains; i++)
2607 {
2608 matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
2609
2610 /* compute residual: ---------------------------------------- */
2611
2612 jj = 0;
2613 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2614 {
2615 tmp[jj] = aux[j_domain_dof[j]];
2616 jj++;
2617 }
2618 /* solve for correction: ------------------------------------- */
2619 if (use_nonsymm)
2620 {
2621 hypre_dgetrs(&uplo, &matrix_size, &one,
2622 &domain_matrixinverse[matrix_size_counter],
2623 &matrix_size, &pivots[piv_counter], tmp,
2624 &matrix_size, &ierr);
2625 }
2626 else
2627 {
2628 hypre_dpotrs(&uplo, &matrix_size, &one,
2629 &domain_matrixinverse[matrix_size_counter],
2630 &matrix_size, tmp,
2631 &matrix_size, &ierr);
2632 }
2633
2634 if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
2635 jj = 0;
2636 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2637 {
2638 x[j_domain_dof[j]]+= scale[j_domain_dof[j]]*tmp[jj++];
2639 }
2640 matrix_size_counter += matrix_size * matrix_size;
2641 piv_counter += matrix_size;
2642
2643 }
2644
2645 hypre_TFree(tmp, HYPRE_MEMORY_HOST);
2646
2647 return hypre_error_flag;
2648 }
2649
hypre_AdSchwarzCFSolve(hypre_ParCSRMatrix * par_A,hypre_ParVector * par_rhs,hypre_CSRMatrix * domain_structure,HYPRE_Real * scale,hypre_ParVector * par_x,hypre_ParVector * par_aux,HYPRE_Int * CF_marker,HYPRE_Int rlx_pt,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)2650 HYPRE_Int hypre_AdSchwarzCFSolve(hypre_ParCSRMatrix *par_A,
2651 hypre_ParVector *par_rhs,
2652 hypre_CSRMatrix *domain_structure,
2653 HYPRE_Real *scale,
2654 hypre_ParVector *par_x,
2655 hypre_ParVector *par_aux,
2656 HYPRE_Int *CF_marker,
2657 HYPRE_Int rlx_pt, HYPRE_Int *pivots,
2658 HYPRE_Int use_nonsymm)
2659 {
2660 HYPRE_Int ierr = 0;
2661 HYPRE_Real *x;
2662 HYPRE_Real *aux;
2663 HYPRE_Real *tmp;
2664 hypre_Vector *x_vector;
2665 hypre_Vector *aux_vector;
2666 MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
2667 HYPRE_Int num_domains;
2668 HYPRE_Int max_domain_size;
2669 HYPRE_Int *i_domain_dof;
2670 HYPRE_Int *j_domain_dof;
2671 HYPRE_Real *domain_matrixinverse;
2672
2673 HYPRE_Int piv_counter = 0;
2674 HYPRE_Int one = 1;
2675
2676 char uplo = 'L';
2677 HYPRE_Int jj,i,j; /*, j_loc, k_loc;*/
2678
2679 HYPRE_Int matrix_size, matrix_size_counter = 0;
2680
2681 HYPRE_Int num_procs;
2682
2683 hypre_MPI_Comm_size(comm,&num_procs);
2684
2685 /* initiate: ----------------------------------------------- */
2686 x_vector = hypre_ParVectorLocalVector(par_x);
2687 aux_vector = hypre_ParVectorLocalVector(par_aux);
2688 x = hypre_VectorData(x_vector);
2689 aux = hypre_VectorData(aux_vector);
2690 num_domains = hypre_CSRMatrixNumRows(domain_structure);
2691 max_domain_size = hypre_CSRMatrixNumCols(domain_structure);
2692 i_domain_dof = hypre_CSRMatrixI(domain_structure);
2693 j_domain_dof = hypre_CSRMatrixJ(domain_structure);
2694 domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
2695
2696 if (use_nonsymm)
2697 uplo = 'N';
2698
2699 hypre_ParVectorCopy(par_rhs,par_aux);
2700 hypre_ParCSRMatrixMatvec(-1.0,par_A,par_x,1.0,par_aux);
2701 tmp = hypre_CTAlloc(HYPRE_Real, max_domain_size, HYPRE_MEMORY_HOST);
2702
2703 /* forward solve: ----------------------------------------------- */
2704
2705 matrix_size_counter = 0;
2706 for (i=0; i < num_domains; i++)
2707 {
2708 if (CF_marker[i] == rlx_pt)
2709 {
2710 matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
2711
2712 /* compute residual: ---------------------------------------- */
2713
2714 jj = 0;
2715 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2716 {
2717 tmp[jj] = aux[j_domain_dof[j]];
2718 jj++;
2719 }
2720 /* solve for correction: ------------------------------------- */
2721 if (use_nonsymm)
2722 {
2723 hypre_dgetrs(&uplo, &matrix_size, &one,
2724 &domain_matrixinverse[matrix_size_counter],
2725 &matrix_size, &pivots[piv_counter], tmp,
2726 &matrix_size, &ierr);
2727 }
2728
2729 else
2730 {
2731 hypre_dpotrs(&uplo, &matrix_size, &one,
2732 &domain_matrixinverse[matrix_size_counter],
2733 &matrix_size, tmp,
2734 &matrix_size, &ierr);
2735 }
2736
2737 if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
2738 jj = 0;
2739 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2740 {
2741 x[j_domain_dof[j]]+= scale[j_domain_dof[j]]*tmp[jj++];
2742 }
2743 matrix_size_counter += matrix_size * matrix_size;
2744 piv_counter += matrix_size;
2745 }
2746 }
2747
2748 hypre_TFree(tmp, HYPRE_MEMORY_HOST);
2749
2750 return hypre_error_flag;
2751 }
2752
2753 HYPRE_Int
hypre_GenerateScale(hypre_CSRMatrix * domain_structure,HYPRE_Int num_variables,HYPRE_Real relaxation_weight,HYPRE_Real ** scale_pointer)2754 hypre_GenerateScale(hypre_CSRMatrix *domain_structure,
2755 HYPRE_Int num_variables,
2756 HYPRE_Real relaxation_weight,
2757 HYPRE_Real **scale_pointer)
2758 {
2759 HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
2760 HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
2761 HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
2762 HYPRE_Int i, j;
2763 HYPRE_Real *scale;
2764
2765 scale = hypre_CTAlloc(HYPRE_Real, num_variables, HYPRE_MEMORY_HOST);
2766
2767 for (i=0; i < num_domains; i++)
2768 for (j = i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2769 scale[j_domain_dof[j]] += 1.0;
2770
2771 for (i=0; i < num_variables; i++)
2772 scale[i] = relaxation_weight/scale[i];
2773
2774 *scale_pointer = scale;
2775
2776 return hypre_error_flag;
2777 }
2778
hypre_ParAdSchwarzSolve(hypre_ParCSRMatrix * A,hypre_ParVector * F,hypre_CSRMatrix * domain_structure,HYPRE_Real * scale,hypre_ParVector * X,hypre_ParVector * Vtemp,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)2779 HYPRE_Int hypre_ParAdSchwarzSolve(hypre_ParCSRMatrix *A,
2780 hypre_ParVector *F,
2781 hypre_CSRMatrix *domain_structure,
2782 HYPRE_Real *scale,
2783 hypre_ParVector *X,
2784 hypre_ParVector *Vtemp,
2785 HYPRE_Int *pivots,
2786 HYPRE_Int use_nonsymm)
2787 {
2788 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2789 HYPRE_Int num_sends = 0;
2790 HYPRE_Int *send_map_starts;
2791 HYPRE_Int *send_map_elmts;
2792
2793 hypre_ParCSRCommHandle *comm_handle;
2794
2795 HYPRE_Int ierr = 0;
2796 HYPRE_Real *x_data;
2797 HYPRE_Real *x_ext_data;
2798 HYPRE_Real *aux;
2799 HYPRE_Real *vtemp_data;
2800 HYPRE_Real *vtemp_ext_data;
2801 HYPRE_Int num_domains, max_domain_size;
2802 HYPRE_Int *i_domain_dof;
2803 HYPRE_Int *j_domain_dof;
2804 HYPRE_Real *domain_matrixinverse;
2805 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2806 HYPRE_Int num_variables;
2807 HYPRE_Int num_cols_offd;
2808 HYPRE_Real *scale_ext;
2809 HYPRE_Real *buf_data;
2810 HYPRE_Int index;
2811
2812 HYPRE_Int piv_counter = 0;
2813 HYPRE_Int one = 1;
2814
2815 char uplo = 'L';
2816 HYPRE_Int jj,i,j, j_loc; /*, j_loc, k_loc;*/
2817
2818 HYPRE_Int matrix_size, matrix_size_counter = 0;
2819
2820 /* initiate: ----------------------------------------------- */
2821 num_variables = hypre_CSRMatrixNumRows(A_diag);
2822 num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
2823 x_data = hypre_VectorData(hypre_ParVectorLocalVector(X));
2824 vtemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Vtemp));
2825
2826 if (use_nonsymm)
2827 uplo = 'N';
2828
2829 hypre_ParVectorCopy(F,Vtemp);
2830 hypre_ParCSRMatrixMatvec(-1.0,A,X,1.0,Vtemp);
2831
2832 /* forward solve: ----------------------------------------------- */
2833
2834 num_domains = hypre_CSRMatrixNumRows(domain_structure);
2835 max_domain_size = hypre_CSRMatrixNumCols(domain_structure);
2836 i_domain_dof = hypre_CSRMatrixI(domain_structure);
2837 j_domain_dof = hypre_CSRMatrixJ(domain_structure);
2838 domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
2839 aux = hypre_CTAlloc(HYPRE_Real, max_domain_size, HYPRE_MEMORY_HOST);
2840
2841 if (comm_pkg)
2842 {
2843 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
2844 send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
2845 send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
2846
2847 buf_data = hypre_CTAlloc(HYPRE_Real, send_map_starts[num_sends], HYPRE_MEMORY_HOST);
2848 x_ext_data = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
2849 vtemp_ext_data = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
2850 scale_ext = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
2851
2852 index = 0;
2853 for (i=0; i < num_sends; i++)
2854 {
2855 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
2856 buf_data[index++] = vtemp_data[send_map_elmts[j]];
2857 }
2858
2859 comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,
2860 vtemp_ext_data);
2861 hypre_ParCSRCommHandleDestroy(comm_handle);
2862 comm_handle = NULL;
2863
2864 index = 0;
2865 for (i=0; i < num_sends; i++)
2866 {
2867 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
2868 buf_data[index++] = scale[send_map_elmts[j]];
2869 }
2870
2871 comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,scale_ext);
2872 hypre_ParCSRCommHandleDestroy(comm_handle);
2873 comm_handle = NULL;
2874 }
2875
2876 for (i=0; i < num_cols_offd; i++)
2877 x_ext_data[i] = 0.0;
2878
2879 matrix_size_counter = 0;
2880 for (i=0; i < num_domains; i++)
2881 {
2882 matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
2883
2884 /* copy data contiguously into aux --------------------------- */
2885
2886 jj = 0;
2887 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2888 {
2889 j_loc = j_domain_dof[j];
2890 if (j_loc < num_variables)
2891 aux[jj] = vtemp_data[j_loc];
2892 else
2893 aux[jj] = vtemp_ext_data[j_loc-num_variables];
2894 jj++;
2895 }
2896 /* solve for correction: ------------------------------------- */
2897 if (use_nonsymm)
2898 {
2899 hypre_dgetrs(&uplo, &matrix_size, &one,
2900 &domain_matrixinverse[matrix_size_counter],
2901 &matrix_size, &pivots[piv_counter], aux,
2902 &matrix_size, &ierr);
2903 }
2904 else
2905 {
2906 hypre_dpotrs(&uplo, &matrix_size, &one,
2907 &domain_matrixinverse[matrix_size_counter],
2908 &matrix_size, aux,
2909 &matrix_size, &ierr);
2910 }
2911
2912 if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
2913 jj = 0;
2914 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2915 {
2916 j_loc = j_domain_dof[j];
2917 if (j_loc < num_variables)
2918 x_data[j_loc]+= scale[j_loc] * aux[jj++];
2919 else
2920 {
2921 j_loc -= num_variables;
2922 x_ext_data[j_loc] += scale_ext[j_loc] * aux[jj++];
2923 }
2924 }
2925 matrix_size_counter += matrix_size * matrix_size;
2926 piv_counter += matrix_size;
2927 }
2928
2929 if (comm_pkg)
2930 {
2931 comm_handle=hypre_ParCSRCommHandleCreate (2,comm_pkg,x_ext_data,buf_data);
2932
2933 hypre_ParCSRCommHandleDestroy(comm_handle);
2934 comm_handle = NULL;
2935
2936 index = 0;
2937 for (i=0; i < num_sends; i++)
2938 {
2939 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
2940 x_data[send_map_elmts[j]] += buf_data[index++];
2941 }
2942
2943 hypre_TFree(buf_data, HYPRE_MEMORY_HOST);
2944 hypre_TFree(x_ext_data, HYPRE_MEMORY_HOST);
2945 hypre_TFree(vtemp_ext_data, HYPRE_MEMORY_HOST);
2946 hypre_TFree(scale_ext, HYPRE_MEMORY_HOST);
2947 }
2948 hypre_TFree(aux, HYPRE_MEMORY_HOST);
2949
2950 return hypre_error_flag;
2951 }
2952
2953 /*--------------------------------------------------------------------------
2954 * hypre_ParAMGCreateDomainDof:
2955 *--------------------------------------------------------------------------*/
2956
2957 /*****************************************************************************
2958 *
2959 * Routine for constructing graph domain_dof with minimal overlap
2960 * and computing the respective matrix inverses to be
2961 * used in an overlapping additive Schwarz procedure (smoother
2962 * in AMG);
2963 *
2964 *****************************************************************************/
2965 HYPRE_Int
hypre_ParAMGCreateDomainDof(hypre_ParCSRMatrix * A,HYPRE_Int domain_type,HYPRE_Int overlap,HYPRE_Int num_functions,HYPRE_Int * dof_func,hypre_CSRMatrix ** domain_structure_pointer,HYPRE_Int ** piv_pointer,HYPRE_Int use_nonsymm)2966 hypre_ParAMGCreateDomainDof(hypre_ParCSRMatrix *A,
2967 HYPRE_Int domain_type, HYPRE_Int overlap,
2968 HYPRE_Int num_functions, HYPRE_Int *dof_func,
2969 hypre_CSRMatrix **domain_structure_pointer,
2970 HYPRE_Int **piv_pointer, HYPRE_Int use_nonsymm)
2971
2972 {
2973 hypre_CSRMatrix *domain_structure = NULL;
2974 HYPRE_Int *i_domain_dof, *j_domain_dof;
2975 HYPRE_Real *domain_matrixinverse;
2976 HYPRE_Int num_domains;
2977
2978 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2979 HYPRE_Int *a_diag_i = hypre_CSRMatrixI(A_diag);
2980 HYPRE_Int *a_diag_j = hypre_CSRMatrixJ(A_diag);
2981 HYPRE_Real *a_diag_data = hypre_CSRMatrixData(A_diag);
2982 HYPRE_Int num_variables = hypre_CSRMatrixNumRows(A_diag);
2983 HYPRE_BigInt first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);
2984 HYPRE_BigInt col_0 = first_col_diag -1 ;
2985 HYPRE_BigInt col_n = first_col_diag + (HYPRE_BigInt)num_variables;
2986
2987 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
2988 HYPRE_Int *a_offd_i = hypre_CSRMatrixI(A_offd);
2989 HYPRE_Int *a_offd_j = hypre_CSRMatrixJ(A_offd);
2990 HYPRE_Real *a_offd_data = hypre_CSRMatrixData(A_offd);
2991 HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
2992 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
2993
2994 hypre_CSRMatrix *A_ext;
2995 HYPRE_Int *a_ext_i;
2996 HYPRE_BigInt *a_ext_j;
2997 HYPRE_Real *a_ext_data;
2998
2999 /* HYPRE_Int *i_dof_to_accept_weight; */
3000 HYPRE_Int *i_dof_to_prefer_weight,
3001 *w_dof_dof, *i_dof_weight;
3002 HYPRE_Int *i_dof_to_aggregate, *i_aggregate_dof, *j_aggregate_dof;
3003
3004 HYPRE_Int *i_dof_index;
3005 HYPRE_Int *i_dof_index_offd;
3006 HYPRE_Int *i_proc;
3007 /* HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(A);*/
3008 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3009 HYPRE_Int num_recvs = 0;
3010 HYPRE_Int *recv_vec_starts = NULL;
3011
3012 HYPRE_Int ierr = 0;
3013 HYPRE_Int i,j,k, l_loc, i_loc, j_loc;
3014 HYPRE_Int i_dof;
3015 HYPRE_Int nf;
3016 HYPRE_Int *i_local_to_global;
3017 HYPRE_Int *i_global_to_local;
3018
3019 HYPRE_Int local_dof_counter, max_local_dof_counter=0;
3020
3021 HYPRE_Int domain_dof_counter = 0, domain_matrixinverse_counter = 0;
3022
3023 HYPRE_Real *AE;
3024
3025
3026 HYPRE_Int *ipiv;
3027 char uplo = 'L';
3028 HYPRE_Int piv_counter;
3029 HYPRE_Int *piv = NULL;
3030
3031 HYPRE_Int cnt, indx;
3032 HYPRE_Int num_procs, my_id;
3033
3034 if (num_variables == 0)
3035 {
3036 *domain_structure_pointer = domain_structure;
3037
3038 *piv_pointer = piv;
3039
3040 return hypre_error_flag;
3041 }
3042
3043 hypre_MPI_Comm_size(hypre_ParCSRMatrixComm(A),&num_procs);
3044 hypre_MPI_Comm_size(hypre_ParCSRMatrixComm(A),&my_id);
3045
3046 /* --------------------------------------------------------------------- */
3047
3048 /*=======================================================================*/
3049 /* create artificial domains by agglomeration; */
3050 /*=======================================================================*/
3051
3052 /*hypre_printf("----------- create artificials domain by agglomeration; ======\n");
3053 */
3054 i_aggregate_dof = hypre_CTAlloc(HYPRE_Int, num_variables+1, HYPRE_MEMORY_HOST);
3055 j_aggregate_dof = hypre_CTAlloc(HYPRE_Int, num_variables, HYPRE_MEMORY_HOST);
3056
3057 if (domain_type == 2)
3058 {
3059 i_dof_to_prefer_weight = hypre_CTAlloc(HYPRE_Int, num_variables, HYPRE_MEMORY_HOST);
3060 w_dof_dof = hypre_CTAlloc(HYPRE_Int, a_diag_i[num_variables], HYPRE_MEMORY_HOST);
3061
3062 for (i=0; i<num_variables; i++)
3063 for (j=a_diag_i[i]; j< a_diag_i[i+1]; j++)
3064 {
3065 if (a_diag_j[j] == i)
3066 w_dof_dof[j]=0;
3067 else
3068 w_dof_dof[j]=1;
3069 }
3070
3071 /*hypre_printf("end computing weights for agglomeration procedure: --------\n");
3072 */
3073
3074 i_dof_weight = hypre_CTAlloc(HYPRE_Int, num_variables, HYPRE_MEMORY_HOST);
3075 hypre_AMGeAgglomerate(i_aggregate_dof, j_aggregate_dof,
3076 a_diag_i, a_diag_j, w_dof_dof,
3077 a_diag_i, a_diag_j,
3078 a_diag_i, a_diag_j,
3079 i_dof_to_prefer_weight,
3080 i_dof_weight,
3081 num_variables, num_variables,
3082 &num_domains);
3083
3084 hypre_TFree(i_dof_to_prefer_weight, HYPRE_MEMORY_HOST);
3085 hypre_TFree(i_dof_weight, HYPRE_MEMORY_HOST);
3086 hypre_TFree(w_dof_dof, HYPRE_MEMORY_HOST);
3087 }
3088 else
3089 {
3090 nf = 1;
3091 if (domain_type == 1) nf = num_functions;
3092
3093 num_domains = num_variables/nf;
3094 for (i=0; i < num_domains+1; i++)
3095 {
3096 i_aggregate_dof[i] = nf*i;
3097 }
3098 for (i=0; i < num_variables; i++)
3099 {
3100 j_aggregate_dof[i] = i;
3101 }
3102 }
3103 /*hypre_printf("num_variables: %d, num_domains: %d\n", num_variables, num_domains);
3104 */
3105 if (overlap == 1)
3106 {
3107 i_domain_dof = hypre_CTAlloc(HYPRE_Int, num_domains+1, HYPRE_MEMORY_HOST);
3108
3109 i_dof_to_aggregate = hypre_CTAlloc(HYPRE_Int, num_variables, HYPRE_MEMORY_HOST);
3110 for (i=0; i < num_domains; i++)
3111 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3112 i_dof_to_aggregate[j_aggregate_dof[j]] = i;
3113
3114 i_proc = hypre_CTAlloc(HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST);
3115
3116 for (i=0; i < num_cols_offd; i++)
3117 i_proc[i] = 0;
3118
3119 if (comm_pkg)
3120 {
3121 num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
3122 recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
3123 }
3124 else if (num_procs > 1)
3125 {
3126
3127 hypre_MatvecCommPkgCreate(A);
3128
3129 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3130 num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
3131 recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
3132 }
3133
3134 for (i=0; i < num_recvs; i++)
3135 for (indx=recv_vec_starts[i]; indx < recv_vec_starts[i+1]; indx++)
3136 i_proc[indx] = i;
3137
3138 /* make domains from aggregates: *********************************/
3139
3140 i_dof_index = hypre_CTAlloc(HYPRE_Int, num_variables, HYPRE_MEMORY_HOST);
3141
3142 i_dof_index_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST);
3143
3144 for (i=0; i < num_variables; i++)
3145 i_dof_index[i] = -1;
3146
3147 for (i=0; i < num_cols_offd; i++)
3148 i_dof_index_offd[i] = -1;
3149
3150 domain_dof_counter=0;
3151 for (i=0; i < num_domains; i++)
3152 {
3153 i_domain_dof[i] = domain_dof_counter;
3154 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3155 {
3156 i_dof_index[j_aggregate_dof[j]]=-1;
3157 }
3158 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3159 {
3160 for (k=a_diag_i[j_aggregate_dof[j]];
3161 k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3162 if (i_dof_to_aggregate[a_diag_j[k]] >= i
3163 && i_dof_index[a_diag_j[k]]==-1)
3164 {
3165 i_dof_index[a_diag_j[k]]++;
3166 domain_dof_counter++;
3167 }
3168 for (k=a_offd_i[j_aggregate_dof[j]];
3169 k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3170 if (i_proc[a_offd_j[k]] > my_id
3171 && i_dof_index_offd[a_offd_j[k]]==-1)
3172 {
3173 i_dof_index_offd[a_offd_j[k]]++;
3174 domain_dof_counter++;
3175 }
3176 }
3177 }
3178
3179 for (i=0; i < num_variables; i++)
3180 i_dof_index[i] = -1;
3181
3182 for (i=0; i < num_cols_offd; i++)
3183 i_dof_index_offd[i] = -1;
3184
3185 i_domain_dof[num_domains] = domain_dof_counter;
3186 j_domain_dof = hypre_CTAlloc(HYPRE_Int, domain_dof_counter, HYPRE_MEMORY_HOST);
3187
3188 domain_dof_counter=0;
3189 for (i=0; i < num_domains; i++)
3190 {
3191 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3192 {
3193 i_dof_index[j_aggregate_dof[j]]=-1;
3194 }
3195 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3196 {
3197 for (k=a_diag_i[j_aggregate_dof[j]];
3198 k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3199 if (i_dof_to_aggregate[a_diag_j[k]] >= i
3200 && i_dof_index[a_diag_j[k]]==-1)
3201 {
3202 i_dof_index[a_diag_j[k]]++;
3203 j_domain_dof[domain_dof_counter] = a_diag_j[k];
3204 domain_dof_counter++;
3205 }
3206 for (k=a_offd_i[j_aggregate_dof[j]];
3207 k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3208 if (i_proc[a_offd_j[k]] > my_id
3209 && i_dof_index_offd[a_offd_j[k]]==-1)
3210 {
3211 i_dof_index_offd[a_offd_j[k]]++;
3212 j_domain_dof[domain_dof_counter] = a_offd_j[k]+num_variables;
3213 domain_dof_counter++;
3214 }
3215 }
3216 }
3217
3218 hypre_TFree(i_aggregate_dof, HYPRE_MEMORY_HOST);
3219 hypre_TFree(j_aggregate_dof, HYPRE_MEMORY_HOST);
3220 hypre_TFree(i_dof_to_aggregate, HYPRE_MEMORY_HOST);
3221 hypre_TFree(i_dof_index, HYPRE_MEMORY_HOST);
3222 hypre_TFree(i_dof_index_offd, HYPRE_MEMORY_HOST);
3223 hypre_TFree(i_proc, HYPRE_MEMORY_HOST);
3224 }
3225 else if (overlap == 2)
3226 {
3227 i_domain_dof = hypre_CTAlloc(HYPRE_Int, num_domains+1, HYPRE_MEMORY_HOST);
3228
3229 i_dof_to_aggregate = hypre_CTAlloc(HYPRE_Int, num_variables, HYPRE_MEMORY_HOST);
3230 for (i=0; i < num_domains; i++)
3231 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3232 i_dof_to_aggregate[j_aggregate_dof[j]] = i;
3233
3234 /* make domains from aggregates: *********************************/
3235
3236 i_dof_index = hypre_CTAlloc(HYPRE_Int, num_variables, HYPRE_MEMORY_HOST);
3237
3238 i_dof_index_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST);
3239
3240 for (i=0; i < num_variables; i++)
3241 i_dof_index[i] = -1;
3242
3243 for (i=0; i < num_cols_offd; i++)
3244 i_dof_index_offd[i] = -1;
3245
3246 domain_dof_counter=0;
3247 for (i=0; i < num_domains; i++)
3248 {
3249 i_domain_dof[i] = domain_dof_counter;
3250 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3251 {
3252 for (k=a_diag_i[j_aggregate_dof[j]];
3253 k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3254 if ( i_dof_index[a_diag_j[k]]==-1)
3255 {
3256 i_dof_index[a_diag_j[k]]++;
3257 domain_dof_counter++;
3258 }
3259 for (k=a_offd_i[j_aggregate_dof[j]];
3260 k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3261 if ( i_dof_index_offd[a_offd_j[k]]==-1)
3262 {
3263 i_dof_index_offd[a_offd_j[k]]++;
3264 domain_dof_counter++;
3265 }
3266 }
3267 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3268 {
3269 for (k=a_diag_i[j_aggregate_dof[j]];
3270 k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3271 i_dof_index[a_diag_j[k]]=-1;
3272 for (k=a_offd_i[j_aggregate_dof[j]];
3273 k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3274 i_dof_index_offd[a_offd_j[k]]=-1;
3275 }
3276 }
3277
3278 for (i=0; i < num_variables; i++)
3279 i_dof_index[i] = -1;
3280
3281 for (i=0; i < num_cols_offd; i++)
3282 i_dof_index_offd[i] = -1;
3283
3284 i_domain_dof[num_domains] = domain_dof_counter;
3285 j_domain_dof = hypre_CTAlloc(HYPRE_Int, domain_dof_counter, HYPRE_MEMORY_HOST);
3286
3287 domain_dof_counter=0;
3288 for (i=0; i < num_domains; i++)
3289 {
3290 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3291 {
3292 for (k=a_diag_i[j_aggregate_dof[j]];
3293 k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3294 if ( i_dof_index[a_diag_j[k]]==-1)
3295 {
3296 i_dof_index[a_diag_j[k]]++;
3297 j_domain_dof[domain_dof_counter] = a_diag_j[k];
3298 domain_dof_counter++;
3299 }
3300 for (k=a_offd_i[j_aggregate_dof[j]];
3301 k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3302 if ( i_dof_index_offd[a_offd_j[k]]==-1)
3303 {
3304 i_dof_index_offd[a_offd_j[k]]++;
3305 j_domain_dof[domain_dof_counter] = a_offd_j[k]+num_variables;
3306 domain_dof_counter++;
3307 }
3308 }
3309
3310 for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3311 {
3312 for (k=a_diag_i[j_aggregate_dof[j]];
3313 k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3314 i_dof_index[a_diag_j[k]]=-1;
3315 for (k=a_offd_i[j_aggregate_dof[j]];
3316 k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3317 i_dof_index_offd[a_offd_j[k]]=-1;
3318 }
3319 }
3320
3321 hypre_TFree(i_aggregate_dof, HYPRE_MEMORY_HOST);
3322 hypre_TFree(j_aggregate_dof, HYPRE_MEMORY_HOST);
3323 hypre_TFree(i_dof_to_aggregate, HYPRE_MEMORY_HOST);
3324 hypre_TFree(i_dof_index, HYPRE_MEMORY_HOST);
3325 hypre_TFree(i_dof_index_offd, HYPRE_MEMORY_HOST);
3326 }
3327 else
3328 {
3329 i_domain_dof = i_aggregate_dof;
3330 j_domain_dof = j_aggregate_dof;
3331 }
3332
3333 /*hypre_printf("END domain_dof computations: =================================\n");
3334 */
3335 domain_matrixinverse_counter = 0;
3336 local_dof_counter = 0;
3337 piv_counter = 0;
3338
3339 for (i=0; i < num_domains; i++)
3340 {
3341 local_dof_counter = i_domain_dof[i+1]-i_domain_dof[i];
3342 domain_matrixinverse_counter+= local_dof_counter * local_dof_counter;
3343 piv_counter += local_dof_counter;
3344
3345 if (local_dof_counter > max_local_dof_counter)
3346 max_local_dof_counter = local_dof_counter;
3347 }
3348
3349 domain_matrixinverse = hypre_CTAlloc(HYPRE_Real, domain_matrixinverse_counter, HYPRE_MEMORY_HOST);
3350 if (use_nonsymm)
3351 piv = hypre_CTAlloc(HYPRE_Int, piv_counter, HYPRE_MEMORY_HOST);
3352
3353 if (num_procs > 1)
3354 {
3355 A_ext = hypre_ParCSRMatrixExtractBExt(A,A,1);
3356 a_ext_i = hypre_CSRMatrixI(A_ext);
3357 a_ext_j = hypre_CSRMatrixBigJ(A_ext);
3358 a_ext_data = hypre_CSRMatrixData(A_ext);
3359 }
3360 else
3361 A_ext = NULL;
3362
3363 i_local_to_global = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter, HYPRE_MEMORY_HOST);
3364
3365 i_global_to_local = hypre_CTAlloc(HYPRE_Int, num_variables+num_cols_offd, HYPRE_MEMORY_HOST);
3366
3367 for (i=0; i < num_variables+num_cols_offd; i++)
3368 i_global_to_local[i] = -1;
3369
3370 piv_counter = 0;
3371 domain_matrixinverse_counter = 0;
3372 for (i=0; i < num_domains; i++)
3373 {
3374 local_dof_counter = 0;
3375 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
3376 {
3377 i_global_to_local[j_domain_dof[j]] = local_dof_counter;
3378 i_local_to_global[local_dof_counter] = j_domain_dof[j];
3379 local_dof_counter++;
3380 }
3381
3382 /* get local matrix in AE: ======================================== */
3383
3384 AE = &domain_matrixinverse[domain_matrixinverse_counter];
3385 ipiv = &piv[piv_counter];
3386
3387 cnt = 0;
3388 for (i_loc=0; i_loc < local_dof_counter; i_loc++)
3389 for (j_loc=0; j_loc < local_dof_counter; j_loc++)
3390 AE[cnt++] = 0.e0;
3391
3392 for (i_loc=0; i_loc < local_dof_counter; i_loc++)
3393 {
3394 i_dof = i_local_to_global[i_loc];
3395 if (i_dof < num_variables)
3396 {
3397 for (j=a_diag_i[i_dof]; j < a_diag_i[i_dof+1]; j++)
3398 {
3399 j_loc = i_global_to_local[a_diag_j[j]];
3400 if (j_loc >=0)
3401 AE[i_loc + j_loc * local_dof_counter] = a_diag_data[j];
3402 }
3403 for (j=a_offd_i[i_dof]; j < a_offd_i[i_dof+1]; j++)
3404 {
3405 j_loc = i_global_to_local[a_offd_j[j]+num_variables];
3406 if (j_loc >=0)
3407 AE[i_loc + j_loc * local_dof_counter] = a_offd_data[j];
3408 }
3409 }
3410 else
3411 {
3412 HYPRE_BigInt jj;
3413 HYPRE_Int j2;
3414 i_dof -= num_variables;
3415 for (j=a_ext_i[i_dof]; j < a_ext_i[i_dof+1]; j++)
3416 {
3417 jj = a_ext_j[j];
3418 if (jj > col_0 && jj < col_n)
3419 {
3420 j2 = (HYPRE_Int)(jj - first_col_diag);
3421 }
3422 else
3423 {
3424 j2 = hypre_BigBinarySearch(col_map_offd,jj,num_cols_offd);
3425 if (j2 > -1) j2 += num_variables;
3426 }
3427 if (j2 > -1)
3428 {
3429 j_loc = i_global_to_local[j2];
3430 if (j_loc >=0)
3431 AE[i_loc + j_loc * local_dof_counter] = a_ext_data[j];
3432 }
3433 }
3434 }
3435 }
3436
3437 if (use_nonsymm)
3438 {
3439 hypre_dgetrf(&local_dof_counter,
3440 &local_dof_counter, AE,
3441 &local_dof_counter, ipiv, &ierr);
3442 piv_counter +=local_dof_counter;
3443 }
3444
3445 else
3446 {
3447 hypre_dpotrf(&uplo,&local_dof_counter, AE,
3448 &local_dof_counter, &ierr);
3449 }
3450
3451 domain_matrixinverse_counter+=local_dof_counter*local_dof_counter;
3452
3453
3454
3455 for (l_loc=0; l_loc < local_dof_counter; l_loc++)
3456 i_global_to_local[i_local_to_global[l_loc]] = -1;
3457
3458 }
3459
3460 hypre_TFree(i_local_to_global, HYPRE_MEMORY_HOST);
3461 hypre_TFree(i_global_to_local, HYPRE_MEMORY_HOST);
3462 hypre_CSRMatrixDestroy(A_ext);
3463
3464 domain_structure = hypre_CSRMatrixCreate(num_domains, max_local_dof_counter,
3465 i_domain_dof[num_domains]);
3466
3467 hypre_CSRMatrixI(domain_structure) = i_domain_dof;
3468 hypre_CSRMatrixJ(domain_structure) = j_domain_dof;
3469 hypre_CSRMatrixData(domain_structure) = domain_matrixinverse;
3470
3471 *domain_structure_pointer = domain_structure;
3472 *piv_pointer = piv;
3473 return hypre_error_flag;
3474 }
3475
3476 HYPRE_Int
hypre_ParGenerateScale(hypre_ParCSRMatrix * A,hypre_CSRMatrix * domain_structure,HYPRE_Real relaxation_weight,HYPRE_Real ** scale_pointer)3477 hypre_ParGenerateScale(hypre_ParCSRMatrix *A,
3478 hypre_CSRMatrix *domain_structure,
3479 HYPRE_Real relaxation_weight,
3480 HYPRE_Real **scale_pointer)
3481 {
3482 HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
3483 HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
3484 HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
3485 HYPRE_Int i, j;
3486 HYPRE_Real *scale;
3487 HYPRE_Real *scale_ext;
3488 HYPRE_Real *scale_int;
3489
3490 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3491 HYPRE_Int num_sends = 0;
3492 HYPRE_Int *send_map_starts;
3493 HYPRE_Int *send_map_elmts;
3494
3495 HYPRE_Int num_variables = hypre_ParCSRMatrixNumRows(A);
3496 HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
3497 HYPRE_Int j_loc, index, start;
3498
3499 hypre_ParCSRCommHandle *comm_handle;
3500
3501 if (comm_pkg)
3502 {
3503 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
3504 send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
3505 send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
3506 }
3507
3508 scale = hypre_CTAlloc(HYPRE_Real, num_variables, HYPRE_MEMORY_HOST);
3509 if (num_cols_offd) scale_ext = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
3510
3511 for (i=0; i < num_domains; i++)
3512 {
3513 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
3514 {
3515 j_loc = j_domain_dof[j];
3516 if (j_loc < num_variables)
3517 scale[j_loc] += 1.0;
3518 else
3519 scale_ext[j_loc-num_variables] += 1.0;
3520 }
3521 }
3522 if (comm_pkg)
3523 {
3524 scale_int = hypre_CTAlloc(HYPRE_Real, send_map_starts[num_sends], HYPRE_MEMORY_HOST);
3525 comm_handle = hypre_ParCSRCommHandleCreate (2,comm_pkg,scale_ext,scale_int);
3526
3527 hypre_ParCSRCommHandleDestroy(comm_handle);
3528 comm_handle = NULL;
3529 }
3530
3531 index = 0;
3532 for (i=0; i < num_sends; i++)
3533 {
3534 start = send_map_starts[i];
3535 for (j=start; j < send_map_starts[i+1]; j++)
3536 scale[send_map_elmts[j]] += scale_int[index++];
3537 }
3538
3539 if (comm_pkg) hypre_TFree(scale_int, HYPRE_MEMORY_HOST);
3540 if (num_cols_offd) hypre_TFree(scale_ext, HYPRE_MEMORY_HOST);
3541
3542 for (i=0; i < num_variables; i++)
3543 scale[i] = relaxation_weight/scale[i];
3544
3545 *scale_pointer = scale;
3546
3547 return hypre_error_flag;
3548 }
3549
3550 HYPRE_Int
hypre_ParGenerateHybridScale(hypre_ParCSRMatrix * A,hypre_CSRMatrix * domain_structure,hypre_CSRMatrix ** A_boundary_pointer,HYPRE_Real ** scale_pointer)3551 hypre_ParGenerateHybridScale(hypre_ParCSRMatrix *A,
3552 hypre_CSRMatrix *domain_structure,
3553 hypre_CSRMatrix **A_boundary_pointer,
3554 HYPRE_Real **scale_pointer)
3555 {
3556 hypre_CSRMatrix *A_ext;
3557 HYPRE_Int *A_ext_i;
3558 HYPRE_BigInt *A_ext_j;
3559 HYPRE_Real *A_ext_data;
3560
3561 hypre_CSRMatrix *A_boundary;
3562 HYPRE_Int *A_boundary_i;
3563 HYPRE_Int *A_boundary_j;
3564 HYPRE_Real *A_boundary_data;
3565
3566 HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
3567 HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
3568 HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
3569 HYPRE_Int i, j, jj;
3570 HYPRE_Real *scale;
3571 HYPRE_Real *scale_ext;
3572 HYPRE_Real *scale_int;
3573
3574 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3575 HYPRE_Int num_sends = 0;
3576 HYPRE_Int *send_map_starts;
3577 HYPRE_Int *send_map_elmts;
3578 HYPRE_Int *index_ext;
3579
3580 HYPRE_Int num_variables = hypre_ParCSRMatrixNumRows(A);
3581 HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
3582 HYPRE_Int j_loc, index, start;
3583 HYPRE_BigInt col_0, col_n;
3584 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
3585
3586 hypre_ParCSRCommHandle *comm_handle;
3587
3588 col_0 = hypre_ParCSRMatrixFirstColDiag(A)-1;
3589 col_n = col_0+(HYPRE_Int)num_variables;
3590
3591 A_boundary = NULL;
3592
3593 if (comm_pkg)
3594 {
3595 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
3596 send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
3597 send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
3598 }
3599
3600 scale = hypre_CTAlloc(HYPRE_Real, num_variables, HYPRE_MEMORY_HOST);
3601 if (num_cols_offd)
3602 {
3603 scale_ext = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
3604 index_ext = hypre_CTAlloc(HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST);
3605 }
3606
3607 for (i=0; i < num_variables; i++)
3608 scale[i] = 1;
3609
3610 for (i=0; i < num_cols_offd; i++)
3611 index_ext[i] = -1;
3612
3613 for (i=0; i < num_domains; i++)
3614 {
3615 for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
3616 {
3617 j_loc = j_domain_dof[j];
3618 if (j_loc >= num_variables)
3619 {
3620 j_loc -= num_variables;
3621 if (index_ext[j_loc] == -1)
3622 {
3623 scale_ext[j_loc] += 1.0;
3624 index_ext[j_loc] ++;
3625 }
3626 }
3627 }
3628 }
3629 if (comm_pkg)
3630 {
3631 scale_int = hypre_CTAlloc(HYPRE_Real, send_map_starts[num_sends], HYPRE_MEMORY_HOST);
3632 comm_handle=hypre_ParCSRCommHandleCreate(2,comm_pkg,scale_ext,scale_int);
3633
3634 hypre_ParCSRCommHandleDestroy(comm_handle);
3635 comm_handle = NULL;
3636 A_ext = hypre_ParCSRMatrixExtractBExt(A,A,1);
3637 A_ext_i = hypre_CSRMatrixI(A_ext);
3638 A_boundary_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd+1, HYPRE_MEMORY_HOST);
3639 A_ext_j = hypre_CSRMatrixBigJ(A_ext);
3640 A_ext_data = hypre_CSRMatrixData(A_ext);
3641 /* compress A_ext to contain only local data and
3642 necessary boundary points*/
3643 index = 0;
3644 for (i=0; i < num_cols_offd; i++)
3645 {
3646 A_boundary_i[i] = index;
3647 for (j = A_ext_i[i]; j < A_ext_i[i+1]; j++)
3648 {
3649 HYPRE_BigInt j_col;
3650 j_col = A_ext_j[j];
3651 if (j_col > col_0 && j_col < col_n)
3652 {
3653 A_ext_j[j] = j_col-col_0;
3654 index++;
3655 }
3656 else
3657 {
3658 jj = hypre_BigBinarySearch(col_map_offd,j_col,num_cols_offd);
3659 if (jj > -1 && (scale_ext[jj] > 0))
3660 {
3661 A_ext_j[j] = num_variables+jj;
3662 index++;
3663 }
3664 else
3665 {
3666 A_ext_j[j] = -1;
3667 }
3668 }
3669 }
3670 }
3671 A_boundary_i[num_cols_offd] = index;
3672
3673 A_boundary_j = NULL;
3674 A_boundary_data = NULL;
3675
3676 if (index)
3677 {
3678 A_boundary_j = hypre_CTAlloc(HYPRE_Int, index, HYPRE_MEMORY_HOST);
3679 A_boundary_data = hypre_CTAlloc(HYPRE_Real, index, HYPRE_MEMORY_HOST);
3680 }
3681
3682 index = 0;
3683 for (i=0; i < A_ext_i[num_cols_offd]; i++)
3684 {
3685 if (A_ext_j[i] > -1)
3686 {
3687 A_boundary_j[index] = (HYPRE_Int)A_ext_j[i];
3688 A_boundary_data[index] = A_ext_data[i];
3689 index++;
3690 }
3691 }
3692 A_boundary = hypre_CSRMatrixCreate(num_cols_offd,num_variables, index);
3693 hypre_CSRMatrixI(A_boundary) = A_boundary_i;
3694 hypre_CSRMatrixJ(A_boundary) = A_boundary_j;
3695 hypre_CSRMatrixData(A_boundary) = A_boundary_data;
3696 hypre_CSRMatrixDestroy(A_ext);
3697 }
3698
3699 index = 0;
3700 for (i=0; i < num_sends; i++)
3701 {
3702 start = send_map_starts[i];
3703 for (j=start; j < send_map_starts[i+1]; j++)
3704 scale[send_map_elmts[j]] += scale_int[index++];
3705 }
3706
3707 if (comm_pkg) hypre_TFree(scale_int, HYPRE_MEMORY_HOST);
3708 if (num_cols_offd)
3709 {
3710 hypre_TFree(scale_ext, HYPRE_MEMORY_HOST);
3711 hypre_TFree(index_ext, HYPRE_MEMORY_HOST);
3712 }
3713
3714 for (i=0; i < num_variables; i++)
3715 scale[i] = 1.0/scale[i];
3716
3717 *scale_pointer = scale;
3718 *A_boundary_pointer = A_boundary;
3719
3720 return hypre_error_flag;
3721 }
3722
3723