1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 #include "_hypre_parcsr_ls.h"
9 #include "Common.h"
10 #include "_hypre_lapack.h"
11 
12 /* -------------------------------------------------------------------------
13    dof_domain: for each dof defines neighborhood to build interpolation,
14    using
15    domain_diagmat (for cut--off scaling) and
16    i_domain_dof, j_dof_domain (for extracting the block of A);
17 
18    domain_matrixinverse: contains the inverse of subdomain matrix;
19 
20    B can be used to define strength matrix;
21    ----------------------------------------------------------------------- */
22 
23 /*--------------------------------------------------------------------------
24  * hypre_AMGNodalSchwarzSmoother: (Not used currently)
25  *--------------------------------------------------------------------------*/
26 
27 HYPRE_Int
hypre_AMGNodalSchwarzSmoother(hypre_CSRMatrix * A,HYPRE_Int num_functions,HYPRE_Int option,hypre_CSRMatrix ** domain_structure_pointer)28 hypre_AMGNodalSchwarzSmoother( hypre_CSRMatrix    *A,
29                                HYPRE_Int                 num_functions,
30                                HYPRE_Int                   option,
31                                hypre_CSRMatrix   **domain_structure_pointer)
32 {
33    /*  option =      0: nodal symGS;
34        1: next to nodal symGS (overlapping Schwarz) */
35 
36    HYPRE_Int *i_domain_dof, *j_domain_dof;
37    HYPRE_Real *domain_matrixinverse;
38    HYPRE_Int num_domains;
39    hypre_CSRMatrix *domain_structure;
40 
41    HYPRE_Int *i_dof_node, *j_dof_node;
42    HYPRE_Int *i_node_dof, *j_node_dof;
43 
44    HYPRE_Int *i_node_dof_dof, *j_node_dof_dof;
45 
46    HYPRE_Int *i_node_node, *j_node_node;
47 
48    HYPRE_Int num_nodes;
49 
50    HYPRE_Int *i_dof_dof = hypre_CSRMatrixI(A);
51    HYPRE_Int *j_dof_dof = hypre_CSRMatrixJ(A);
52    HYPRE_Real *a_dof_dof = hypre_CSRMatrixData(A);
53    HYPRE_Int num_dofs = hypre_CSRMatrixNumRows(A);
54 
55 
56    HYPRE_Int ierr = 0;
57    HYPRE_Int i,j,k, l_loc, i_loc, j_loc;
58    HYPRE_Int i_dof, j_dof;
59    HYPRE_Int *i_local_to_global;
60    HYPRE_Int *i_global_to_local;
61 
62    HYPRE_Int *i_int;
63    HYPRE_Int *i_int_to_local;
64 
65    HYPRE_Int int_dof_counter, local_dof_counter, max_local_dof_counter=0;
66 
67    HYPRE_Int domain_dof_counter = 0, domain_matrixinverse_counter = 0;
68 
69    HYPRE_Real *AE;
70 
71    char uplo = 'L';
72 
73    HYPRE_Int cnt;
74 
75    /* PCG arrays: ---------------------------------------------------
76       HYPRE_Real *x, *rhs, *v, *w, *d, *aux;
77 
78       HYPRE_Int max_iter;
79 
80       ------------------------------------------------------------------ */
81 
82    /* build dof_node graph: ----------------------------------------- */
83 
84    num_nodes = num_dofs / num_functions;
85 
86    /*hypre_printf("\nnum_nodes: %d, num_dofs: %d = %d x %d\n", num_nodes, num_dofs,
87      num_nodes, num_functions);*/
88 
89    i_dof_node = hypre_CTAlloc(HYPRE_Int,  num_dofs+1, HYPRE_MEMORY_HOST);
90    j_dof_node = hypre_CTAlloc(HYPRE_Int,  num_dofs, HYPRE_MEMORY_HOST);
91 
92    for (i=0; i < num_dofs+1; i++)
93       i_dof_node[i] = i;
94 
95    for (j = 0; j < num_nodes; j++)
96       for (k = 0; k < num_functions; k++)
97          j_dof_node[j*num_functions+k] = j;
98 
99    /* build node_dof graph: ----------------------------------------- */
100 
101    ierr = transpose_matrix_create(&i_node_dof, &j_node_dof,
102                                   i_dof_node, j_dof_node,
103                                   num_dofs, num_nodes);
104 
105 
106    /* build node_node graph: ----------------------------------------- */
107 
108    ierr = matrix_matrix_product(&i_node_dof_dof,
109                                 &j_node_dof_dof,
110                                 i_node_dof, j_node_dof,
111                                 i_dof_dof, j_dof_dof,
112                                 num_nodes, num_dofs, num_dofs);
113 
114    ierr = matrix_matrix_product(&i_node_node,
115                                 &j_node_node,
116                                 i_node_dof_dof,
117                                 j_node_dof_dof,
118                                 i_dof_node, j_dof_node,
119                                 num_nodes, num_dofs, num_nodes);
120 
121    hypre_TFree(i_node_dof_dof, HYPRE_MEMORY_HOST);
122    hypre_TFree(j_node_dof_dof, HYPRE_MEMORY_HOST);
123 
124    /* compute for each node the local information: -------------------- */
125 
126    i_global_to_local = i_dof_node;
127 
128    for (i_dof =0; i_dof < num_dofs; i_dof++)
129       i_global_to_local[i_dof] = -1;
130 
131    domain_matrixinverse_counter = 0;
132    domain_dof_counter = 0;
133    for (i=0; i < num_nodes; i++)
134    {
135       local_dof_counter = 0;
136 
137       for (j=i_node_node[i]; j < i_node_node[i+1]; j++)
138          for (k=i_node_dof[j_node_node[j]];
139               k<i_node_dof[j_node_node[j]+1]; k++)
140          {
141             j_dof = j_node_dof[k];
142 
143             if (i_global_to_local[j_dof] < 0)
144             {
145                i_global_to_local[j_dof] = local_dof_counter;
146                local_dof_counter++;
147             }
148 
149          }
150       domain_matrixinverse_counter += local_dof_counter*local_dof_counter;
151       domain_dof_counter += local_dof_counter;
152 
153       if (local_dof_counter > max_local_dof_counter)
154          max_local_dof_counter = local_dof_counter;
155 
156       for (j=i_node_node[i]; j < i_node_node[i+1]; j++)
157          for (k=i_node_dof[j_node_node[j]];
158               k<i_node_dof[j_node_node[j]+1]; k++)
159          {
160             j_dof = j_node_dof[k];
161             i_global_to_local[j_dof] = -1;
162          }
163    }
164 
165    num_domains = num_nodes;
166 
167    i_domain_dof = hypre_CTAlloc(HYPRE_Int,  num_domains+1, HYPRE_MEMORY_HOST);
168 
169    if (option == 1)
170       j_domain_dof = hypre_CTAlloc(HYPRE_Int,  domain_dof_counter, HYPRE_MEMORY_HOST);
171    else
172       j_domain_dof = hypre_CTAlloc(HYPRE_Int,  num_dofs, HYPRE_MEMORY_HOST);
173 
174    if (option == 1)
175       domain_matrixinverse = hypre_CTAlloc(HYPRE_Real,  domain_matrixinverse_counter, HYPRE_MEMORY_HOST);
176    else
177       domain_matrixinverse = hypre_CTAlloc(HYPRE_Real,  num_dofs * num_functions, HYPRE_MEMORY_HOST);
178 
179    i_local_to_global = hypre_CTAlloc(HYPRE_Int,  max_local_dof_counter, HYPRE_MEMORY_HOST);
180 
181    i_int_to_local = hypre_CTAlloc(HYPRE_Int,  max_local_dof_counter, HYPRE_MEMORY_HOST);
182    i_int          = hypre_CTAlloc(HYPRE_Int,  max_local_dof_counter, HYPRE_MEMORY_HOST);
183 
184    for (l_loc=0; l_loc < max_local_dof_counter; l_loc++)
185       i_int[l_loc] = -1;
186 
187    domain_dof_counter = 0;
188    domain_matrixinverse_counter = 0;
189    for (i=0; i < num_nodes; i++)
190    {
191       i_domain_dof[i] = domain_dof_counter;
192       local_dof_counter = 0;
193 
194       for (j=i_node_node[i]; j < i_node_node[i+1]; j++)
195          for (k=i_node_dof[j_node_node[j]];
196               k<i_node_dof[j_node_node[j]+1]; k++)
197          {
198             j_dof = j_node_dof[k];
199 
200             if (i_global_to_local[j_dof] < 0)
201             {
202                i_global_to_local[j_dof] = local_dof_counter;
203                i_local_to_global[local_dof_counter] = j_dof;
204                local_dof_counter++;
205             }
206 
207          }
208 
209       for (j=i_node_dof[i]; j < i_node_dof[i+1]; j++)
210          for (k=i_dof_dof[j_node_dof[j]]; k < i_dof_dof[j_node_dof[j]+1]; k++)
211             if (i_global_to_local[j_dof_dof[k]] < 0)
212                hypre_error_w_msg(HYPRE_ERROR_GENERIC,"WRONG local indexing: ====================== \n");
213 
214       int_dof_counter = 0;
215       for (k=i_node_dof[i]; k < i_node_dof[i+1]; k++)
216       {
217          i_dof = j_node_dof[k];
218          i_loc = i_global_to_local[i_dof];
219          i_int[i_loc] = int_dof_counter;
220          i_int_to_local[int_dof_counter] = i_loc;
221          int_dof_counter++;
222       }
223 
224       /* get local matrix AE: ======================================== */
225 
226       if (option == 1)
227       {
228          AE = &domain_matrixinverse[domain_matrixinverse_counter];
229          cnt = 0;
230          for (i_loc=0; i_loc < local_dof_counter; i_loc++)
231             for (j_loc=0; j_loc < local_dof_counter; j_loc++)
232                AE[cnt++] = 0.e0;
233 
234          for (i_loc=0; i_loc < local_dof_counter; i_loc++)
235          {
236             i_dof = i_local_to_global[i_loc];
237             for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++)
238             {
239                j_loc = i_global_to_local[j_dof_dof[j]];
240                if (j_loc >=0)
241                   AE[i_loc + j_loc * local_dof_counter] = a_dof_dof[j];
242             }
243          }
244 
245          /* get block for Schwarz smoother: ============================= */
246          /* ierr = hypre_matinv(XE, AE, local_dof_counter); */
247          /* hypre_printf("ierr_AE_inv: %d\n", ierr); */
248          hypre_dpotrf(&uplo, &local_dof_counter, AE,
249                       &local_dof_counter, &ierr);
250          if (ierr == 1) hypre_error_w_msg(HYPRE_ERROR_GENERIC,"Error! Matrix not SPD\n");
251 
252          for (i_loc=0; i_loc < local_dof_counter; i_loc++)
253             j_domain_dof[domain_dof_counter+i_loc]
254                = i_local_to_global[i_loc];
255       }
256 
257       if (option == 0)
258       {
259          AE = &domain_matrixinverse[domain_matrixinverse_counter];
260          for (i_loc=0; i_loc < int_dof_counter; i_loc++)
261             for (j_loc=0; j_loc < int_dof_counter; j_loc++)
262                AE[i_loc + j_loc * int_dof_counter] = 0.e0;
263 
264          for (l_loc=0; l_loc < int_dof_counter; l_loc++)
265          {
266             i_loc = i_int_to_local[l_loc];
267             i_dof = i_local_to_global[i_loc];
268             for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++)
269             {
270                j_loc = i_global_to_local[j_dof_dof[j]];
271                if (j_loc >=0)
272                   if (i_int[j_loc] >=0)
273                      AE[i_loc + i_int[j_loc] * int_dof_counter]
274                         = a_dof_dof[j];
275             }
276          }
277 
278          /* ierr = hypre_matinv(XE, AE, int_dof_counter); */
279          hypre_dpotrf(&uplo, &local_dof_counter, AE,
280                       &local_dof_counter, &ierr);
281 
282          if (ierr) hypre_error_w_msg(HYPRE_ERROR_GENERIC," error in dpotrf !!!\n");
283 
284          for (i_loc=0; i_loc < int_dof_counter; i_loc++)
285          {
286             j_domain_dof[domain_dof_counter + i_loc] =
287                i_local_to_global[i_int_to_local[i_loc]];
288 
289             for (j_loc=0; j_loc < int_dof_counter; j_loc++)
290                domain_matrixinverse[domain_matrixinverse_counter
291                                     + i_loc + j_loc * int_dof_counter]
292                   = AE[i_loc + j_loc * int_dof_counter];
293          }
294 
295          domain_dof_counter+=int_dof_counter;
296          domain_matrixinverse_counter+=int_dof_counter*int_dof_counter;
297       }
298       else
299       {
300          domain_dof_counter+=local_dof_counter;
301          domain_matrixinverse_counter+=local_dof_counter*local_dof_counter;
302       }
303 
304       for (l_loc=0; l_loc < local_dof_counter; l_loc++)
305       {
306          i_int[l_loc] = -1;
307          i_global_to_local[i_local_to_global[l_loc]] = -1;
308       }
309    }
310 
311    i_domain_dof[num_nodes] = domain_dof_counter;
312 
313    hypre_TFree(i_dof_node, HYPRE_MEMORY_HOST);
314    hypre_TFree(j_dof_node, HYPRE_MEMORY_HOST);
315 
316    hypre_TFree(i_node_dof, HYPRE_MEMORY_HOST);
317    hypre_TFree(j_node_dof, HYPRE_MEMORY_HOST);
318    hypre_TFree(i_node_node, HYPRE_MEMORY_HOST);
319    hypre_TFree(j_node_node, HYPRE_MEMORY_HOST);
320 
321    hypre_TFree(i_int, HYPRE_MEMORY_HOST);
322    hypre_TFree(i_int_to_local, HYPRE_MEMORY_HOST);
323 
324    hypre_TFree(i_local_to_global, HYPRE_MEMORY_HOST);
325 
326    domain_structure = hypre_CSRMatrixCreate(num_domains, max_local_dof_counter,
327                                             i_domain_dof[num_domains]);
328    hypre_CSRMatrixI(domain_structure) = i_domain_dof;
329    hypre_CSRMatrixJ(domain_structure) = j_domain_dof;
330    hypre_CSRMatrixData(domain_structure) = domain_matrixinverse;
331 
332    *domain_structure_pointer = domain_structure;
333 
334    return hypre_error_flag;
335 }
336 
hypre_ParMPSchwarzSolve(hypre_ParCSRMatrix * par_A,hypre_CSRMatrix * A_boundary,hypre_ParVector * rhs_vector,hypre_CSRMatrix * domain_structure,hypre_ParVector * par_x,HYPRE_Real relax_wt,HYPRE_Real * scale,hypre_ParVector * Vtemp,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)337 HYPRE_Int hypre_ParMPSchwarzSolve(hypre_ParCSRMatrix *par_A,
338                                   hypre_CSRMatrix *A_boundary,
339                                   hypre_ParVector *rhs_vector,
340                                   hypre_CSRMatrix *domain_structure,
341                                   hypre_ParVector *par_x,
342                                   HYPRE_Real relax_wt,
343                                   HYPRE_Real *scale,
344                                   hypre_ParVector *Vtemp, HYPRE_Int *pivots,
345                                   HYPRE_Int use_nonsymm)
346 {
347    hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(par_A);
348    HYPRE_Int num_sends = 0;
349    HYPRE_Int *send_map_starts;
350    HYPRE_Int *send_map_elmts;
351 
352    hypre_ParCSRCommHandle *comm_handle;
353 
354    HYPRE_Int ierr = 0;
355    /* HYPRE_Int num_dofs; */
356    hypre_CSRMatrix *A_diag;
357    HYPRE_Int *A_diag_i;
358    HYPRE_Int *A_diag_j;
359    HYPRE_Real *A_diag_data;
360    hypre_CSRMatrix *A_offd;
361    HYPRE_Int *A_offd_i;
362    HYPRE_Int *A_offd_j;
363    HYPRE_Real *A_offd_data;
364    HYPRE_Real *x;
365    HYPRE_Real *x_ext;
366    HYPRE_Real *x_ext_old;
367    HYPRE_Real *rhs;
368    HYPRE_Real *rhs_ext;
369    HYPRE_Real *vtemp_data;
370    HYPRE_Real *aux;
371    HYPRE_Real *buf_data;
372 /*hypre_Vector *x_vector;*/
373    MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
374    HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
375    HYPRE_Int max_domain_size = hypre_CSRMatrixNumCols(domain_structure);
376    HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
377    HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
378    HYPRE_Real *domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
379    HYPRE_Int *A_boundary_i;
380    HYPRE_Int *A_boundary_j;
381    HYPRE_Real *A_boundary_data;
382    HYPRE_Int num_variables;
383    HYPRE_Int num_cols_offd;
384 
385    HYPRE_Int piv_counter = 0;
386    HYPRE_Int one = 1;
387    char uplo = 'L';
388 
389    HYPRE_Int jj,i,j,k, j_loc, k_loc;
390    HYPRE_Int index;
391 
392    HYPRE_Int matrix_size, matrix_size_counter = 0;
393 
394    HYPRE_Int num_procs;
395 
396    hypre_MPI_Comm_size(comm,&num_procs);
397 
398    /* initiate:      ----------------------------------------------- */
399    /* num_dofs = hypre_CSRMatrixNumRows(A); */
400 
401    A_diag = hypre_ParCSRMatrixDiag(par_A);
402    A_offd = hypre_ParCSRMatrixOffd(par_A);
403    num_variables = hypre_CSRMatrixNumRows(A_diag);
404    num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
405    x = hypre_VectorData(hypre_ParVectorLocalVector(par_x));
406    vtemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Vtemp));
407    rhs = hypre_VectorData(hypre_ParVectorLocalVector(rhs_vector));
408 
409    if (use_nonsymm)
410       uplo = 'N';
411 
412    /*x_vector = hypre_ParVectorLocalVector(par_x);*/
413    A_diag_i = hypre_CSRMatrixI(A_diag);
414    A_diag_j = hypre_CSRMatrixJ(A_diag);
415    A_diag_data = hypre_CSRMatrixData(A_diag);
416    A_offd_i = hypre_CSRMatrixI(A_offd);
417    if (num_cols_offd)
418    {
419       A_offd_j = hypre_CSRMatrixJ(A_offd);
420       A_offd_data = hypre_CSRMatrixData(A_offd);
421       A_boundary_i = hypre_CSRMatrixI(A_boundary);
422       A_boundary_j = hypre_CSRMatrixJ(A_boundary);
423       A_boundary_data = hypre_CSRMatrixData(A_boundary);
424    }
425    aux = hypre_CTAlloc(HYPRE_Real,  max_domain_size, HYPRE_MEMORY_HOST);
426 
427    hypre_ParVectorCopy(rhs_vector,Vtemp);
428    hypre_ParCSRMatrixMatvec(-1.0,par_A,par_x,1.0,Vtemp);
429 
430    if (comm_pkg)
431    {
432       num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
433       send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
434       send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
435 
436       buf_data = hypre_CTAlloc(HYPRE_Real,  send_map_starts[num_sends], HYPRE_MEMORY_HOST);
437       x_ext = hypre_CTAlloc(HYPRE_Real,  num_cols_offd, HYPRE_MEMORY_HOST);
438       x_ext_old = hypre_CTAlloc(HYPRE_Real,  num_cols_offd, HYPRE_MEMORY_HOST);
439       rhs_ext = hypre_CTAlloc(HYPRE_Real,  num_cols_offd, HYPRE_MEMORY_HOST);
440 
441       index = 0;
442       for (i=0; i < num_sends; i++)
443       {
444          for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
445             buf_data[index++] = vtemp_data[send_map_elmts[j]];
446       }
447 
448       comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,
449                                                  rhs_ext);
450       hypre_ParCSRCommHandleDestroy(comm_handle);
451       comm_handle = NULL;
452 
453       index = 0;
454       for (i=0; i < num_sends; i++)
455       {
456          for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
457             buf_data[index++] = x[send_map_elmts[j]];
458       }
459 
460       comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,x_ext);
461       hypre_ParCSRCommHandleDestroy(comm_handle);
462       comm_handle = NULL;
463    }
464 
465    /* correction of residual for exterior points to be updated locally */
466    for (i=0; i < num_cols_offd; i++)
467    {
468       x_ext_old[i] = x_ext[i];
469       for (j = A_boundary_i[i]; j < A_boundary_i[i+1]; j++)
470       {
471          k_loc = A_boundary_j[j];
472          if (k_loc < num_variables)
473             rhs_ext[i] += A_boundary_data[j]*x[k_loc];
474          else
475             rhs_ext[i] += A_boundary_data[j]*x_ext[k_loc-num_variables];
476       }
477    }
478    /* forward solve: ----------------------------------------------- */
479 
480    matrix_size_counter = 0;
481    for (i=0; i < num_domains; i++)
482    {
483       matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
484 
485       /* compute residual: ---------------------------------------- */
486 
487       jj = 0;
488       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
489       {
490          j_loc = j_domain_dof[j];
491          if (j_loc < num_variables)
492          {
493             aux[jj] = rhs[j_loc];
494             for (k=A_diag_i[j_loc]; k<A_diag_i[j_loc+1]; k++)
495                aux[jj] -= A_diag_data[k] * x[A_diag_j[k]];
496             for (k=A_offd_i[j_loc]; k<A_offd_i[j_loc+1]; k++)
497                aux[jj] -= A_offd_data[k] * x_ext[A_offd_j[k]];
498          }
499          else
500          {
501             j_loc -= num_variables;
502             aux[jj] = rhs_ext[j_loc];
503             for (k=A_boundary_i[j_loc]; k<A_boundary_i[j_loc+1]; k++)
504                k_loc = A_boundary_j[k];
505             if (k_loc < num_variables)
506                aux[jj] -= A_boundary_data[k] * x[k_loc];
507             else
508                aux[jj] -= A_boundary_data[k] * x_ext[k_loc-num_variables];
509          }
510          jj++;
511       }
512       /* solve for correction: ------------------------------------- */
513       if (use_nonsymm)
514       {
515          hypre_dgetrs(&uplo, &matrix_size, &one,
516                       &domain_matrixinverse[matrix_size_counter],
517                       &matrix_size, &pivots[piv_counter], aux,
518                       &matrix_size, &ierr);
519       }
520       else
521       {
522          hypre_dpotrs(&uplo, &matrix_size, &one,
523                       &domain_matrixinverse[matrix_size_counter],
524                       &matrix_size, aux,
525                       &matrix_size, &ierr);
526       }
527       if (ierr)  hypre_error(HYPRE_ERROR_GENERIC);
528       jj = 0;
529       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
530       {
531          j_loc = j_domain_dof[j];
532          if (j_loc < num_variables)
533             x[j_loc] +=  relax_wt*aux[jj++];
534          else
535             x_ext[j_loc-num_variables] +=  relax_wt*aux[jj++];
536       }
537       matrix_size_counter += matrix_size * matrix_size;
538       piv_counter += matrix_size;
539 
540 
541    }
542 /*
543   for (i=0; i < num_cols_offd; i++)
544   x_ext[i] -= x_ext_old[i];
545 
546   if (comm_pkg)
547   {
548   comm_handle=hypre_ParCSRCommHandleCreate (2,comm_pkg,x_ext,buf_data);
549 
550   hypre_ParCSRCommHandleDestroy(comm_handle);
551   comm_handle = NULL;
552 
553   index = 0;
554   for (i=0; i < num_sends; i++)
555   {
556   for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
557   x[send_map_elmts[j]] += buf_data[index++];
558   }
559   }
560   for (i=0; i < num_variables; i++)
561   x[i] *= scale[i];
562 
563   hypre_ParVectorCopy(rhs_vector,Vtemp);
564   hypre_ParCSRMatrixMatvec(-1.0,par_A,par_x,1.0,Vtemp);
565 
566   if (comm_pkg)
567   {
568   index = 0;
569   for (i=0; i < num_sends; i++)
570   {
571   for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
572   buf_data[index++] = vtemp_data[send_map_elmts[j]];
573   }
574 
575   comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,
576   rhs_ext);
577   hypre_ParCSRCommHandleDestroy(comm_handle);
578   comm_handle = NULL;
579 
580   index = 0;
581   for (i=0; i < num_sends; i++)
582   {
583   for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
584   buf_data[index++] = x[send_map_elmts[j]];
585   }
586 
587   comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,x_ext);
588   hypre_ParCSRCommHandleDestroy(comm_handle);
589   comm_handle = NULL;
590   }
591 */
592    /* correction of residual for exterior points to be updated locally */
593 /*   for (i=0; i < num_cols_offd; i++)
594      {
595      x_ext_old[i] = x_ext[i];
596      for (j = A_boundary_i[i]; j < A_boundary_i[i+1]; j++)
597      {
598      k_loc = A_boundary_j[j];
599      if (k_loc < num_variables)
600      rhs_ext[i] += A_boundary_i[k]*x[k_loc];
601      else
602      rhs_ext[i] += A_boundary_i[k]*x_ext[k_loc-num_variables];
603      }
604      }
605 */
606    /* backward solve: ------------------------------------------------ */
607    for (i=num_domains-1; i > -1; i--)
608    {
609       matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
610       matrix_size_counter -= matrix_size * matrix_size;
611       piv_counter -= matrix_size;
612 
613       /* compute residual: ---------------------------------------- */
614       jj = 0;
615       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
616       {
617          j_loc = j_domain_dof[j];
618          if (j_loc < num_variables)
619          {
620             aux[jj] = rhs[j_loc];
621             for (k=A_diag_i[j_loc]; k<A_diag_i[j_loc+1]; k++)
622                aux[jj] -= A_diag_data[k] * x[A_diag_j[k]];
623             for (k=A_offd_i[j_loc]; k<A_offd_i[j_loc+1]; k++)
624                aux[jj] -= A_offd_data[k] * x_ext[A_offd_j[k]];
625          }
626          else
627          {
628             j_loc -= num_variables;
629             aux[jj] = rhs_ext[j_loc];
630             for (k=A_boundary_i[j_loc]; k<A_boundary_i[j_loc+1]; k++)
631                k_loc = A_boundary_j[k];
632             if (k_loc < num_variables)
633                aux[jj] -= A_boundary_data[k] * x[k_loc];
634             else
635                aux[jj] -= A_boundary_data[k] * x_ext[k_loc-num_variables];
636          }
637          jj++;
638       }
639 
640       /* solve for correction: ------------------------------------- */
641       if (use_nonsymm)
642       {
643          hypre_dgetrs(&uplo, &matrix_size, &one,
644                       &domain_matrixinverse[matrix_size_counter],
645                       &matrix_size, &pivots[piv_counter], aux,
646                       &matrix_size, &ierr);
647       }
648       else
649       {
650          hypre_dpotrs(&uplo, &matrix_size, &one,
651                       &domain_matrixinverse[matrix_size_counter],
652                       &matrix_size, aux,
653                       &matrix_size, &ierr);
654       }
655 
656       if (ierr)  hypre_error(HYPRE_ERROR_GENERIC);
657       jj = 0;
658       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
659       {
660          j_loc = j_domain_dof[j];
661          if (j_loc < num_variables)
662             x[j_loc] +=  relax_wt*aux[jj++];
663          else
664             x_ext[j_loc-num_variables] +=  relax_wt*aux[jj++];
665       }
666    }
667 
668    for (i=0; i < num_cols_offd; i++)
669       x_ext[i] -= x_ext_old[i];
670 
671    if (comm_pkg)
672    {
673       comm_handle=hypre_ParCSRCommHandleCreate (2,comm_pkg,x_ext,buf_data);
674 
675       hypre_ParCSRCommHandleDestroy(comm_handle);
676       comm_handle = NULL;
677 
678       index = 0;
679       for (i=0; i < num_sends; i++)
680       {
681          for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
682             x[send_map_elmts[j]] += buf_data[index++];
683       }
684 
685       hypre_TFree(buf_data, HYPRE_MEMORY_HOST);
686       hypre_TFree(x_ext, HYPRE_MEMORY_HOST);
687       hypre_TFree(x_ext_old, HYPRE_MEMORY_HOST);
688       hypre_TFree(rhs_ext, HYPRE_MEMORY_HOST);
689    }
690    for (i=0; i < num_variables; i++)
691       x[i] *= scale[i];
692 
693    hypre_TFree(aux, HYPRE_MEMORY_HOST);
694 
695    return hypre_error_flag;
696 
697 }
698 
hypre_MPSchwarzSolve(hypre_ParCSRMatrix * par_A,hypre_Vector * rhs_vector,hypre_CSRMatrix * domain_structure,hypre_ParVector * par_x,HYPRE_Real relax_wt,hypre_Vector * aux_vector,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)699 HYPRE_Int hypre_MPSchwarzSolve(hypre_ParCSRMatrix *par_A,
700                                hypre_Vector *rhs_vector,
701                                hypre_CSRMatrix *domain_structure,
702                                hypre_ParVector *par_x,
703                                HYPRE_Real relax_wt,
704                                hypre_Vector *aux_vector, HYPRE_Int *pivots,
705                                HYPRE_Int use_nonsymm)
706 {
707    HYPRE_Int ierr = 0;
708    /* HYPRE_Int num_dofs; */
709    HYPRE_Int *i_dof_dof;
710    HYPRE_Int *j_dof_dof;
711    HYPRE_Real *a_dof_dof;
712    HYPRE_Real *x;
713    hypre_Vector *rhs;
714    HYPRE_Real *aux;
715    hypre_CSRMatrix *A;
716    hypre_Vector *x_vector;
717    MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
718    HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
719    HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
720    HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
721    HYPRE_Real *domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
722 
723    HYPRE_Int piv_counter = 0;
724    HYPRE_Int one = 1;
725    char uplo = 'L';
726 
727    HYPRE_Int jj,i,j,k; /*, j_loc, k_loc;*/
728 
729 
730    HYPRE_Int matrix_size, matrix_size_counter = 0;
731 
732    HYPRE_Int num_procs;
733 
734    hypre_MPI_Comm_size(comm,&num_procs);
735 
736    /* initiate:      ----------------------------------------------- */
737    /* num_dofs = hypre_CSRMatrixNumRows(A); */
738    x_vector = hypre_ParVectorLocalVector(par_x);
739    A = hypre_ParCSRMatrixDiag(par_A);
740    i_dof_dof = hypre_CSRMatrixI(A);
741    j_dof_dof = hypre_CSRMatrixJ(A);
742    a_dof_dof = hypre_CSRMatrixData(A);
743    x = hypre_VectorData(x_vector);
744    aux = hypre_VectorData(aux_vector);
745    /* for (i=0; i < num_dofs; i++)
746       x[i] = 0.e0; */
747 
748    if (use_nonsymm)
749       uplo = 'N';
750 
751 
752    if (num_procs > 1)
753    {
754       hypre_parCorrRes(par_A, par_x, rhs_vector, &rhs);
755    }
756    else
757    {
758       rhs = rhs_vector;
759    }
760 
761    /* forward solve: ----------------------------------------------- */
762 
763    matrix_size_counter = 0;
764    for (i=0; i < num_domains; i++)
765    {
766       matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
767 
768       /* compute residual: ---------------------------------------- */
769 
770       jj = 0;
771       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
772       {
773          aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
774          for (k=i_dof_dof[j_domain_dof[j]];
775               k<i_dof_dof[j_domain_dof[j]+1]; k++)
776             aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
777          jj++;
778       }
779       /* solve for correction: ------------------------------------- */
780       if (use_nonsymm)
781       {
782          hypre_dgetrs(&uplo, &matrix_size, &one,
783                       &domain_matrixinverse[matrix_size_counter],
784                       &matrix_size, &pivots[piv_counter], aux,
785                       &matrix_size, &ierr);
786       }
787       else
788       {
789          hypre_dpotrs(&uplo, &matrix_size, &one,
790                       &domain_matrixinverse[matrix_size_counter],
791                       &matrix_size, aux,
792                       &matrix_size, &ierr);
793       }
794 
795       if (ierr) hypre_error(HYPRE_ERROR_GENERIC);
796       jj = 0;
797       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
798       {
799          x[j_domain_dof[j]]+=  relax_wt * aux[jj++];
800       }
801       matrix_size_counter += matrix_size * matrix_size;
802       piv_counter += matrix_size;
803    }
804 
805    /* backward solve: ------------------------------------------------ */
806    for (i=num_domains-1; i > -1; i--)
807    {
808       matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
809       matrix_size_counter -= matrix_size * matrix_size;
810       piv_counter -= matrix_size;
811 
812       /* compute residual: ---------------------------------------- */
813 
814       jj = 0;
815       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
816       {
817          aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
818          for (k=i_dof_dof[j_domain_dof[j]];
819               k<i_dof_dof[j_domain_dof[j]+1]; k++)
820             aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
821          jj++;
822       }
823 
824       /* solve for correction: ------------------------------------- */
825 
826       if (use_nonsymm)
827       {
828          hypre_dgetrs(&uplo, &matrix_size, &one,
829                       &domain_matrixinverse[matrix_size_counter],
830                       &matrix_size, &pivots[piv_counter], aux,
831                       &matrix_size, &ierr);
832       }
833       else
834       {
835          hypre_dpotrs(&uplo, &matrix_size, &one,
836                       &domain_matrixinverse[matrix_size_counter],
837                       &matrix_size, aux,
838                       &matrix_size, &ierr);
839       }
840 
841       if (ierr)  hypre_error(HYPRE_ERROR_GENERIC);
842       jj = 0;
843       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
844       {
845          x[j_domain_dof[j]] += relax_wt*aux[jj++];
846       }
847    }
848 
849    if (num_procs > 1)
850    {
851       hypre_SeqVectorDestroy(rhs);
852    }
853 
854    return hypre_error_flag;
855 }
856 
hypre_MPSchwarzCFSolve(hypre_ParCSRMatrix * par_A,hypre_Vector * rhs_vector,hypre_CSRMatrix * domain_structure,hypre_ParVector * par_x,HYPRE_Real relax_wt,hypre_Vector * aux_vector,HYPRE_Int * CF_marker,HYPRE_Int rlx_pt,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)857 HYPRE_Int hypre_MPSchwarzCFSolve(hypre_ParCSRMatrix *par_A,
858                                  hypre_Vector *rhs_vector,
859                                  hypre_CSRMatrix *domain_structure,
860                                  hypre_ParVector *par_x,
861                                  HYPRE_Real relax_wt,
862                                  hypre_Vector *aux_vector,
863                                  HYPRE_Int *CF_marker,
864                                  HYPRE_Int rlx_pt, HYPRE_Int *pivots,
865                                  HYPRE_Int use_nonsymm)
866 {
867    HYPRE_Int ierr = 0;
868    /* HYPRE_Int num_dofs; */
869    HYPRE_Int *i_dof_dof;
870    HYPRE_Int *j_dof_dof;
871    HYPRE_Real *a_dof_dof;
872    HYPRE_Real *x;
873    hypre_Vector *rhs;
874    HYPRE_Real *aux;
875    hypre_CSRMatrix *A;
876    hypre_Vector *x_vector;
877    MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
878    HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
879    HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
880    HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
881    HYPRE_Real *domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
882 
883    HYPRE_Int piv_counter = 0;
884    HYPRE_Int one = 1;
885    char uplo = 'L';
886    HYPRE_Int jj,i,j,k; /*, j_loc, k_loc;*/
887 
888    HYPRE_Int matrix_size, matrix_size_counter = 0;
889 
890    HYPRE_Int num_procs;
891 
892    hypre_MPI_Comm_size(comm,&num_procs);
893 
894    /* initiate:      ----------------------------------------------- */
895    /* num_dofs = hypre_CSRMatrixNumRows(A); */
896    x_vector = hypre_ParVectorLocalVector(par_x);
897    A = hypre_ParCSRMatrixDiag(par_A);
898    i_dof_dof = hypre_CSRMatrixI(A);
899    j_dof_dof = hypre_CSRMatrixJ(A);
900    a_dof_dof = hypre_CSRMatrixData(A);
901    x = hypre_VectorData(x_vector);
902    aux = hypre_VectorData(aux_vector);
903    /* for (i=0; i < num_dofs; i++)
904       x[i] = 0.e0; */
905 
906    if (use_nonsymm)
907       uplo = 'N';
908 
909    if (num_procs > 1)
910    {
911       hypre_parCorrRes(par_A, par_x, rhs_vector, &rhs);
912    }
913    else
914    {
915       rhs = rhs_vector;
916    }
917 
918    /* forward solve: ----------------------------------------------- */
919 
920    matrix_size_counter = 0;
921    for (i=0; i < num_domains; i++)
922    {
923       if (CF_marker[i] == rlx_pt)
924       {
925          matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
926 
927          /* compute residual: ---------------------------------------- */
928 
929          jj = 0;
930          for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
931          {
932             aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
933             if (CF_marker[j_domain_dof[j]] == rlx_pt)
934             {
935                for (k=i_dof_dof[j_domain_dof[j]];
936                     k<i_dof_dof[j_domain_dof[j]+1]; k++)
937                   aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
938             }
939             jj++;
940          }
941          /* solve for correction: ------------------------------------- */
942          if (use_nonsymm)
943          {
944             hypre_dgetrs(&uplo, &matrix_size, &one,
945                          &domain_matrixinverse[matrix_size_counter],
946                          &matrix_size, &pivots[piv_counter], aux,
947                          &matrix_size, &ierr);
948          }
949          else
950          {
951             hypre_dpotrs(&uplo, &matrix_size, &one,
952                          &domain_matrixinverse[matrix_size_counter],
953                          &matrix_size, aux,
954                          &matrix_size, &ierr);
955          }
956 
957          if (ierr)  hypre_error(HYPRE_ERROR_GENERIC);
958          jj = 0;
959          for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
960          {
961             x[j_domain_dof[j]]+=  relax_wt * aux[jj++];
962          }
963          matrix_size_counter += matrix_size * matrix_size;
964          piv_counter += matrix_size;
965       }
966    }
967 
968    /* backward solve: ------------------------------------------------ */
969    for (i=num_domains-1; i > -1; i--)
970    {
971       if (CF_marker[i] == rlx_pt)
972       {
973          matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
974          matrix_size_counter -= matrix_size * matrix_size;
975          piv_counter -= matrix_size;
976 
977          /* compute residual: ---------------------------------------- */
978          jj = 0;
979          for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
980          {
981             aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
982             if (CF_marker[j_domain_dof[j]] == rlx_pt)
983             {
984                for (k=i_dof_dof[j_domain_dof[j]];
985                     k<i_dof_dof[j_domain_dof[j]+1]; k++)
986                   aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
987             }
988             jj++;
989          }
990 
991          /* solve for correction: ------------------------------------- */
992          if (use_nonsymm)
993          {
994             hypre_dgetrs(&uplo, &matrix_size, &one,
995                          &domain_matrixinverse[matrix_size_counter],
996                          &matrix_size, &pivots[piv_counter], aux,
997                          &matrix_size, &ierr);
998          }
999          else
1000          {
1001             hypre_dpotrs(&uplo, &matrix_size, &one,
1002                          &domain_matrixinverse[matrix_size_counter],
1003                          &matrix_size, aux,
1004                          &matrix_size, &ierr);
1005          }
1006 
1007          if (ierr)  hypre_error(HYPRE_ERROR_GENERIC);
1008          jj = 0;
1009          for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1010          {
1011             x[j_domain_dof[j]]+=  relax_wt*aux[jj++];
1012          }
1013       }
1014    }
1015 
1016    if (num_procs > 1)
1017    {
1018       hypre_SeqVectorDestroy(rhs);
1019    }
1020 
1021    return hypre_error_flag;
1022 }
1023 
hypre_MPSchwarzFWSolve(hypre_ParCSRMatrix * par_A,hypre_Vector * rhs_vector,hypre_CSRMatrix * domain_structure,hypre_ParVector * par_x,HYPRE_Real relax_wt,hypre_Vector * aux_vector,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)1024 HYPRE_Int hypre_MPSchwarzFWSolve(hypre_ParCSRMatrix *par_A,
1025                                  hypre_Vector *rhs_vector,
1026                                  hypre_CSRMatrix *domain_structure,
1027                                  hypre_ParVector *par_x,
1028                                  HYPRE_Real relax_wt,
1029                                  hypre_Vector *aux_vector, HYPRE_Int *pivots,
1030                                  HYPRE_Int use_nonsymm)
1031 {
1032    HYPRE_Int ierr = 0;
1033    /* HYPRE_Int num_dofs; */
1034    HYPRE_Int *i_dof_dof;
1035    HYPRE_Int *j_dof_dof;
1036    HYPRE_Real *a_dof_dof;
1037    HYPRE_Real *x;
1038    hypre_Vector *rhs;
1039    HYPRE_Real *aux;
1040    hypre_CSRMatrix *A;
1041    hypre_Vector *x_vector;
1042    MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
1043    HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
1044    HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
1045    HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
1046    HYPRE_Real *domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
1047 
1048 
1049    HYPRE_Int piv_counter = 0;
1050    HYPRE_Int one = 1;
1051    char uplo = 'L';
1052    HYPRE_Int jj,i,j,k; /*, j_loc, k_loc;*/
1053 
1054 
1055    HYPRE_Int matrix_size, matrix_size_counter = 0;
1056 
1057    HYPRE_Int num_procs;
1058 
1059    hypre_MPI_Comm_size(comm,&num_procs);
1060 
1061    /* initiate:      ----------------------------------------------- */
1062    /* num_dofs = hypre_CSRMatrixNumRows(A); */
1063    x_vector = hypre_ParVectorLocalVector(par_x);
1064    A = hypre_ParCSRMatrixDiag(par_A);
1065    i_dof_dof = hypre_CSRMatrixI(A);
1066    j_dof_dof = hypre_CSRMatrixJ(A);
1067    a_dof_dof = hypre_CSRMatrixData(A);
1068    x = hypre_VectorData(x_vector);
1069    aux = hypre_VectorData(aux_vector);
1070    /* for (i=0; i < num_dofs; i++)
1071       x[i] = 0.e0; */
1072 
1073    if (num_procs > 1)
1074    {
1075       hypre_parCorrRes(par_A, par_x, rhs_vector, &rhs);
1076    }
1077    else
1078    {
1079       rhs = rhs_vector;
1080    }
1081 
1082    /* forward solve: ----------------------------------------------- */
1083 
1084    matrix_size_counter = 0;
1085    for (i=0; i < num_domains; i++)
1086    {
1087       matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
1088 
1089       /* compute residual: ---------------------------------------- */
1090 
1091       jj = 0;
1092       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1093       {
1094          aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
1095          for (k=i_dof_dof[j_domain_dof[j]];
1096               k<i_dof_dof[j_domain_dof[j]+1]; k++)
1097             aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
1098          jj++;
1099       }
1100       /* solve for correction: ------------------------------------- */
1101       if (use_nonsymm)
1102       {
1103          hypre_dgetrs(&uplo, &matrix_size, &one,
1104                       &domain_matrixinverse[matrix_size_counter],
1105                       &matrix_size, &pivots[piv_counter], aux,
1106                       &matrix_size, &ierr);
1107       }
1108       else
1109       {
1110          hypre_dpotrs(&uplo, &matrix_size, &one,
1111                       &domain_matrixinverse[matrix_size_counter],
1112                       &matrix_size, aux,
1113                       &matrix_size, &ierr);
1114       }
1115 
1116 
1117       if (ierr)   hypre_error(HYPRE_ERROR_GENERIC);
1118       jj = 0;
1119       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1120       {
1121          x[j_domain_dof[j]]+=  relax_wt * aux[jj++];
1122       }
1123       matrix_size_counter += matrix_size * matrix_size;
1124       piv_counter += matrix_size;
1125    }
1126 
1127    if (num_procs > 1)
1128    {
1129       hypre_SeqVectorDestroy(rhs);
1130    }
1131 
1132    return hypre_error_flag;
1133 }
1134 
hypre_MPSchwarzCFFWSolve(hypre_ParCSRMatrix * par_A,hypre_Vector * rhs_vector,hypre_CSRMatrix * domain_structure,hypre_ParVector * par_x,HYPRE_Real relax_wt,hypre_Vector * aux_vector,HYPRE_Int * CF_marker,HYPRE_Int rlx_pt,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)1135 HYPRE_Int hypre_MPSchwarzCFFWSolve(hypre_ParCSRMatrix *par_A,
1136                                    hypre_Vector *rhs_vector,
1137                                    hypre_CSRMatrix *domain_structure,
1138                                    hypre_ParVector *par_x,
1139                                    HYPRE_Real relax_wt,
1140                                    hypre_Vector *aux_vector,
1141                                    HYPRE_Int *CF_marker,
1142                                    HYPRE_Int rlx_pt, HYPRE_Int *pivots,
1143                                    HYPRE_Int use_nonsymm)
1144 {
1145    HYPRE_Int ierr = 0;
1146    /* HYPRE_Int num_dofs; */
1147    HYPRE_Int *i_dof_dof;
1148    HYPRE_Int *j_dof_dof;
1149    HYPRE_Real *a_dof_dof;
1150    HYPRE_Real *x;
1151    hypre_Vector *rhs;
1152    HYPRE_Real *aux;
1153    hypre_CSRMatrix *A;
1154    hypre_Vector *x_vector;
1155    MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
1156    HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
1157    HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
1158    HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
1159    HYPRE_Real *domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
1160 
1161    HYPRE_Int piv_counter = 0;
1162    HYPRE_Int one = 1;
1163 
1164    char uplo = 'L';
1165    HYPRE_Int jj,i,j,k; /*, j_loc, k_loc;*/
1166 
1167 
1168    HYPRE_Int matrix_size, matrix_size_counter = 0;
1169 
1170    HYPRE_Int num_procs;
1171 
1172    hypre_MPI_Comm_size(comm,&num_procs);
1173 
1174    /* initiate:      ----------------------------------------------- */
1175    /* num_dofs = hypre_CSRMatrixNumRows(A); */
1176    x_vector = hypre_ParVectorLocalVector(par_x);
1177    A = hypre_ParCSRMatrixDiag(par_A);
1178    i_dof_dof = hypre_CSRMatrixI(A);
1179    j_dof_dof = hypre_CSRMatrixJ(A);
1180    a_dof_dof = hypre_CSRMatrixData(A);
1181    x = hypre_VectorData(x_vector);
1182    aux = hypre_VectorData(aux_vector);
1183    /* for (i=0; i < num_dofs; i++)
1184       x[i] = 0.e0; */
1185 
1186    if (use_nonsymm)
1187       uplo = 'N';
1188 
1189    if (num_procs > 1)
1190    {
1191       hypre_parCorrRes(par_A, par_x, rhs_vector, &rhs);
1192    }
1193    else
1194    {
1195       rhs = rhs_vector;
1196    }
1197 
1198    /* forward solve: ----------------------------------------------- */
1199 
1200    matrix_size_counter = 0;
1201    for (i=0; i < num_domains; i++)
1202    {
1203       if (CF_marker[i] == rlx_pt)
1204       {
1205          matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
1206 
1207          /* compute residual: ---------------------------------------- */
1208 
1209          jj = 0;
1210          for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1211          {
1212             aux[jj] = hypre_VectorData(rhs)[j_domain_dof[j]];
1213             if (CF_marker[j_domain_dof[j]] == rlx_pt)
1214             {
1215                for (k=i_dof_dof[j_domain_dof[j]];
1216                     k<i_dof_dof[j_domain_dof[j]+1]; k++)
1217                   aux[jj] -= a_dof_dof[k] * x[j_dof_dof[k]];
1218             }
1219             jj++;
1220          }
1221          /* solve for correction: ------------------------------------- */
1222          if (use_nonsymm)
1223          {
1224             hypre_dgetrs(&uplo, &matrix_size, &one,
1225                          &domain_matrixinverse[matrix_size_counter],
1226                          &matrix_size, &pivots[piv_counter], aux,
1227                          &matrix_size, &ierr);
1228          }
1229 
1230          else
1231          {
1232             hypre_dpotrs(&uplo, &matrix_size, &one,
1233                          &domain_matrixinverse[matrix_size_counter],
1234                          &matrix_size, aux,
1235                          &matrix_size, &ierr);
1236          }
1237 
1238          if (ierr)  hypre_error(HYPRE_ERROR_GENERIC);
1239          jj = 0;
1240          for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1241          {
1242             x[j_domain_dof[j]]+=  relax_wt * aux[jj++];
1243          }
1244          matrix_size_counter += matrix_size * matrix_size;
1245          piv_counter += matrix_size;
1246       }
1247    }
1248 
1249    if (num_procs > 1)
1250    {
1251       hypre_SeqVectorDestroy(rhs);
1252    }
1253 
1254    return hypre_error_flag;
1255 }
1256 
1257 HYPRE_Int
transpose_matrix_create(HYPRE_Int ** i_face_element_pointer,HYPRE_Int ** j_face_element_pointer,HYPRE_Int * i_element_face,HYPRE_Int * j_element_face,HYPRE_Int num_elements,HYPRE_Int num_faces)1258 transpose_matrix_create(  HYPRE_Int **i_face_element_pointer,
1259                           HYPRE_Int **j_face_element_pointer,
1260                           HYPRE_Int *i_element_face, HYPRE_Int *j_element_face,
1261                           HYPRE_Int num_elements, HYPRE_Int num_faces)
1262 {
1263    /* FILE *f; */
1264    HYPRE_Int i, j;
1265 
1266    HYPRE_Int *i_face_element, *j_face_element;
1267 
1268    /* ======================================================================
1269       first create face_element graph: -------------------------------------
1270       ====================================================================== */
1271 
1272    i_face_element = hypre_TAlloc(HYPRE_Int, (num_faces+1) , HYPRE_MEMORY_HOST);
1273    j_face_element = hypre_TAlloc(HYPRE_Int, i_element_face[num_elements] , HYPRE_MEMORY_HOST);
1274 
1275 
1276    for (i=0; i < num_faces; i++)
1277       i_face_element[i] = 0;
1278 
1279    for (i=0; i < num_elements; i++)
1280       for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
1281          i_face_element[j_element_face[j]]++;
1282 
1283    i_face_element[num_faces] = i_element_face[num_elements];
1284 
1285    for (i=num_faces-1; i > -1; i--)
1286       i_face_element[i] = i_face_element[i+1] - i_face_element[i];
1287 
1288    for (i=0; i < num_elements; i++)
1289       for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
1290       {
1291          j_face_element[i_face_element[j_element_face[j]]] = i;
1292          i_face_element[j_element_face[j]]++;
1293       }
1294 
1295    for (i=num_faces-1; i > -1; i--)
1296       i_face_element[i+1] = i_face_element[i];
1297 
1298    i_face_element[0] = 0;
1299 
1300    /* hypre_printf("end building face--element graph: ++++++++++++++++++\n"); */
1301 
1302    /* END building face_element graph; ================================ */
1303 
1304    *i_face_element_pointer = i_face_element;
1305    *j_face_element_pointer = j_face_element;
1306 
1307    return 0;
1308 
1309 }
1310 HYPRE_Int
matrix_matrix_product(HYPRE_Int ** i_element_edge_pointer,HYPRE_Int ** j_element_edge_pointer,HYPRE_Int * i_element_face,HYPRE_Int * j_element_face,HYPRE_Int * i_face_edge,HYPRE_Int * j_face_edge,HYPRE_Int num_elements,HYPRE_Int num_faces,HYPRE_Int num_edges)1311 matrix_matrix_product(    HYPRE_Int **i_element_edge_pointer,
1312                           HYPRE_Int **j_element_edge_pointer,
1313                           HYPRE_Int *i_element_face, HYPRE_Int *j_element_face,
1314                           HYPRE_Int *i_face_edge, HYPRE_Int *j_face_edge,
1315                           HYPRE_Int num_elements, HYPRE_Int num_faces, HYPRE_Int num_edges)
1316 {
1317    /* FILE *f; */
1318    HYPRE_Int i, j, k, l, m;
1319 
1320    HYPRE_Int i_edge_on_local_list, i_edge_on_list;
1321    HYPRE_Int local_element_edge_counter = 0, element_edge_counter = 0;
1322    HYPRE_Int *j_local_element_edge;
1323 
1324    HYPRE_Int *i_element_edge, *j_element_edge;
1325 
1326    j_local_element_edge = hypre_TAlloc(HYPRE_Int, (num_edges+1) , HYPRE_MEMORY_HOST);
1327 
1328    i_element_edge = hypre_TAlloc(HYPRE_Int, (num_elements+1) , HYPRE_MEMORY_HOST);
1329 
1330    for (i=0; i < num_elements+1; i++)
1331       i_element_edge[i] = 0;
1332 
1333    for (i=0; i < num_elements; i++)
1334    {
1335       local_element_edge_counter = 0;
1336       for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
1337       {
1338          k = j_element_face[j];
1339 
1340          for (l=i_face_edge[k]; l < i_face_edge[k+1]; l++)
1341          {
1342             /* element i  and edge j_face_edge[l] are connected */
1343 
1344             /* hypre_printf("element %d  contains edge %d;\n",
1345                i, j_face_edge[l]);  */
1346 
1347             i_edge_on_local_list = -1;
1348             for (m=0; m < local_element_edge_counter; m++)
1349                if (j_local_element_edge[m] == j_face_edge[l])
1350                {
1351                   i_edge_on_local_list++;
1352                   break;
1353                }
1354 
1355             if (i_edge_on_local_list == -1)
1356             {
1357                i_element_edge[i]++;
1358                j_local_element_edge[local_element_edge_counter]=
1359                   j_face_edge[l];
1360                local_element_edge_counter++;
1361             }
1362          }
1363       }
1364    }
1365 
1366    hypre_TFree(j_local_element_edge, HYPRE_MEMORY_HOST);
1367 
1368    for (i=0; i < num_elements; i++)
1369       i_element_edge[i+1] += i_element_edge[i];
1370 
1371    for (i=num_elements; i>0; i--)
1372       i_element_edge[i] = i_element_edge[i-1];
1373 
1374    i_element_edge[0] = 0;
1375 
1376    j_element_edge = hypre_TAlloc(HYPRE_Int, i_element_edge[num_elements]
1377                                          , HYPRE_MEMORY_HOST);
1378 
1379    /* fill--in the actual j_element_edge array: --------------------- */
1380 
1381    element_edge_counter = 0;
1382    for (i=0; i < num_elements; i++)
1383    {
1384       i_element_edge[i] = element_edge_counter;
1385       for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
1386       {
1387          for (k=i_face_edge[j_element_face[j]];
1388               k<i_face_edge[j_element_face[j]+1];k++)
1389          {
1390             /* check if edge j_face_edge[k] is already on list ***/
1391 
1392             i_edge_on_list = -1;
1393             for (l=i_element_edge[i];
1394                  l<element_edge_counter; l++)
1395                if (j_element_edge[l] == j_face_edge[k])
1396                {
1397                   i_edge_on_list++;
1398                   break;
1399                }
1400 
1401             if (i_edge_on_list == -1)
1402             {
1403                if (element_edge_counter >=
1404                    i_element_edge[num_elements])
1405                {
1406                   hypre_error_w_msg(HYPRE_ERROR_GENERIC,"error in j_element_edge size: \n");
1407                   break;
1408                }
1409 
1410                j_element_edge[element_edge_counter] =
1411                   j_face_edge[k];
1412                element_edge_counter++;
1413             }
1414          }
1415       }
1416 
1417    }
1418 
1419    i_element_edge[num_elements] = element_edge_counter;
1420 
1421    /*------------------------------------------------------------------
1422      f = fopen("element_edge", "w");
1423      for (i=0; i < num_elements; i++)
1424      {
1425      hypre_printf("\nelement: %d has edges:\n", i);
1426      for (j=i_element_edge[i]; j < i_element_edge[i+1]; j++)
1427      {
1428      hypre_printf("%d ", j_element_edge[j]);
1429      hypre_fprintf(f, "%d %d\n", i, j_element_edge[j]);
1430      }
1431 
1432      hypre_printf("\n");
1433      }
1434 
1435      fclose(f);
1436    */
1437 
1438    /* hypre_printf("end element_edge computation: ++++++++++++++++++++++++ \n");*/
1439 
1440    *i_element_edge_pointer = i_element_edge;
1441    *j_element_edge_pointer = j_element_edge;
1442 
1443    return hypre_error_flag;
1444 }
1445 
1446 /*--------------------------------------------------------------------------
1447  * hypre_AMGCreateDomainDof:
1448  *--------------------------------------------------------------------------*/
1449 
1450 /*****************************************************************************
1451  *
1452  * Routine for constructing graph domain_dof with minimal overlap
1453  *             and computing the respective matrix inverses to be
1454  *             used in an overlapping Schwarz procedure (like smoother
1455  *             in AMG);
1456  *
1457  *****************************************************************************/
1458 HYPRE_Int
hypre_AMGCreateDomainDof(hypre_CSRMatrix * A,HYPRE_Int domain_type,HYPRE_Int overlap,HYPRE_Int num_functions,HYPRE_Int * dof_func,hypre_CSRMatrix ** domain_structure_pointer,HYPRE_Int ** piv_pointer,HYPRE_Int use_nonsymm)1459 hypre_AMGCreateDomainDof(hypre_CSRMatrix     *A,
1460                          HYPRE_Int domain_type, HYPRE_Int overlap,
1461                          HYPRE_Int num_functions, HYPRE_Int *dof_func,
1462                          hypre_CSRMatrix    **domain_structure_pointer,
1463                          HYPRE_Int **piv_pointer, HYPRE_Int use_nonsymm)
1464 {
1465 
1466    HYPRE_Int *i_domain_dof, *j_domain_dof;
1467    HYPRE_Real *domain_matrixinverse;
1468    HYPRE_Int num_domains;
1469    hypre_CSRMatrix *domain_structure = NULL;
1470 
1471    HYPRE_Int *i_dof_dof = hypre_CSRMatrixI(A);
1472    HYPRE_Int *j_dof_dof = hypre_CSRMatrixJ(A);
1473    HYPRE_Real *a_dof_dof = hypre_CSRMatrixData(A);
1474    HYPRE_Int num_dofs = hypre_CSRMatrixNumRows(A);
1475 
1476    /* HYPRE_Int *i_dof_to_accept_weight; */
1477    HYPRE_Int *i_dof_to_prefer_weight,
1478       *w_dof_dof, *i_dof_weight;
1479    HYPRE_Int *i_dof_to_aggregate, *i_aggregate_dof, *j_aggregate_dof;
1480 
1481    HYPRE_Int *i_dof_index;
1482 
1483    HYPRE_Int ierr = 0;
1484    HYPRE_Int i,j,k,  l_loc, i_loc, j_loc;
1485    HYPRE_Int i_dof;
1486    HYPRE_Int *i_local_to_global;
1487    HYPRE_Int *i_global_to_local;
1488 
1489    HYPRE_Int local_dof_counter, max_local_dof_counter=0;
1490 
1491    HYPRE_Int domain_dof_counter = 0, domain_matrixinverse_counter = 0;
1492    HYPRE_Int nf;
1493 
1494    HYPRE_Real *AE;
1495 
1496    HYPRE_Int piv_counter = 0;
1497    HYPRE_Int *ipiv;
1498    HYPRE_Int *piv = NULL;
1499    char uplo = 'L';
1500    HYPRE_Int cnt;
1501 
1502    /* --------------------------------------------------------------------- */
1503 
1504    /*=======================================================================*/
1505    /*    create artificial domains by agglomeration;                        */
1506    /*=======================================================================*/
1507 
1508    /*hypre_printf("----------- create artificials domain by agglomeration;  ======\n");
1509     */
1510 
1511    if (num_dofs == 0)
1512    {
1513       *domain_structure_pointer = domain_structure;
1514 
1515       *piv_pointer = piv;
1516 
1517       return hypre_error_flag;
1518    }
1519 
1520    i_aggregate_dof = hypre_CTAlloc(HYPRE_Int, num_dofs+1, HYPRE_MEMORY_HOST);
1521    j_aggregate_dof= hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
1522 
1523    if (domain_type == 2)
1524    {
1525       i_dof_to_prefer_weight = hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
1526       w_dof_dof = hypre_CTAlloc(HYPRE_Int, i_dof_dof[num_dofs], HYPRE_MEMORY_HOST);
1527       i_dof_weight = hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
1528 
1529       for (i=0; i<num_dofs; i++)
1530          for (j=i_dof_dof[i]; j< i_dof_dof[i+1]; j++)
1531          {
1532             if (j_dof_dof[j] == i)
1533                w_dof_dof[j]=0;
1534             else
1535                w_dof_dof[j]=1;
1536          }
1537 
1538       /*hypre_printf("end computing weights for agglomeration procedure: --------\n");
1539        */
1540       hypre_AMGeAgglomerate(i_aggregate_dof, j_aggregate_dof,
1541                             i_dof_dof, j_dof_dof, w_dof_dof,
1542                             i_dof_dof, j_dof_dof,
1543                             i_dof_dof, j_dof_dof,
1544                             i_dof_to_prefer_weight,
1545                             i_dof_weight,
1546                             num_dofs, num_dofs,
1547                             &num_domains);
1548 
1549       hypre_TFree(i_dof_to_prefer_weight, HYPRE_MEMORY_HOST);
1550       hypre_TFree(i_dof_weight, HYPRE_MEMORY_HOST);
1551       hypre_TFree(w_dof_dof, HYPRE_MEMORY_HOST);
1552    }
1553    else
1554    {
1555       nf = 1;
1556       if (domain_type == 1) nf = num_functions;
1557 
1558       num_domains = num_dofs/nf;
1559       for (i=0; i < num_domains+1; i++)
1560       {
1561          i_aggregate_dof[i] = nf*i;
1562       }
1563       for (i=0; i < num_dofs; i++)
1564       {
1565          j_aggregate_dof[i] = i;
1566       }
1567    }
1568    /*hypre_printf("num_dofs: %d, num_domains: %d\n", num_dofs, num_domains);*/
1569 
1570 
1571    /*
1572      hypre_printf("========================================================\n");
1573      hypre_printf("== artificial non--overlapping domains (aggregates): ===\n");
1574      hypre_printf("========================================================\n");
1575 
1576 
1577      for (i=0; i < num_domains; i++)
1578      {
1579      hypre_printf("\n aggregate %d:\n", i);
1580      for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1581      hypre_printf("%d, ", j_aggregate_dof[j]);
1582 
1583      hypre_printf("\n");
1584      }
1585    */
1586 
1587    /* make domains from aggregates: *********************************/
1588 
1589    if (overlap == 1)
1590    {
1591       i_domain_dof = hypre_CTAlloc(HYPRE_Int,  num_domains+1, HYPRE_MEMORY_HOST);
1592 
1593       i_dof_index = hypre_CTAlloc(HYPRE_Int,  num_dofs, HYPRE_MEMORY_HOST);
1594 
1595       for (i=0; i < num_dofs; i++)
1596          i_dof_index[i] = -1;
1597 
1598       i_dof_to_aggregate = hypre_CTAlloc(HYPRE_Int,  num_dofs, HYPRE_MEMORY_HOST);
1599       for (i=0; i < num_domains; i++)
1600          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1601             i_dof_to_aggregate[j_aggregate_dof[j]] = i;
1602 
1603       domain_dof_counter=0;
1604       for (i=0; i < num_domains; i++)
1605       {
1606          i_domain_dof[i] =  domain_dof_counter;
1607          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1608             i_dof_index[j_aggregate_dof[j]]=-1;
1609 
1610          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1611             for (k=i_dof_dof[j_aggregate_dof[j]];
1612                  k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1613                if (i_dof_to_aggregate[j_dof_dof[k]] >= i
1614                    && i_dof_index[j_dof_dof[k]]==-1)
1615                {
1616                   i_dof_index[j_dof_dof[k]]++;
1617                   domain_dof_counter++;
1618                }
1619       }
1620 
1621       i_domain_dof[num_domains] =  domain_dof_counter;
1622       j_domain_dof = hypre_CTAlloc(HYPRE_Int, domain_dof_counter, HYPRE_MEMORY_HOST);
1623 
1624       for (i=0; i < num_dofs; i++)
1625          i_dof_index[i] = -1;
1626 
1627       domain_dof_counter=0;
1628       for (i=0; i < num_domains; i++)
1629       {
1630          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1631             i_dof_index[j_aggregate_dof[j]]=-1;
1632 
1633          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1634             for (k=i_dof_dof[j_aggregate_dof[j]];
1635                  k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1636                if (i_dof_to_aggregate[j_dof_dof[k]] >= i
1637                    && i_dof_index[j_dof_dof[k]]==-1)
1638                {
1639                   i_dof_index[j_dof_dof[k]]++;
1640                   j_domain_dof[domain_dof_counter] = j_dof_dof[k];
1641                   domain_dof_counter++;
1642                }
1643       }
1644 
1645       hypre_TFree(i_aggregate_dof, HYPRE_MEMORY_HOST);
1646       hypre_TFree(j_aggregate_dof, HYPRE_MEMORY_HOST);
1647       hypre_TFree(i_dof_to_aggregate, HYPRE_MEMORY_HOST);
1648       hypre_TFree(i_dof_index, HYPRE_MEMORY_HOST);
1649    }
1650    else if (overlap == 2)
1651    {
1652       i_domain_dof = hypre_CTAlloc(HYPRE_Int,  num_domains+1, HYPRE_MEMORY_HOST);
1653 
1654       i_dof_index = hypre_CTAlloc(HYPRE_Int,  num_dofs, HYPRE_MEMORY_HOST);
1655 
1656       for (i=0; i < num_dofs; i++)
1657          i_dof_index[i] = -1;
1658 
1659       domain_dof_counter=0;
1660       for (i=0; i < num_domains; i++)
1661       {
1662          i_domain_dof[i] =  domain_dof_counter;
1663          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1664             for (k=i_dof_dof[j_aggregate_dof[j]];
1665                  k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1666                if (i_dof_index[j_dof_dof[k]]==-1)
1667                {
1668                   i_dof_index[j_dof_dof[k]]++;
1669                   domain_dof_counter++;
1670                }
1671 
1672          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1673             for (k=i_dof_dof[j_aggregate_dof[j]];
1674                  k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1675                i_dof_index[j_dof_dof[k]]=-1;
1676 
1677       }
1678 
1679       for (i=0; i < num_dofs; i++)
1680          i_dof_index[i] = -1;
1681 
1682       i_domain_dof[num_domains] =  domain_dof_counter;
1683       j_domain_dof = hypre_CTAlloc(HYPRE_Int, domain_dof_counter, HYPRE_MEMORY_HOST);
1684 
1685       domain_dof_counter=0;
1686       for (i=0; i < num_domains; i++)
1687       {
1688          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1689             for (k=i_dof_dof[j_aggregate_dof[j]];
1690                  k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1691                if ( i_dof_index[j_dof_dof[k]]==-1)
1692                {
1693                   i_dof_index[j_dof_dof[k]]++;
1694                   j_domain_dof[domain_dof_counter] = j_dof_dof[k];
1695                   domain_dof_counter++;
1696                }
1697 
1698          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
1699             for (k=i_dof_dof[j_aggregate_dof[j]];
1700                  k<i_dof_dof[j_aggregate_dof[j]+1]; k++)
1701                i_dof_index[j_dof_dof[k]]=-1;
1702       }
1703 
1704       hypre_TFree(i_aggregate_dof, HYPRE_MEMORY_HOST);
1705       hypre_TFree(j_aggregate_dof, HYPRE_MEMORY_HOST);
1706       hypre_TFree(i_dof_index, HYPRE_MEMORY_HOST);
1707    }
1708    else
1709    {
1710       i_domain_dof = i_aggregate_dof;
1711       j_domain_dof = j_aggregate_dof;
1712    }
1713 
1714    /*hypre_printf("END domain_dof computations: =================================\n");
1715     */
1716    domain_matrixinverse_counter = 0;
1717    local_dof_counter = 0;
1718    piv_counter = 0;
1719 
1720    for (i=0; i < num_domains; i++)
1721    {
1722       local_dof_counter = i_domain_dof[i+1]-i_domain_dof[i];
1723       domain_matrixinverse_counter+= local_dof_counter * local_dof_counter;
1724       piv_counter += local_dof_counter;
1725 
1726       if (local_dof_counter > max_local_dof_counter)
1727          max_local_dof_counter = local_dof_counter;
1728    }
1729 
1730    domain_matrixinverse = hypre_CTAlloc(HYPRE_Real,  domain_matrixinverse_counter, HYPRE_MEMORY_HOST);
1731    if (use_nonsymm)
1732       piv = hypre_CTAlloc(HYPRE_Int,  piv_counter, HYPRE_MEMORY_HOST);
1733 
1734    i_local_to_global = hypre_CTAlloc(HYPRE_Int,  max_local_dof_counter, HYPRE_MEMORY_HOST);
1735 
1736    i_global_to_local = hypre_CTAlloc(HYPRE_Int, num_dofs, HYPRE_MEMORY_HOST);
1737 
1738    for (i=0; i < num_dofs; i++)
1739       i_global_to_local[i] = -1;
1740 
1741    piv_counter = 0;
1742    domain_matrixinverse_counter = 0;
1743    for (i=0; i < num_domains; i++)
1744    {
1745       local_dof_counter = 0;
1746       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
1747       {
1748          i_global_to_local[j_domain_dof[j]] = local_dof_counter;
1749          i_local_to_global[local_dof_counter] = j_domain_dof[j];
1750          local_dof_counter++;
1751       }
1752 
1753       /* get local matrix in AE: ======================================== */
1754       cnt = 0;
1755 
1756       AE = &domain_matrixinverse[domain_matrixinverse_counter];
1757       ipiv = &piv[piv_counter];
1758       for (i_loc=0; i_loc < local_dof_counter; i_loc++)
1759          for (j_loc=0; j_loc < local_dof_counter; j_loc++)
1760             AE[cnt++] = 0.e0;
1761 
1762       for (i_loc=0; i_loc < local_dof_counter; i_loc++)
1763       {
1764          i_dof = i_local_to_global[i_loc];
1765          for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++)
1766          {
1767             j_loc = i_global_to_local[j_dof_dof[j]];
1768             if (j_loc >=0)
1769                AE[i_loc + j_loc * local_dof_counter] = a_dof_dof[j];
1770          }
1771       }
1772 
1773       if (use_nonsymm)
1774       {
1775          hypre_dgetrf(&local_dof_counter,
1776                       &local_dof_counter, AE,
1777                       &local_dof_counter, ipiv, &ierr);
1778          piv_counter +=local_dof_counter;
1779       }
1780 
1781       else
1782       {
1783          hypre_dpotrf(&uplo,&local_dof_counter, AE,
1784                       &local_dof_counter, &ierr);
1785       }
1786 
1787       domain_matrixinverse_counter+=local_dof_counter*local_dof_counter;
1788 
1789       for (l_loc=0; l_loc < local_dof_counter; l_loc++)
1790          i_global_to_local[i_local_to_global[l_loc]] = -1;
1791 
1792    }
1793 
1794    hypre_TFree(i_local_to_global, HYPRE_MEMORY_HOST);
1795    hypre_TFree(i_global_to_local, HYPRE_MEMORY_HOST);
1796 
1797    domain_structure = hypre_CSRMatrixCreate(num_domains, max_local_dof_counter,
1798                                             i_domain_dof[num_domains]);
1799 
1800    hypre_CSRMatrixMemoryLocation(domain_structure) = HYPRE_MEMORY_HOST;
1801 
1802    hypre_CSRMatrixI(domain_structure) = i_domain_dof;
1803    hypre_CSRMatrixJ(domain_structure) = j_domain_dof;
1804    hypre_CSRMatrixData(domain_structure) = domain_matrixinverse;
1805 
1806    *domain_structure_pointer = domain_structure;
1807 
1808    *piv_pointer = piv;
1809 
1810    return hypre_error_flag;
1811 }
1812 
1813 /* unacceptable faces: i_face_to_prefer_weight[] = -1; ------------------*/
1814 
hypre_AMGeAgglomerate(HYPRE_Int * i_AE_element,HYPRE_Int * j_AE_element,HYPRE_Int * i_face_face,HYPRE_Int * j_face_face,HYPRE_Int * w_face_face,HYPRE_Int * i_face_element,HYPRE_Int * j_face_element,HYPRE_Int * i_element_face,HYPRE_Int * j_element_face,HYPRE_Int * i_face_to_prefer_weight,HYPRE_Int * i_face_weight,HYPRE_Int num_faces,HYPRE_Int num_elements,HYPRE_Int * num_AEs_pointer)1815 HYPRE_Int hypre_AMGeAgglomerate(HYPRE_Int *i_AE_element, HYPRE_Int *j_AE_element,
1816                                 HYPRE_Int *i_face_face, HYPRE_Int *j_face_face, HYPRE_Int *w_face_face,
1817                                 HYPRE_Int *i_face_element, HYPRE_Int *j_face_element,
1818                                 HYPRE_Int *i_element_face, HYPRE_Int *j_element_face,
1819                                 HYPRE_Int *i_face_to_prefer_weight,
1820                                 HYPRE_Int *i_face_weight,
1821                                 HYPRE_Int num_faces, HYPRE_Int num_elements,
1822                                 HYPRE_Int *num_AEs_pointer)
1823 {
1824    HYPRE_Int ierr = 0;
1825    HYPRE_Int i, j, k, l;
1826 
1827    HYPRE_Int face_to_eliminate;
1828    HYPRE_Int max_weight_old, max_weight;
1829 
1830    HYPRE_Int AE_counter=0, AE_element_counter=0;
1831 
1832    /* HYPRE_Int i_element_face_counter; */
1833 
1834    HYPRE_Int *i_element_to_AE;
1835 
1836    HYPRE_Int *previous, *next, *first;
1837    HYPRE_Int head, tail, last;
1838 
1839    HYPRE_Int face_max_weight, face_local_max_weight, preferred_weight;
1840 
1841    HYPRE_Int weight, weight_max;
1842 
1843    max_weight = 1;
1844    for (i=0; i < num_faces; i++)
1845    {
1846       weight = 1;
1847       for (j=i_face_face[i]; j < i_face_face[i+1]; j++)
1848          weight+= w_face_face[j];
1849       if (max_weight < weight) max_weight = weight;
1850    }
1851 
1852    first = hypre_CTAlloc(HYPRE_Int,  max_weight+1, HYPRE_MEMORY_HOST);
1853 
1854    next = hypre_CTAlloc(HYPRE_Int,  num_faces, HYPRE_MEMORY_HOST);
1855 
1856    previous = hypre_CTAlloc(HYPRE_Int,  num_faces+1, HYPRE_MEMORY_HOST);
1857 
1858    tail = num_faces;
1859    head = -1;
1860 
1861    for (i=0; i < num_faces; i++)
1862    {
1863       next[i] = i+1;
1864       previous[i] = i-1;
1865    }
1866 
1867    last = num_faces-1;
1868    previous[tail] = last;
1869 
1870    for (weight=1; weight <= max_weight; weight++)
1871       first[weight] = tail;
1872 
1873    i_element_to_AE = hypre_CTAlloc(HYPRE_Int,  num_elements, HYPRE_MEMORY_HOST);
1874 
1875    /*=======================================================================
1876      AGGLOMERATION PROCEDURE:
1877      ======================================================================= */
1878 
1879    for (k=0; k < num_elements; k++)
1880       i_element_to_AE[k] = -1;
1881 
1882    for (k=0; k < num_faces; k++)
1883       i_face_weight[k] = 1;
1884 
1885    first[0] = 0;
1886    first[1] = 0;
1887 
1888    last = previous[tail];
1889    weight_max = i_face_weight[last];
1890 
1891    k = last;
1892    face_max_weight = -1;
1893    while (k!= head)
1894    {
1895       if (i_face_to_prefer_weight[k] > -1)
1896          face_max_weight = k;
1897 
1898       if (face_max_weight > -1) break;
1899 
1900       k=previous[k];
1901    }
1902 
1903    /* this will be used if the faces have been sorted: *****************
1904       k = last;
1905       face_max_weight = -1;
1906       while (k != head)
1907       {
1908       if (i_face_to_prefer_weight[k] > -1)
1909       face_max_weight = k;
1910 
1911 
1912       if (face_max_weight > -1)
1913       {
1914       max_weight = i_face_weight[face_max_weight];
1915       l = face_max_weight;
1916 
1917       while (previous[l] != head)
1918       {
1919 
1920       if (i_face_weight[previous[l]] < max_weight)
1921       break;
1922       else
1923       if (i_face_to_prefer_weight[previous[l]] >
1924       i_face_to_prefer_weight[face_max_weight])
1925       {
1926       l = previous[l];
1927       face_max_weight = l;
1928       }
1929       else
1930       l = previous[l];
1931       }
1932 
1933       break;
1934       }
1935 
1936       l =previous[k];
1937 
1938       weight = i_face_weight[k];
1939       last = previous[tail];
1940       if (last == head)
1941       weight_max = 0;
1942       else
1943       weight_max = i_face_weight[last];
1944 
1945       ierr = hypre_remove_entry(weight, &weight_max,
1946       previous, next, first, &last,
1947       head, tail,
1948       k);
1949 
1950       k=l;
1951       }
1952    */
1953 
1954    if (face_max_weight == -1)
1955    {
1956       hypre_error_w_msg(HYPRE_ERROR_GENERIC,"all faces are unacceptable, i.e., no faces to eliminate !\n");
1957 
1958       *num_AEs_pointer = 1;
1959 
1960       i_AE_element[0] = 0;
1961       for (i=0; i < num_elements; i++)
1962       {
1963          i_element_to_AE[i] = 0;
1964          j_AE_element[i] = i;
1965       }
1966 
1967       i_AE_element[1] = num_elements;
1968 
1969       return hypre_error_flag;
1970    }
1971 
1972    for (k=0; k < num_faces; k++)
1973       if (i_face_to_prefer_weight[k] > i_face_to_prefer_weight[face_max_weight])
1974          face_max_weight = k;
1975 
1976    max_weight = i_face_weight[face_max_weight];
1977 
1978    AE_counter=0;
1979    AE_element_counter=0;
1980 
1981    i_AE_element[AE_counter] = AE_element_counter;
1982 
1983    max_weight_old = -1;
1984 
1985    face_local_max_weight = face_max_weight;
1986 
1987   eliminate_face:
1988 
1989    face_to_eliminate = face_local_max_weight;
1990 
1991    max_weight = i_face_weight[face_to_eliminate];
1992 
1993    last = previous[tail];
1994    if (last == head)
1995       weight_max = 0;
1996    else
1997       weight_max = i_face_weight[last];
1998 
1999    ierr = hypre_remove_entry(max_weight, &weight_max,
2000                              previous, next, first, &last,
2001                              head, tail,
2002                              face_to_eliminate);
2003 
2004    i_face_weight[face_to_eliminate] = 0;
2005 
2006    /*----------------------------------------------------------
2007     *  agglomeration step:
2008     *
2009     *  put on AE_element -- list all elements
2010     *  that share face "face_to_eliminate";
2011     *----------------------------------------------------------*/
2012 
2013    for (k = i_face_element[face_to_eliminate];
2014         k < i_face_element[face_to_eliminate+1]; k++)
2015    {
2016       /* check if element j_face_element[k] is already on the list: */
2017 
2018       if (j_face_element[k] < num_elements)
2019       {
2020          if (i_element_to_AE[j_face_element[k]] == -1)
2021          {
2022             j_AE_element[AE_element_counter] = j_face_element[k];
2023             i_element_to_AE[j_face_element[k]] = AE_counter;
2024             AE_element_counter++;
2025          }
2026       }
2027    }
2028 
2029    /* local update & search:==================================== */
2030 
2031    for (j=i_face_face[face_to_eliminate];
2032         j<i_face_face[face_to_eliminate+1]; j++)
2033       if (i_face_weight[j_face_face[j]] > 0)
2034       {
2035          weight = i_face_weight[j_face_face[j]];
2036 
2037 
2038          last = previous[tail];
2039          if (last == head)
2040             weight_max = 0;
2041          else
2042             weight_max = i_face_weight[last];
2043 
2044          ierr = hypre_move_entry(weight, &weight_max,
2045                                  previous, next, first, &last,
2046                                  head, tail,
2047                                  j_face_face[j]);
2048 
2049          i_face_weight[j_face_face[j]]+=w_face_face[j];
2050 
2051          weight = i_face_weight[j_face_face[j]];
2052 
2053          /* hypre_printf("update entry: %d\n", j_face_face[j]);  */
2054 
2055          last = previous[tail];
2056          if (last == head)
2057             weight_max = 0;
2058          else
2059             weight_max = i_face_weight[last];
2060 
2061          ierr = hypre_update_entry(weight, &weight_max,
2062                                    previous, next, first, &last,
2063                                    head, tail,
2064                                    j_face_face[j]);
2065 
2066          last = previous[tail];
2067          if (last == head)
2068             weight_max = 0;
2069          else
2070             weight_max = i_face_weight[last];
2071       }
2072 
2073    /* find a face of the elements that have already been agglomerated
2074       with a maximal weight: ====================================== */
2075 
2076    max_weight_old = max_weight;
2077 
2078    face_local_max_weight = -1;
2079    preferred_weight = -1;
2080 
2081    for (l = i_AE_element[AE_counter];
2082         l < AE_element_counter; l++)
2083    {
2084       for (j=i_element_face[j_AE_element[l]];
2085            j<i_element_face[j_AE_element[l]+1]; j++)
2086       {
2087          i = j_element_face[j];
2088 
2089          if (max_weight_old > 1 && i_face_weight[i] > 0 &&
2090              i_face_to_prefer_weight[i] > -1)
2091          {
2092             if ( max_weight < i_face_weight[i])
2093             {
2094                face_local_max_weight = i;
2095                max_weight = i_face_weight[i];
2096                preferred_weight = i_face_to_prefer_weight[i];
2097             }
2098 
2099             if ( max_weight == i_face_weight[i]
2100                  && i_face_to_prefer_weight[i] > preferred_weight)
2101             {
2102                face_local_max_weight = i;
2103                preferred_weight = i_face_to_prefer_weight[i];
2104             }
2105          }
2106       }
2107    }
2108 
2109    if (face_local_max_weight > -1) goto eliminate_face;
2110 
2111    /* ----------------------------------------------------------------
2112     * eliminate and label with i_face_weight[ ] = -1
2113     * "boundary faces of agglomerated elements";
2114     * those faces will be preferred for the next coarse spaces
2115     * in case multiple coarse spaces are to be built;
2116     * ---------------------------------------------------------------*/
2117 
2118    for (k = i_AE_element[AE_counter]; k < AE_element_counter; k++)
2119    {
2120       for (j = i_element_face[j_AE_element[k]];
2121            j < i_element_face[j_AE_element[k]+1]; j++)
2122       {
2123          if (i_face_weight[j_element_face[j]] > 0)
2124          {
2125             weight = i_face_weight[j_element_face[j]];
2126             last = previous[tail];
2127             if (last == head)
2128                weight_max = 0;
2129             else
2130                weight_max = i_face_weight[last];
2131 
2132 
2133             ierr = hypre_remove_entry(weight, &weight_max,
2134                                       previous, next, first, &last,
2135                                       head, tail,
2136                                       j_element_face[j]);
2137 
2138             i_face_weight[j_element_face[j]] = -1;
2139 
2140          }
2141       }
2142    }
2143 
2144    if (AE_element_counter > i_AE_element[AE_counter])
2145    {
2146       /* hypre_printf("completing agglomerated element: %d\n",
2147          AE_counter);   */
2148       AE_counter++;
2149    }
2150 
2151    i_AE_element[AE_counter] = AE_element_counter;
2152 
2153    /* find a face with maximal weight: ---------------------------*/
2154 
2155    last = previous[tail];
2156    if (last == head) goto end_agglomerate;
2157 
2158    weight_max = i_face_weight[last];
2159 
2160    /* hypre_printf("global search: ======================================\n"); */
2161 
2162    face_max_weight = -1;
2163 
2164    k = last;
2165    while (k != head)
2166    {
2167       if (i_face_to_prefer_weight[k] > -1)
2168          face_max_weight = k;
2169 
2170       if (face_max_weight > -1)
2171       {
2172          max_weight = i_face_weight[face_max_weight];
2173          l = face_max_weight;
2174 
2175          while (previous[l] != head)
2176          {
2177 
2178             if (i_face_weight[previous[l]] < max_weight)
2179                break;
2180             else
2181                if (i_face_to_prefer_weight[previous[l]] >
2182                    i_face_to_prefer_weight[face_max_weight])
2183                {
2184                   l = previous[l];
2185                   face_max_weight = l;
2186                }
2187                else
2188                   l = previous[l];
2189          }
2190 
2191          break;
2192       }
2193 
2194       l =previous[k];
2195       /* remove face k: ---------------------------------------*/
2196 
2197       weight = i_face_weight[k];
2198       last = previous[tail];
2199       if (last == head)
2200          weight_max = 0;
2201       else
2202          weight_max = i_face_weight[last];
2203 
2204 
2205       ierr = hypre_remove_entry(weight, &weight_max,
2206                                 previous, next, first, &last,
2207                                 head, tail,
2208                                 k);
2209 
2210       /* i_face_weight[k] = -1; */
2211 
2212       k=l;
2213    }
2214 
2215    if (face_max_weight == -1) goto end_agglomerate;
2216 
2217    max_weight = i_face_weight[face_max_weight];
2218 
2219    face_local_max_weight = face_max_weight;
2220 
2221    goto eliminate_face;
2222 
2223   end_agglomerate:
2224 
2225    /* eliminate isolated elements: ----------------------------------*/
2226 
2227    for (i=0; i<num_elements; i++)
2228    {
2229 
2230       if (i_element_to_AE[i] == -1)
2231       {
2232          for (j=i_element_face[i]; j < i_element_face[i+1]
2233                  && i_element_to_AE[i] == -1; j++)
2234             if (i_face_to_prefer_weight[j_element_face[j]] > -1)
2235                for (k=i_face_element[j_element_face[j]];
2236                     k<i_face_element[j_element_face[j]+1]
2237                        && i_element_to_AE[i] == -1; k++)
2238                   if (i_element_to_AE[j_face_element[k]] != -1)
2239                      i_element_to_AE[i] = i_element_to_AE[j_face_element[k]];
2240       }
2241 
2242       /*
2243         if (i_element_to_AE[i] == -1)
2244         {
2245         i_element_face_counter = 0;
2246         for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
2247         if (i_face_to_prefer_weight[j_element_face[j]] > -1)
2248         i_element_face_counter++;
2249 
2250         if (i_element_face_counter == 1)
2251         {
2252         for (j=i_element_face[i]; j < i_element_face[i+1]; j++)
2253         if (i_face_to_prefer_weight[j_element_face[j]] > -1)
2254         for (k=i_face_element[j_element_face[j]];
2255         k<i_face_element[j_element_face[j]+1]; k++)
2256         if (i_element_to_AE[j_face_element[k]] != -1)
2257         i_element_to_AE[i] = i_element_to_AE[j_face_element[k]];
2258         }
2259         }
2260       */
2261 
2262       if (i_element_to_AE[i] == -1)
2263       {
2264          i_element_to_AE[i] = AE_counter;
2265          AE_counter++;
2266       }
2267    }
2268 
2269    num_AEs_pointer[0] = AE_counter;
2270 
2271    /* compute adjoint graph: -------------------------------------------*/
2272 
2273    for (i=0; i < AE_counter; i++)
2274       i_AE_element[i] = 0;
2275 
2276    for (i=0; i < num_elements; i++)
2277       i_AE_element[i_element_to_AE[i]]++;
2278 
2279    i_AE_element[AE_counter] = num_elements;
2280 
2281    for (i=AE_counter-1; i > -1; i--)
2282       i_AE_element[i] = i_AE_element[i+1] - i_AE_element[i];
2283 
2284    for (i=0; i < num_elements; i++)
2285    {
2286       j_AE_element[i_AE_element[i_element_to_AE[i]]] = i;
2287       i_AE_element[i_element_to_AE[i]]++;
2288    }
2289 
2290    for (i=AE_counter-1; i > -1; i--)
2291       i_AE_element[i+1] = i_AE_element[i];
2292 
2293    i_AE_element[0] = 0;
2294 
2295    /*--------------------------------------------------------------------*/
2296    for (i=0; i < num_faces; i++)
2297       if (i_face_to_prefer_weight[i] == -1) i_face_weight[i] = -1;
2298 
2299 
2300    hypre_TFree(i_element_to_AE, HYPRE_MEMORY_HOST);
2301 
2302    hypre_TFree(previous, HYPRE_MEMORY_HOST);
2303    hypre_TFree(next, HYPRE_MEMORY_HOST);
2304    hypre_TFree(first, HYPRE_MEMORY_HOST);
2305 
2306    return ierr;
2307 }
2308 
hypre_update_entry(HYPRE_Int weight,HYPRE_Int * weight_max,HYPRE_Int * previous,HYPRE_Int * next,HYPRE_Int * first,HYPRE_Int * last,HYPRE_Int head,HYPRE_Int tail,HYPRE_Int i)2309 HYPRE_Int hypre_update_entry(HYPRE_Int weight, HYPRE_Int *weight_max,
2310                              HYPRE_Int *previous, HYPRE_Int *next, HYPRE_Int *first, HYPRE_Int *last,
2311                              HYPRE_Int head, HYPRE_Int tail,
2312                              HYPRE_Int i)
2313 
2314 {
2315    HYPRE_Int weight0;
2316 
2317    if (previous[i] != head) next[previous[i]] = next[i];
2318    previous[next[i]] = previous[i];
2319 
2320    if (first[weight] == tail)
2321    {
2322       if (weight <= weight_max[0])
2323       {
2324          hypre_printf("ERROR IN UPDATE_ENTRY: ===================\n");
2325          hypre_printf("weight: %d, weight_max: %d\n",
2326                       weight, weight_max[0]);
2327          return -1;
2328       }
2329       for (weight0=weight_max[0]+1; weight0 <= weight; weight0++)
2330       {
2331          first[weight0] = i;
2332          /* hypre_printf("create first[%d] = %d\n", weight0, i); */
2333       }
2334 
2335       previous[i] = previous[tail];
2336       next[i] = tail;
2337       if (previous[tail] > head)
2338          next[previous[tail]] = i;
2339       previous[tail] = i;
2340 
2341    }
2342    else
2343       /* first[weight] already exists: =====================*/
2344    {
2345       previous[i] = previous[first[weight]];
2346       next[i] = first[weight];
2347 
2348       if (previous[first[weight]] != head)
2349          next[previous[first[weight]]] = i;
2350 
2351       previous[first[weight]] = i;
2352 
2353       for (weight0=1; weight0 <= weight; weight0++)
2354          if (first[weight0] == first[weight])
2355             first[weight0] = i;
2356 
2357    }
2358 
2359    return 0;
2360 }
2361 
hypre_remove_entry(HYPRE_Int weight,HYPRE_Int * weight_max,HYPRE_Int * previous,HYPRE_Int * next,HYPRE_Int * first,HYPRE_Int * last,HYPRE_Int head,HYPRE_Int tail,HYPRE_Int i)2362 HYPRE_Int hypre_remove_entry(HYPRE_Int weight, HYPRE_Int *weight_max,
2363                              HYPRE_Int *previous, HYPRE_Int *next, HYPRE_Int *first, HYPRE_Int *last,
2364                              HYPRE_Int head, HYPRE_Int tail,
2365                              HYPRE_Int i)
2366 {
2367    HYPRE_Int weight0;
2368 
2369    if (previous[i] != head) next[previous[i]] = next[i];
2370    previous[next[i]] = previous[i];
2371 
2372    for (weight0=1; weight0 <= weight_max[0]; weight0++)
2373    {
2374       /* hypre_printf("first[%d}: %d\n", weight0,  first[weight0]); */
2375       if (first[weight0] == i)
2376       {
2377          first[weight0] = next[i];
2378          /* hypre_printf("shift: first[%d]= %d to %d\n",
2379             weight0, i, next[i]);
2380             if (i == last[0])
2381             hypre_printf("i= last[0]: %d\n", i); */
2382       }
2383    }
2384 
2385    next[i] = i;
2386    previous[i] = i;
2387 
2388    return 0;
2389 }
2390 
hypre_move_entry(HYPRE_Int weight,HYPRE_Int * weight_max,HYPRE_Int * previous,HYPRE_Int * next,HYPRE_Int * first,HYPRE_Int * last,HYPRE_Int head,HYPRE_Int tail,HYPRE_Int i)2391 HYPRE_Int hypre_move_entry(HYPRE_Int weight, HYPRE_Int *weight_max,
2392                            HYPRE_Int *previous, HYPRE_Int *next, HYPRE_Int *first, HYPRE_Int *last,
2393                            HYPRE_Int head, HYPRE_Int tail,
2394                            HYPRE_Int i)
2395 {
2396    HYPRE_Int  weight0;
2397 
2398    if (previous[i] != head) next[previous[i]] = next[i];
2399    previous[next[i]] = previous[i];
2400 
2401    for (weight0=1; weight0 <= weight_max[0]; weight0++)
2402    {
2403       if (first[weight0] == i)
2404          first[weight0] = next[i];
2405    }
2406 
2407    return 0;
2408 }
2409 
2410 /*---------------------------------------------------------------------
2411   hypre_matinv:  X <--  A**(-1) ;  A IS POSITIVE DEFINITE (non--symmetric);
2412   ---------------------------------------------------------------------*/
2413 
hypre_matinv(HYPRE_Real * x,HYPRE_Real * a,HYPRE_Int k)2414 HYPRE_Int hypre_matinv(HYPRE_Real *x, HYPRE_Real *a, HYPRE_Int k)
2415 {
2416    HYPRE_Int i,j,l, ierr =0;
2417 
2418    for (i=0; i < k; i++)
2419    {
2420       if (a[i+i*k] <= 0.e0)
2421       {
2422          if (i < k-1)
2423          {
2424             /*********
2425                       hypre_printf("indefinite singular matrix in *** matinv ***:\n");
2426                       hypre_printf("i:%d;  diagonal entry: %e\n", i, a[i+k*i]);
2427             */
2428             ierr = -1;
2429          }
2430 
2431          a[i+i*k] = 0.e0;
2432       }
2433       else
2434          a[i+k*i] = 1.0 / a[i+i*k];
2435 
2436       for (j=1; j < k-i; j++)
2437       {
2438          for (l=1; l < k-i; l++)
2439          {
2440             a[i+l+k*(i+j)] -= a[i+l+k*i] * a[i+k*i] * a[i+k*(i+j)];
2441          }
2442       }
2443 
2444       for (j=1; j < k-i; j++)
2445       {
2446          a[i+j+k*i] = a[i+j+k*i] * a[i+k*i];
2447          a[i+k*(i+j)] = a[i+k*(i+j)] * a[i+k*i];
2448       }
2449    }
2450 
2451    /* FULL INVERSION: --------------------------------------------*/
2452 
2453    x[k*k-1] = a[k*k-1];
2454    for (i=k-1; i > -1; i--)
2455    {
2456       for (j=1; j < k-i; j++)
2457       {
2458          x[i+j+k*i] =0;
2459          x[i+k*(i+j)] =0;
2460 
2461          for (l=1; l< k-i; l++)
2462          {
2463             x[i+j+k*i] -= x[i+j+k*(i+l)] * a[i+l+k*i];
2464             x[i+k*(i+j)] -= a[i+k*(i+l)] * x[i+l+k*(i+j)];
2465          }
2466       }
2467 
2468       x[i+k*i] = a[i+k*i];
2469       for (j=1; j<k-i; j++)
2470       {
2471          x[i+k*i] -= x[i+k*(i+j)] * a[i+j+k*i];
2472       }
2473    }
2474 
2475    return ierr;
2476 }
2477 
2478 HYPRE_Int
hypre_parCorrRes(hypre_ParCSRMatrix * A,hypre_ParVector * x,hypre_Vector * rhs,hypre_Vector ** tmp_ptr)2479 hypre_parCorrRes( hypre_ParCSRMatrix *A,
2480                   hypre_ParVector    *x,
2481                   hypre_Vector       *rhs,
2482                   hypre_Vector      **tmp_ptr )
2483 {
2484    HYPRE_Int i, j, index, start;
2485    HYPRE_Int num_sends, num_cols_offd;
2486    HYPRE_Int local_size;
2487    HYPRE_Real *x_buf_data, *x_tmp_data, *x_local_data;
2488 
2489    hypre_ParCSRCommPkg *comm_pkg;
2490    hypre_CSRMatrix *offd;
2491    hypre_Vector *x_local, *x_tmp, *tmp_vector;
2492    hypre_ParCSRCommHandle *comm_handle;
2493 
2494    comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2495    offd = hypre_ParCSRMatrixOffd(A);
2496    num_cols_offd = hypre_CSRMatrixNumCols(offd);
2497 
2498    x_local = hypre_ParVectorLocalVector(x);
2499    x_local_data = hypre_VectorData(x_local);
2500    local_size = hypre_VectorSize(x_local);
2501 
2502    if (num_cols_offd)
2503    {
2504       num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
2505       x_buf_data = hypre_CTAlloc(HYPRE_Real,
2506             hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
2507 
2508       index = 0;
2509       for (i = 0; i < num_sends; i++)
2510       {
2511          start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
2512          for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
2513             x_buf_data[index++]
2514                = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2515       }
2516 
2517       x_tmp = hypre_SeqVectorCreate(num_cols_offd);
2518       hypre_SeqVectorInitialize(x_tmp);
2519       x_tmp_data = hypre_VectorData(x_tmp);
2520 
2521       comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, x_buf_data,
2522                                                   x_tmp_data);
2523 
2524       tmp_vector = hypre_SeqVectorCreate(local_size);
2525       hypre_VectorMemoryLocation(tmp_vector) = HYPRE_MEMORY_DEVICE;
2526       hypre_SeqVectorInitialize(tmp_vector);
2527       hypre_SeqVectorCopy(rhs,tmp_vector);
2528 
2529       hypre_ParCSRCommHandleDestroy(comm_handle);
2530       comm_handle = NULL;
2531 
2532       hypre_CSRMatrixMatvec(-1.0, offd, x_tmp, 1.0, tmp_vector);
2533 
2534       hypre_SeqVectorDestroy(x_tmp);
2535       hypre_TFree(x_buf_data, HYPRE_MEMORY_HOST);
2536    }
2537    else
2538    {
2539       tmp_vector = hypre_SeqVectorCreate(local_size);
2540       hypre_VectorMemoryLocation(tmp_vector) = HYPRE_MEMORY_DEVICE;
2541       hypre_SeqVectorInitialize(tmp_vector);
2542       hypre_SeqVectorCopy(rhs,tmp_vector);
2543    }
2544 
2545    *tmp_ptr = tmp_vector;
2546 
2547    return 0;
2548 }
2549 
2550 
hypre_AdSchwarzSolve(hypre_ParCSRMatrix * par_A,hypre_ParVector * par_rhs,hypre_CSRMatrix * domain_structure,HYPRE_Real * scale,hypre_ParVector * par_x,hypre_ParVector * par_aux,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)2551 HYPRE_Int hypre_AdSchwarzSolve(hypre_ParCSRMatrix *par_A,
2552                                hypre_ParVector *par_rhs,
2553                                hypre_CSRMatrix *domain_structure,
2554                                HYPRE_Real *scale,
2555                                hypre_ParVector *par_x,
2556                                hypre_ParVector *par_aux, HYPRE_Int *pivots,
2557                                HYPRE_Int use_nonsymm)
2558 {
2559    HYPRE_Int ierr = 0;
2560    HYPRE_Real *x;
2561    HYPRE_Real *aux;
2562    HYPRE_Real *tmp;
2563    hypre_Vector *x_vector;
2564    hypre_Vector *aux_vector;
2565    MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
2566    HYPRE_Int num_domains;
2567    HYPRE_Int max_domain_size;
2568    HYPRE_Int *i_domain_dof;
2569    HYPRE_Int *j_domain_dof;
2570    HYPRE_Real *domain_matrixinverse;
2571 
2572    HYPRE_Int piv_counter = 0;
2573    HYPRE_Int one = 1;
2574    char uplo = 'L';
2575 
2576    HYPRE_Int jj,i,j; /*, j_loc, k_loc;*/
2577 
2578 
2579    HYPRE_Int matrix_size, matrix_size_counter = 0;
2580 
2581    HYPRE_Int num_procs;
2582 
2583    hypre_MPI_Comm_size(comm,&num_procs);
2584 
2585    /* initiate:      ----------------------------------------------- */
2586    x_vector = hypre_ParVectorLocalVector(par_x);
2587    aux_vector = hypre_ParVectorLocalVector(par_aux);
2588    x = hypre_VectorData(x_vector);
2589    aux = hypre_VectorData(aux_vector);
2590    num_domains = hypre_CSRMatrixNumRows(domain_structure);
2591    max_domain_size = hypre_CSRMatrixNumCols(domain_structure);
2592    i_domain_dof = hypre_CSRMatrixI(domain_structure);
2593    j_domain_dof = hypre_CSRMatrixJ(domain_structure);
2594    domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
2595 
2596    if (use_nonsymm)
2597       uplo = 'N';
2598 
2599    hypre_ParVectorCopy(par_rhs,par_aux);
2600    hypre_ParCSRMatrixMatvec(-1.0,par_A,par_x,1.0,par_aux);
2601    tmp = hypre_CTAlloc(HYPRE_Real, max_domain_size, HYPRE_MEMORY_HOST);
2602 
2603    /* forward solve: ----------------------------------------------- */
2604 
2605    matrix_size_counter = 0;
2606    for (i=0; i < num_domains; i++)
2607    {
2608       matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
2609 
2610       /* compute residual: ---------------------------------------- */
2611 
2612       jj = 0;
2613       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2614       {
2615          tmp[jj] = aux[j_domain_dof[j]];
2616          jj++;
2617       }
2618       /* solve for correction: ------------------------------------- */
2619       if (use_nonsymm)
2620       {
2621          hypre_dgetrs(&uplo, &matrix_size, &one,
2622                       &domain_matrixinverse[matrix_size_counter],
2623                       &matrix_size, &pivots[piv_counter], tmp,
2624                       &matrix_size, &ierr);
2625       }
2626       else
2627       {
2628          hypre_dpotrs(&uplo, &matrix_size, &one,
2629                       &domain_matrixinverse[matrix_size_counter],
2630                       &matrix_size, tmp,
2631                       &matrix_size, &ierr);
2632       }
2633 
2634       if (ierr)  hypre_error(HYPRE_ERROR_GENERIC);
2635       jj = 0;
2636       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2637       {
2638          x[j_domain_dof[j]]+=  scale[j_domain_dof[j]]*tmp[jj++];
2639       }
2640       matrix_size_counter += matrix_size * matrix_size;
2641       piv_counter += matrix_size;
2642 
2643    }
2644 
2645    hypre_TFree(tmp, HYPRE_MEMORY_HOST);
2646 
2647    return hypre_error_flag;
2648 }
2649 
hypre_AdSchwarzCFSolve(hypre_ParCSRMatrix * par_A,hypre_ParVector * par_rhs,hypre_CSRMatrix * domain_structure,HYPRE_Real * scale,hypre_ParVector * par_x,hypre_ParVector * par_aux,HYPRE_Int * CF_marker,HYPRE_Int rlx_pt,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)2650 HYPRE_Int hypre_AdSchwarzCFSolve(hypre_ParCSRMatrix *par_A,
2651                                  hypre_ParVector *par_rhs,
2652                                  hypre_CSRMatrix *domain_structure,
2653                                  HYPRE_Real *scale,
2654                                  hypre_ParVector *par_x,
2655                                  hypre_ParVector *par_aux,
2656                                  HYPRE_Int *CF_marker,
2657                                  HYPRE_Int rlx_pt, HYPRE_Int *pivots,
2658                                  HYPRE_Int use_nonsymm)
2659 {
2660    HYPRE_Int ierr = 0;
2661    HYPRE_Real *x;
2662    HYPRE_Real *aux;
2663    HYPRE_Real *tmp;
2664    hypre_Vector *x_vector;
2665    hypre_Vector *aux_vector;
2666    MPI_Comm comm = hypre_ParCSRMatrixComm(par_A);
2667    HYPRE_Int num_domains;
2668    HYPRE_Int max_domain_size;
2669    HYPRE_Int *i_domain_dof;
2670    HYPRE_Int *j_domain_dof;
2671    HYPRE_Real *domain_matrixinverse;
2672 
2673    HYPRE_Int piv_counter = 0;
2674    HYPRE_Int one = 1;
2675 
2676    char uplo = 'L';
2677    HYPRE_Int jj,i,j; /*, j_loc, k_loc;*/
2678 
2679    HYPRE_Int matrix_size, matrix_size_counter = 0;
2680 
2681    HYPRE_Int num_procs;
2682 
2683    hypre_MPI_Comm_size(comm,&num_procs);
2684 
2685    /* initiate:      ----------------------------------------------- */
2686    x_vector = hypre_ParVectorLocalVector(par_x);
2687    aux_vector = hypre_ParVectorLocalVector(par_aux);
2688    x = hypre_VectorData(x_vector);
2689    aux = hypre_VectorData(aux_vector);
2690    num_domains = hypre_CSRMatrixNumRows(domain_structure);
2691    max_domain_size = hypre_CSRMatrixNumCols(domain_structure);
2692    i_domain_dof = hypre_CSRMatrixI(domain_structure);
2693    j_domain_dof = hypre_CSRMatrixJ(domain_structure);
2694    domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
2695 
2696    if (use_nonsymm)
2697       uplo = 'N';
2698 
2699    hypre_ParVectorCopy(par_rhs,par_aux);
2700    hypre_ParCSRMatrixMatvec(-1.0,par_A,par_x,1.0,par_aux);
2701    tmp = hypre_CTAlloc(HYPRE_Real, max_domain_size, HYPRE_MEMORY_HOST);
2702 
2703    /* forward solve: ----------------------------------------------- */
2704 
2705    matrix_size_counter = 0;
2706    for (i=0; i < num_domains; i++)
2707    {
2708       if (CF_marker[i] == rlx_pt)
2709       {
2710          matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
2711 
2712          /* compute residual: ---------------------------------------- */
2713 
2714          jj = 0;
2715          for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2716          {
2717             tmp[jj] = aux[j_domain_dof[j]];
2718             jj++;
2719          }
2720          /* solve for correction: ------------------------------------- */
2721          if (use_nonsymm)
2722          {
2723             hypre_dgetrs(&uplo, &matrix_size, &one,
2724                          &domain_matrixinverse[matrix_size_counter],
2725                          &matrix_size, &pivots[piv_counter], tmp,
2726                          &matrix_size, &ierr);
2727          }
2728 
2729          else
2730          {
2731             hypre_dpotrs(&uplo, &matrix_size, &one,
2732                          &domain_matrixinverse[matrix_size_counter],
2733                          &matrix_size, tmp,
2734                          &matrix_size, &ierr);
2735          }
2736 
2737          if (ierr)  hypre_error(HYPRE_ERROR_GENERIC);
2738          jj = 0;
2739          for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2740          {
2741             x[j_domain_dof[j]]+=  scale[j_domain_dof[j]]*tmp[jj++];
2742          }
2743          matrix_size_counter += matrix_size * matrix_size;
2744          piv_counter += matrix_size;
2745       }
2746    }
2747 
2748    hypre_TFree(tmp, HYPRE_MEMORY_HOST);
2749 
2750    return hypre_error_flag;
2751 }
2752 
2753 HYPRE_Int
hypre_GenerateScale(hypre_CSRMatrix * domain_structure,HYPRE_Int num_variables,HYPRE_Real relaxation_weight,HYPRE_Real ** scale_pointer)2754 hypre_GenerateScale(hypre_CSRMatrix *domain_structure,
2755                     HYPRE_Int              num_variables,
2756                     HYPRE_Real       relaxation_weight,
2757                     HYPRE_Real     **scale_pointer)
2758 {
2759    HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
2760    HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
2761    HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
2762    HYPRE_Int i, j;
2763    HYPRE_Real *scale;
2764 
2765    scale = hypre_CTAlloc(HYPRE_Real,  num_variables, HYPRE_MEMORY_HOST);
2766 
2767    for (i=0; i < num_domains; i++)
2768       for (j = i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2769          scale[j_domain_dof[j]] += 1.0;
2770 
2771    for (i=0; i < num_variables; i++)
2772       scale[i] = relaxation_weight/scale[i];
2773 
2774    *scale_pointer = scale;
2775 
2776    return hypre_error_flag;
2777 }
2778 
hypre_ParAdSchwarzSolve(hypre_ParCSRMatrix * A,hypre_ParVector * F,hypre_CSRMatrix * domain_structure,HYPRE_Real * scale,hypre_ParVector * X,hypre_ParVector * Vtemp,HYPRE_Int * pivots,HYPRE_Int use_nonsymm)2779 HYPRE_Int hypre_ParAdSchwarzSolve(hypre_ParCSRMatrix *A,
2780                                   hypre_ParVector *F,
2781                                   hypre_CSRMatrix *domain_structure,
2782                                   HYPRE_Real *scale,
2783                                   hypre_ParVector *X,
2784                                   hypre_ParVector *Vtemp,
2785                                   HYPRE_Int *pivots,
2786                                   HYPRE_Int use_nonsymm)
2787 {
2788    hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2789    HYPRE_Int num_sends = 0;
2790    HYPRE_Int *send_map_starts;
2791    HYPRE_Int *send_map_elmts;
2792 
2793    hypre_ParCSRCommHandle *comm_handle;
2794 
2795    HYPRE_Int ierr = 0;
2796    HYPRE_Real *x_data;
2797    HYPRE_Real *x_ext_data;
2798    HYPRE_Real *aux;
2799    HYPRE_Real *vtemp_data;
2800    HYPRE_Real *vtemp_ext_data;
2801    HYPRE_Int num_domains, max_domain_size;
2802    HYPRE_Int *i_domain_dof;
2803    HYPRE_Int *j_domain_dof;
2804    HYPRE_Real *domain_matrixinverse;
2805    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2806    HYPRE_Int num_variables;
2807    HYPRE_Int num_cols_offd;
2808    HYPRE_Real *scale_ext;
2809    HYPRE_Real *buf_data;
2810    HYPRE_Int index;
2811 
2812    HYPRE_Int piv_counter = 0;
2813    HYPRE_Int one = 1;
2814 
2815    char uplo = 'L';
2816    HYPRE_Int jj,i,j, j_loc; /*, j_loc, k_loc;*/
2817 
2818    HYPRE_Int matrix_size, matrix_size_counter = 0;
2819 
2820    /* initiate:      ----------------------------------------------- */
2821    num_variables = hypre_CSRMatrixNumRows(A_diag);
2822    num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
2823    x_data = hypre_VectorData(hypre_ParVectorLocalVector(X));
2824    vtemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Vtemp));
2825 
2826    if (use_nonsymm)
2827       uplo = 'N';
2828 
2829    hypre_ParVectorCopy(F,Vtemp);
2830    hypre_ParCSRMatrixMatvec(-1.0,A,X,1.0,Vtemp);
2831 
2832    /* forward solve: ----------------------------------------------- */
2833 
2834    num_domains = hypre_CSRMatrixNumRows(domain_structure);
2835    max_domain_size = hypre_CSRMatrixNumCols(domain_structure);
2836    i_domain_dof = hypre_CSRMatrixI(domain_structure);
2837    j_domain_dof = hypre_CSRMatrixJ(domain_structure);
2838    domain_matrixinverse = hypre_CSRMatrixData(domain_structure);
2839    aux = hypre_CTAlloc(HYPRE_Real,  max_domain_size, HYPRE_MEMORY_HOST);
2840 
2841    if (comm_pkg)
2842    {
2843       num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
2844       send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
2845       send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
2846 
2847       buf_data = hypre_CTAlloc(HYPRE_Real,  send_map_starts[num_sends], HYPRE_MEMORY_HOST);
2848       x_ext_data = hypre_CTAlloc(HYPRE_Real,  num_cols_offd, HYPRE_MEMORY_HOST);
2849       vtemp_ext_data = hypre_CTAlloc(HYPRE_Real,  num_cols_offd, HYPRE_MEMORY_HOST);
2850       scale_ext = hypre_CTAlloc(HYPRE_Real,  num_cols_offd, HYPRE_MEMORY_HOST);
2851 
2852       index = 0;
2853       for (i=0; i < num_sends; i++)
2854       {
2855          for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
2856             buf_data[index++] = vtemp_data[send_map_elmts[j]];
2857       }
2858 
2859       comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,
2860                                                  vtemp_ext_data);
2861       hypre_ParCSRCommHandleDestroy(comm_handle);
2862       comm_handle = NULL;
2863 
2864       index = 0;
2865       for (i=0; i < num_sends; i++)
2866       {
2867          for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
2868             buf_data[index++] = scale[send_map_elmts[j]];
2869       }
2870 
2871       comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,buf_data,scale_ext);
2872       hypre_ParCSRCommHandleDestroy(comm_handle);
2873       comm_handle = NULL;
2874    }
2875 
2876    for (i=0; i < num_cols_offd; i++)
2877       x_ext_data[i] = 0.0;
2878 
2879    matrix_size_counter = 0;
2880    for (i=0; i < num_domains; i++)
2881    {
2882       matrix_size = i_domain_dof[i+1] - i_domain_dof[i];
2883 
2884       /* copy data contiguously into aux  --------------------------- */
2885 
2886       jj = 0;
2887       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2888       {
2889          j_loc = j_domain_dof[j];
2890          if (j_loc < num_variables)
2891             aux[jj] = vtemp_data[j_loc];
2892          else
2893             aux[jj] = vtemp_ext_data[j_loc-num_variables];
2894          jj++;
2895       }
2896       /* solve for correction: ------------------------------------- */
2897       if (use_nonsymm)
2898       {
2899          hypre_dgetrs(&uplo, &matrix_size, &one,
2900                       &domain_matrixinverse[matrix_size_counter],
2901                       &matrix_size, &pivots[piv_counter], aux,
2902                       &matrix_size, &ierr);
2903       }
2904       else
2905       {
2906          hypre_dpotrs(&uplo, &matrix_size, &one,
2907                       &domain_matrixinverse[matrix_size_counter],
2908                       &matrix_size, aux,
2909                       &matrix_size, &ierr);
2910       }
2911 
2912       if (ierr)  hypre_error(HYPRE_ERROR_GENERIC);
2913       jj = 0;
2914       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
2915       {
2916          j_loc = j_domain_dof[j];
2917          if (j_loc < num_variables)
2918             x_data[j_loc]+= scale[j_loc] * aux[jj++];
2919          else
2920          {
2921             j_loc -= num_variables;
2922             x_ext_data[j_loc] += scale_ext[j_loc] * aux[jj++];
2923          }
2924       }
2925       matrix_size_counter += matrix_size * matrix_size;
2926       piv_counter += matrix_size;
2927    }
2928 
2929    if (comm_pkg)
2930    {
2931       comm_handle=hypre_ParCSRCommHandleCreate (2,comm_pkg,x_ext_data,buf_data);
2932 
2933       hypre_ParCSRCommHandleDestroy(comm_handle);
2934       comm_handle = NULL;
2935 
2936       index = 0;
2937       for (i=0; i < num_sends; i++)
2938       {
2939          for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
2940             x_data[send_map_elmts[j]] += buf_data[index++];
2941       }
2942 
2943       hypre_TFree(buf_data, HYPRE_MEMORY_HOST);
2944       hypre_TFree(x_ext_data, HYPRE_MEMORY_HOST);
2945       hypre_TFree(vtemp_ext_data, HYPRE_MEMORY_HOST);
2946       hypre_TFree(scale_ext, HYPRE_MEMORY_HOST);
2947    }
2948    hypre_TFree(aux, HYPRE_MEMORY_HOST);
2949 
2950    return hypre_error_flag;
2951 }
2952 
2953 /*--------------------------------------------------------------------------
2954  * hypre_ParAMGCreateDomainDof:
2955  *--------------------------------------------------------------------------*/
2956 
2957 /*****************************************************************************
2958  *
2959  * Routine for constructing graph domain_dof with minimal overlap
2960  *             and computing the respective matrix inverses to be
2961  *             used in an overlapping additive Schwarz procedure (smoother
2962  *             in AMG);
2963  *
2964  *****************************************************************************/
2965 HYPRE_Int
hypre_ParAMGCreateDomainDof(hypre_ParCSRMatrix * A,HYPRE_Int domain_type,HYPRE_Int overlap,HYPRE_Int num_functions,HYPRE_Int * dof_func,hypre_CSRMatrix ** domain_structure_pointer,HYPRE_Int ** piv_pointer,HYPRE_Int use_nonsymm)2966 hypre_ParAMGCreateDomainDof(hypre_ParCSRMatrix   *A,
2967                             HYPRE_Int domain_type, HYPRE_Int overlap,
2968                             HYPRE_Int num_functions, HYPRE_Int *dof_func,
2969                             hypre_CSRMatrix     **domain_structure_pointer,
2970                             HYPRE_Int **piv_pointer, HYPRE_Int use_nonsymm)
2971 
2972 {
2973    hypre_CSRMatrix *domain_structure = NULL;
2974    HYPRE_Int *i_domain_dof, *j_domain_dof;
2975    HYPRE_Real *domain_matrixinverse;
2976    HYPRE_Int num_domains;
2977 
2978    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2979    HYPRE_Int *a_diag_i = hypre_CSRMatrixI(A_diag);
2980    HYPRE_Int *a_diag_j = hypre_CSRMatrixJ(A_diag);
2981    HYPRE_Real *a_diag_data = hypre_CSRMatrixData(A_diag);
2982    HYPRE_Int num_variables = hypre_CSRMatrixNumRows(A_diag);
2983    HYPRE_BigInt first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);
2984    HYPRE_BigInt col_0 = first_col_diag -1 ;
2985    HYPRE_BigInt col_n = first_col_diag + (HYPRE_BigInt)num_variables;
2986 
2987    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
2988    HYPRE_Int *a_offd_i = hypre_CSRMatrixI(A_offd);
2989    HYPRE_Int *a_offd_j = hypre_CSRMatrixJ(A_offd);
2990    HYPRE_Real *a_offd_data = hypre_CSRMatrixData(A_offd);
2991    HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
2992    HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
2993 
2994    hypre_CSRMatrix *A_ext;
2995    HYPRE_Int *a_ext_i;
2996    HYPRE_BigInt *a_ext_j;
2997    HYPRE_Real *a_ext_data;
2998 
2999    /* HYPRE_Int *i_dof_to_accept_weight; */
3000    HYPRE_Int *i_dof_to_prefer_weight,
3001       *w_dof_dof, *i_dof_weight;
3002    HYPRE_Int *i_dof_to_aggregate, *i_aggregate_dof, *j_aggregate_dof;
3003 
3004    HYPRE_Int *i_dof_index;
3005    HYPRE_Int *i_dof_index_offd;
3006    HYPRE_Int *i_proc;
3007    /* HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(A);*/
3008    hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3009    HYPRE_Int num_recvs = 0;
3010    HYPRE_Int *recv_vec_starts = NULL;
3011 
3012    HYPRE_Int ierr = 0;
3013    HYPRE_Int i,j,k, l_loc, i_loc, j_loc;
3014    HYPRE_Int i_dof;
3015    HYPRE_Int nf;
3016    HYPRE_Int *i_local_to_global;
3017    HYPRE_Int *i_global_to_local;
3018 
3019    HYPRE_Int local_dof_counter, max_local_dof_counter=0;
3020 
3021    HYPRE_Int domain_dof_counter = 0, domain_matrixinverse_counter = 0;
3022 
3023    HYPRE_Real *AE;
3024 
3025 
3026    HYPRE_Int *ipiv;
3027    char uplo = 'L';
3028    HYPRE_Int piv_counter;
3029    HYPRE_Int *piv = NULL;
3030 
3031    HYPRE_Int cnt, indx;
3032    HYPRE_Int num_procs, my_id;
3033 
3034    if (num_variables == 0)
3035    {
3036       *domain_structure_pointer = domain_structure;
3037 
3038       *piv_pointer = piv;
3039 
3040       return hypre_error_flag;
3041    }
3042 
3043    hypre_MPI_Comm_size(hypre_ParCSRMatrixComm(A),&num_procs);
3044    hypre_MPI_Comm_size(hypre_ParCSRMatrixComm(A),&my_id);
3045 
3046    /* --------------------------------------------------------------------- */
3047 
3048    /*=======================================================================*/
3049    /*    create artificial domains by agglomeration;                        */
3050    /*=======================================================================*/
3051 
3052    /*hypre_printf("----------- create artificials domain by agglomeration;  ======\n");
3053     */
3054    i_aggregate_dof = hypre_CTAlloc(HYPRE_Int, num_variables+1, HYPRE_MEMORY_HOST);
3055    j_aggregate_dof = hypre_CTAlloc(HYPRE_Int, num_variables, HYPRE_MEMORY_HOST);
3056 
3057    if (domain_type == 2)
3058    {
3059       i_dof_to_prefer_weight = hypre_CTAlloc(HYPRE_Int,  num_variables, HYPRE_MEMORY_HOST);
3060       w_dof_dof = hypre_CTAlloc(HYPRE_Int,  a_diag_i[num_variables], HYPRE_MEMORY_HOST);
3061 
3062       for (i=0; i<num_variables; i++)
3063          for (j=a_diag_i[i]; j< a_diag_i[i+1]; j++)
3064          {
3065             if (a_diag_j[j] == i)
3066                w_dof_dof[j]=0;
3067             else
3068                w_dof_dof[j]=1;
3069          }
3070 
3071       /*hypre_printf("end computing weights for agglomeration procedure: --------\n");
3072        */
3073 
3074       i_dof_weight = hypre_CTAlloc(HYPRE_Int,  num_variables, HYPRE_MEMORY_HOST);
3075       hypre_AMGeAgglomerate(i_aggregate_dof, j_aggregate_dof,
3076                             a_diag_i, a_diag_j, w_dof_dof,
3077                             a_diag_i, a_diag_j,
3078                             a_diag_i, a_diag_j,
3079                             i_dof_to_prefer_weight,
3080                             i_dof_weight,
3081                             num_variables, num_variables,
3082                             &num_domains);
3083 
3084       hypre_TFree(i_dof_to_prefer_weight, HYPRE_MEMORY_HOST);
3085       hypre_TFree(i_dof_weight, HYPRE_MEMORY_HOST);
3086       hypre_TFree(w_dof_dof, HYPRE_MEMORY_HOST);
3087    }
3088    else
3089    {
3090       nf = 1;
3091       if (domain_type == 1) nf = num_functions;
3092 
3093       num_domains = num_variables/nf;
3094       for (i=0; i < num_domains+1; i++)
3095       {
3096          i_aggregate_dof[i] = nf*i;
3097       }
3098       for (i=0; i < num_variables; i++)
3099       {
3100          j_aggregate_dof[i] = i;
3101       }
3102    }
3103    /*hypre_printf("num_variables: %d, num_domains: %d\n", num_variables, num_domains);
3104     */
3105    if (overlap == 1)
3106    {
3107       i_domain_dof = hypre_CTAlloc(HYPRE_Int,  num_domains+1, HYPRE_MEMORY_HOST);
3108 
3109       i_dof_to_aggregate = hypre_CTAlloc(HYPRE_Int,  num_variables, HYPRE_MEMORY_HOST);
3110       for (i=0; i < num_domains; i++)
3111          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3112             i_dof_to_aggregate[j_aggregate_dof[j]] = i;
3113 
3114       i_proc = hypre_CTAlloc(HYPRE_Int,  num_cols_offd, HYPRE_MEMORY_HOST);
3115 
3116       for (i=0; i < num_cols_offd; i++)
3117          i_proc[i] = 0;
3118 
3119       if (comm_pkg)
3120       {
3121          num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
3122          recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
3123       }
3124       else if (num_procs > 1)
3125       {
3126 
3127          hypre_MatvecCommPkgCreate(A);
3128 
3129          comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3130          num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
3131          recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
3132       }
3133 
3134       for (i=0; i < num_recvs; i++)
3135          for (indx=recv_vec_starts[i]; indx < recv_vec_starts[i+1]; indx++)
3136             i_proc[indx] = i;
3137 
3138       /* make domains from aggregates: *********************************/
3139 
3140       i_dof_index = hypre_CTAlloc(HYPRE_Int,  num_variables, HYPRE_MEMORY_HOST);
3141 
3142       i_dof_index_offd = hypre_CTAlloc(HYPRE_Int,  num_cols_offd, HYPRE_MEMORY_HOST);
3143 
3144       for (i=0; i < num_variables; i++)
3145          i_dof_index[i] = -1;
3146 
3147       for (i=0; i < num_cols_offd; i++)
3148          i_dof_index_offd[i] = -1;
3149 
3150       domain_dof_counter=0;
3151       for (i=0; i < num_domains; i++)
3152       {
3153          i_domain_dof[i] =  domain_dof_counter;
3154          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3155          {
3156             i_dof_index[j_aggregate_dof[j]]=-1;
3157          }
3158          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3159          {
3160             for (k=a_diag_i[j_aggregate_dof[j]];
3161                  k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3162                if (i_dof_to_aggregate[a_diag_j[k]] >= i
3163                    && i_dof_index[a_diag_j[k]]==-1)
3164                {
3165                   i_dof_index[a_diag_j[k]]++;
3166                   domain_dof_counter++;
3167                }
3168             for (k=a_offd_i[j_aggregate_dof[j]];
3169                  k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3170                if (i_proc[a_offd_j[k]] > my_id
3171                    && i_dof_index_offd[a_offd_j[k]]==-1)
3172                {
3173                   i_dof_index_offd[a_offd_j[k]]++;
3174                   domain_dof_counter++;
3175                }
3176          }
3177       }
3178 
3179       for (i=0; i < num_variables; i++)
3180          i_dof_index[i] = -1;
3181 
3182       for (i=0; i < num_cols_offd; i++)
3183          i_dof_index_offd[i] = -1;
3184 
3185       i_domain_dof[num_domains] =  domain_dof_counter;
3186       j_domain_dof = hypre_CTAlloc(HYPRE_Int,  domain_dof_counter, HYPRE_MEMORY_HOST);
3187 
3188       domain_dof_counter=0;
3189       for (i=0; i < num_domains; i++)
3190       {
3191          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3192          {
3193             i_dof_index[j_aggregate_dof[j]]=-1;
3194          }
3195          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3196          {
3197             for (k=a_diag_i[j_aggregate_dof[j]];
3198                  k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3199                if (i_dof_to_aggregate[a_diag_j[k]] >= i
3200                    && i_dof_index[a_diag_j[k]]==-1)
3201                {
3202                   i_dof_index[a_diag_j[k]]++;
3203                   j_domain_dof[domain_dof_counter] = a_diag_j[k];
3204                   domain_dof_counter++;
3205                }
3206             for (k=a_offd_i[j_aggregate_dof[j]];
3207                  k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3208                if (i_proc[a_offd_j[k]] > my_id
3209                    && i_dof_index_offd[a_offd_j[k]]==-1)
3210                {
3211                   i_dof_index_offd[a_offd_j[k]]++;
3212                   j_domain_dof[domain_dof_counter] = a_offd_j[k]+num_variables;
3213                   domain_dof_counter++;
3214                }
3215          }
3216       }
3217 
3218       hypre_TFree(i_aggregate_dof, HYPRE_MEMORY_HOST);
3219       hypre_TFree(j_aggregate_dof, HYPRE_MEMORY_HOST);
3220       hypre_TFree(i_dof_to_aggregate, HYPRE_MEMORY_HOST);
3221       hypre_TFree(i_dof_index, HYPRE_MEMORY_HOST);
3222       hypre_TFree(i_dof_index_offd, HYPRE_MEMORY_HOST);
3223       hypre_TFree(i_proc, HYPRE_MEMORY_HOST);
3224    }
3225    else if (overlap == 2)
3226    {
3227       i_domain_dof = hypre_CTAlloc(HYPRE_Int,  num_domains+1, HYPRE_MEMORY_HOST);
3228 
3229       i_dof_to_aggregate = hypre_CTAlloc(HYPRE_Int,  num_variables, HYPRE_MEMORY_HOST);
3230       for (i=0; i < num_domains; i++)
3231          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3232             i_dof_to_aggregate[j_aggregate_dof[j]] = i;
3233 
3234       /* make domains from aggregates: *********************************/
3235 
3236       i_dof_index = hypre_CTAlloc(HYPRE_Int,  num_variables, HYPRE_MEMORY_HOST);
3237 
3238       i_dof_index_offd = hypre_CTAlloc(HYPRE_Int,  num_cols_offd, HYPRE_MEMORY_HOST);
3239 
3240       for (i=0; i < num_variables; i++)
3241          i_dof_index[i] = -1;
3242 
3243       for (i=0; i < num_cols_offd; i++)
3244          i_dof_index_offd[i] = -1;
3245 
3246       domain_dof_counter=0;
3247       for (i=0; i < num_domains; i++)
3248       {
3249          i_domain_dof[i] =  domain_dof_counter;
3250          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3251          {
3252             for (k=a_diag_i[j_aggregate_dof[j]];
3253                  k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3254                if ( i_dof_index[a_diag_j[k]]==-1)
3255                {
3256                   i_dof_index[a_diag_j[k]]++;
3257                   domain_dof_counter++;
3258                }
3259             for (k=a_offd_i[j_aggregate_dof[j]];
3260                  k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3261                if ( i_dof_index_offd[a_offd_j[k]]==-1)
3262                {
3263                   i_dof_index_offd[a_offd_j[k]]++;
3264                   domain_dof_counter++;
3265                }
3266          }
3267          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3268          {
3269             for (k=a_diag_i[j_aggregate_dof[j]];
3270                  k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3271                i_dof_index[a_diag_j[k]]=-1;
3272             for (k=a_offd_i[j_aggregate_dof[j]];
3273                  k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3274                i_dof_index_offd[a_offd_j[k]]=-1;
3275          }
3276       }
3277 
3278       for (i=0; i < num_variables; i++)
3279          i_dof_index[i] = -1;
3280 
3281       for (i=0; i < num_cols_offd; i++)
3282          i_dof_index_offd[i] = -1;
3283 
3284       i_domain_dof[num_domains] =  domain_dof_counter;
3285       j_domain_dof = hypre_CTAlloc(HYPRE_Int,  domain_dof_counter, HYPRE_MEMORY_HOST);
3286 
3287       domain_dof_counter=0;
3288       for (i=0; i < num_domains; i++)
3289       {
3290          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3291          {
3292             for (k=a_diag_i[j_aggregate_dof[j]];
3293                  k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3294                if ( i_dof_index[a_diag_j[k]]==-1)
3295                {
3296                   i_dof_index[a_diag_j[k]]++;
3297                   j_domain_dof[domain_dof_counter] = a_diag_j[k];
3298                   domain_dof_counter++;
3299                }
3300             for (k=a_offd_i[j_aggregate_dof[j]];
3301                  k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3302                if ( i_dof_index_offd[a_offd_j[k]]==-1)
3303                {
3304                   i_dof_index_offd[a_offd_j[k]]++;
3305                   j_domain_dof[domain_dof_counter] = a_offd_j[k]+num_variables;
3306                   domain_dof_counter++;
3307                }
3308          }
3309 
3310          for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++)
3311          {
3312             for (k=a_diag_i[j_aggregate_dof[j]];
3313                  k<a_diag_i[j_aggregate_dof[j]+1]; k++)
3314                i_dof_index[a_diag_j[k]]=-1;
3315             for (k=a_offd_i[j_aggregate_dof[j]];
3316                  k<a_offd_i[j_aggregate_dof[j]+1]; k++)
3317                i_dof_index_offd[a_offd_j[k]]=-1;
3318          }
3319       }
3320 
3321       hypre_TFree(i_aggregate_dof, HYPRE_MEMORY_HOST);
3322       hypre_TFree(j_aggregate_dof, HYPRE_MEMORY_HOST);
3323       hypre_TFree(i_dof_to_aggregate, HYPRE_MEMORY_HOST);
3324       hypre_TFree(i_dof_index, HYPRE_MEMORY_HOST);
3325       hypre_TFree(i_dof_index_offd, HYPRE_MEMORY_HOST);
3326    }
3327    else
3328    {
3329       i_domain_dof = i_aggregate_dof;
3330       j_domain_dof = j_aggregate_dof;
3331    }
3332 
3333    /*hypre_printf("END domain_dof computations: =================================\n");
3334     */
3335    domain_matrixinverse_counter = 0;
3336    local_dof_counter = 0;
3337    piv_counter = 0;
3338 
3339    for (i=0; i < num_domains; i++)
3340    {
3341       local_dof_counter = i_domain_dof[i+1]-i_domain_dof[i];
3342       domain_matrixinverse_counter+= local_dof_counter * local_dof_counter;
3343       piv_counter += local_dof_counter;
3344 
3345       if (local_dof_counter > max_local_dof_counter)
3346          max_local_dof_counter = local_dof_counter;
3347    }
3348 
3349    domain_matrixinverse = hypre_CTAlloc(HYPRE_Real,  domain_matrixinverse_counter, HYPRE_MEMORY_HOST);
3350    if (use_nonsymm)
3351       piv = hypre_CTAlloc(HYPRE_Int,  piv_counter, HYPRE_MEMORY_HOST);
3352 
3353    if (num_procs > 1)
3354    {
3355       A_ext = hypre_ParCSRMatrixExtractBExt(A,A,1);
3356       a_ext_i = hypre_CSRMatrixI(A_ext);
3357       a_ext_j = hypre_CSRMatrixBigJ(A_ext);
3358       a_ext_data = hypre_CSRMatrixData(A_ext);
3359    }
3360    else
3361       A_ext = NULL;
3362 
3363    i_local_to_global = hypre_CTAlloc(HYPRE_Int,  max_local_dof_counter, HYPRE_MEMORY_HOST);
3364 
3365    i_global_to_local = hypre_CTAlloc(HYPRE_Int,  num_variables+num_cols_offd, HYPRE_MEMORY_HOST);
3366 
3367    for (i=0; i < num_variables+num_cols_offd; i++)
3368       i_global_to_local[i] = -1;
3369 
3370    piv_counter = 0;
3371    domain_matrixinverse_counter = 0;
3372    for (i=0; i < num_domains; i++)
3373    {
3374       local_dof_counter = 0;
3375       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
3376       {
3377          i_global_to_local[j_domain_dof[j]] = local_dof_counter;
3378          i_local_to_global[local_dof_counter] = j_domain_dof[j];
3379          local_dof_counter++;
3380       }
3381 
3382       /* get local matrix in AE: ======================================== */
3383 
3384       AE = &domain_matrixinverse[domain_matrixinverse_counter];
3385       ipiv = &piv[piv_counter];
3386 
3387       cnt = 0;
3388       for (i_loc=0; i_loc < local_dof_counter; i_loc++)
3389          for (j_loc=0; j_loc < local_dof_counter; j_loc++)
3390             AE[cnt++] = 0.e0;
3391 
3392       for (i_loc=0; i_loc < local_dof_counter; i_loc++)
3393       {
3394          i_dof = i_local_to_global[i_loc];
3395          if (i_dof < num_variables)
3396          {
3397             for (j=a_diag_i[i_dof]; j < a_diag_i[i_dof+1]; j++)
3398             {
3399                j_loc = i_global_to_local[a_diag_j[j]];
3400                if (j_loc >=0)
3401                   AE[i_loc + j_loc * local_dof_counter] = a_diag_data[j];
3402             }
3403             for (j=a_offd_i[i_dof]; j < a_offd_i[i_dof+1]; j++)
3404             {
3405                j_loc = i_global_to_local[a_offd_j[j]+num_variables];
3406                if (j_loc >=0)
3407                   AE[i_loc + j_loc * local_dof_counter] = a_offd_data[j];
3408             }
3409          }
3410          else
3411          {
3412             HYPRE_BigInt jj;
3413             HYPRE_Int j2;
3414             i_dof -= num_variables;
3415             for (j=a_ext_i[i_dof]; j < a_ext_i[i_dof+1]; j++)
3416             {
3417                jj = a_ext_j[j];
3418                if (jj > col_0 && jj < col_n)
3419                {
3420                   j2 = (HYPRE_Int)(jj - first_col_diag);
3421                }
3422                else
3423                {
3424                   j2 = hypre_BigBinarySearch(col_map_offd,jj,num_cols_offd);
3425                   if (j2 > -1) j2 += num_variables;
3426                }
3427                if (j2 > -1)
3428                {
3429                   j_loc = i_global_to_local[j2];
3430                   if (j_loc >=0)
3431                      AE[i_loc + j_loc * local_dof_counter] = a_ext_data[j];
3432                }
3433             }
3434          }
3435       }
3436 
3437       if (use_nonsymm)
3438       {
3439          hypre_dgetrf(&local_dof_counter,
3440                       &local_dof_counter, AE,
3441                       &local_dof_counter, ipiv, &ierr);
3442          piv_counter +=local_dof_counter;
3443       }
3444 
3445       else
3446       {
3447          hypre_dpotrf(&uplo,&local_dof_counter, AE,
3448                       &local_dof_counter, &ierr);
3449       }
3450 
3451       domain_matrixinverse_counter+=local_dof_counter*local_dof_counter;
3452 
3453 
3454 
3455       for (l_loc=0; l_loc < local_dof_counter; l_loc++)
3456          i_global_to_local[i_local_to_global[l_loc]] = -1;
3457 
3458    }
3459 
3460    hypre_TFree(i_local_to_global, HYPRE_MEMORY_HOST);
3461    hypre_TFree(i_global_to_local, HYPRE_MEMORY_HOST);
3462    hypre_CSRMatrixDestroy(A_ext);
3463 
3464    domain_structure = hypre_CSRMatrixCreate(num_domains, max_local_dof_counter,
3465                                             i_domain_dof[num_domains]);
3466 
3467    hypre_CSRMatrixI(domain_structure) = i_domain_dof;
3468    hypre_CSRMatrixJ(domain_structure) = j_domain_dof;
3469    hypre_CSRMatrixData(domain_structure) = domain_matrixinverse;
3470 
3471    *domain_structure_pointer = domain_structure;
3472    *piv_pointer = piv;
3473    return hypre_error_flag;
3474 }
3475 
3476 HYPRE_Int
hypre_ParGenerateScale(hypre_ParCSRMatrix * A,hypre_CSRMatrix * domain_structure,HYPRE_Real relaxation_weight,HYPRE_Real ** scale_pointer)3477 hypre_ParGenerateScale(hypre_ParCSRMatrix *A,
3478                        hypre_CSRMatrix *domain_structure,
3479                        HYPRE_Real       relaxation_weight,
3480                        HYPRE_Real     **scale_pointer)
3481 {
3482    HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
3483    HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
3484    HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
3485    HYPRE_Int i, j;
3486    HYPRE_Real *scale;
3487    HYPRE_Real *scale_ext;
3488    HYPRE_Real *scale_int;
3489 
3490    hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3491    HYPRE_Int num_sends = 0;
3492    HYPRE_Int *send_map_starts;
3493    HYPRE_Int *send_map_elmts;
3494 
3495    HYPRE_Int num_variables = hypre_ParCSRMatrixNumRows(A);
3496    HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
3497    HYPRE_Int j_loc, index, start;
3498 
3499    hypre_ParCSRCommHandle *comm_handle;
3500 
3501    if (comm_pkg)
3502    {
3503       num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
3504       send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
3505       send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
3506    }
3507 
3508    scale = hypre_CTAlloc(HYPRE_Real,  num_variables, HYPRE_MEMORY_HOST);
3509    if (num_cols_offd) scale_ext = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
3510 
3511    for (i=0; i < num_domains; i++)
3512    {
3513       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
3514       {
3515          j_loc = j_domain_dof[j];
3516          if (j_loc < num_variables)
3517             scale[j_loc] += 1.0;
3518          else
3519             scale_ext[j_loc-num_variables] += 1.0;
3520       }
3521    }
3522    if (comm_pkg)
3523    {
3524       scale_int = hypre_CTAlloc(HYPRE_Real,  send_map_starts[num_sends], HYPRE_MEMORY_HOST);
3525       comm_handle = hypre_ParCSRCommHandleCreate (2,comm_pkg,scale_ext,scale_int);
3526 
3527       hypre_ParCSRCommHandleDestroy(comm_handle);
3528       comm_handle = NULL;
3529    }
3530 
3531    index = 0;
3532    for (i=0; i < num_sends; i++)
3533    {
3534       start = send_map_starts[i];
3535       for (j=start; j < send_map_starts[i+1]; j++)
3536          scale[send_map_elmts[j]] += scale_int[index++];
3537    }
3538 
3539    if (comm_pkg) hypre_TFree(scale_int, HYPRE_MEMORY_HOST);
3540    if (num_cols_offd) hypre_TFree(scale_ext, HYPRE_MEMORY_HOST);
3541 
3542    for (i=0; i < num_variables; i++)
3543       scale[i] = relaxation_weight/scale[i];
3544 
3545    *scale_pointer = scale;
3546 
3547    return hypre_error_flag;
3548 }
3549 
3550 HYPRE_Int
hypre_ParGenerateHybridScale(hypre_ParCSRMatrix * A,hypre_CSRMatrix * domain_structure,hypre_CSRMatrix ** A_boundary_pointer,HYPRE_Real ** scale_pointer)3551 hypre_ParGenerateHybridScale(hypre_ParCSRMatrix *A,
3552                              hypre_CSRMatrix  *domain_structure,
3553                              hypre_CSRMatrix **A_boundary_pointer,
3554                              HYPRE_Real      **scale_pointer)
3555 {
3556    hypre_CSRMatrix *A_ext;
3557    HYPRE_Int *A_ext_i;
3558    HYPRE_BigInt *A_ext_j;
3559    HYPRE_Real *A_ext_data;
3560 
3561    hypre_CSRMatrix *A_boundary;
3562    HYPRE_Int *A_boundary_i;
3563    HYPRE_Int *A_boundary_j;
3564    HYPRE_Real *A_boundary_data;
3565 
3566    HYPRE_Int num_domains = hypre_CSRMatrixNumRows(domain_structure);
3567    HYPRE_Int *i_domain_dof = hypre_CSRMatrixI(domain_structure);
3568    HYPRE_Int *j_domain_dof = hypre_CSRMatrixJ(domain_structure);
3569    HYPRE_Int i, j, jj;
3570    HYPRE_Real *scale;
3571    HYPRE_Real *scale_ext;
3572    HYPRE_Real *scale_int;
3573 
3574    hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3575    HYPRE_Int num_sends = 0;
3576    HYPRE_Int *send_map_starts;
3577    HYPRE_Int *send_map_elmts;
3578    HYPRE_Int *index_ext;
3579 
3580    HYPRE_Int num_variables = hypre_ParCSRMatrixNumRows(A);
3581    HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
3582    HYPRE_Int j_loc, index, start;
3583    HYPRE_BigInt col_0, col_n;
3584    HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
3585 
3586    hypre_ParCSRCommHandle *comm_handle;
3587 
3588    col_0 = hypre_ParCSRMatrixFirstColDiag(A)-1;
3589    col_n = col_0+(HYPRE_Int)num_variables;
3590 
3591    A_boundary = NULL;
3592 
3593    if (comm_pkg)
3594    {
3595       num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
3596       send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
3597       send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
3598    }
3599 
3600    scale = hypre_CTAlloc(HYPRE_Real,  num_variables, HYPRE_MEMORY_HOST);
3601    if (num_cols_offd)
3602    {
3603       scale_ext = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
3604       index_ext = hypre_CTAlloc(HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST);
3605    }
3606 
3607    for (i=0; i < num_variables; i++)
3608       scale[i] = 1;
3609 
3610    for (i=0; i < num_cols_offd; i++)
3611       index_ext[i] = -1;
3612 
3613    for (i=0; i < num_domains; i++)
3614    {
3615       for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
3616       {
3617          j_loc = j_domain_dof[j];
3618          if (j_loc >= num_variables)
3619          {
3620             j_loc -= num_variables;
3621             if (index_ext[j_loc] == -1)
3622             {
3623                scale_ext[j_loc] += 1.0;
3624                index_ext[j_loc] ++;
3625             }
3626          }
3627       }
3628    }
3629    if (comm_pkg)
3630    {
3631       scale_int = hypre_CTAlloc(HYPRE_Real,  send_map_starts[num_sends], HYPRE_MEMORY_HOST);
3632       comm_handle=hypre_ParCSRCommHandleCreate(2,comm_pkg,scale_ext,scale_int);
3633 
3634       hypre_ParCSRCommHandleDestroy(comm_handle);
3635       comm_handle = NULL;
3636       A_ext = hypre_ParCSRMatrixExtractBExt(A,A,1);
3637       A_ext_i = hypre_CSRMatrixI(A_ext);
3638       A_boundary_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd+1, HYPRE_MEMORY_HOST);
3639       A_ext_j = hypre_CSRMatrixBigJ(A_ext);
3640       A_ext_data = hypre_CSRMatrixData(A_ext);
3641       /* compress A_ext to contain only local data and
3642          necessary boundary points*/
3643       index = 0;
3644       for (i=0; i < num_cols_offd; i++)
3645       {
3646          A_boundary_i[i] = index;
3647          for (j = A_ext_i[i]; j < A_ext_i[i+1]; j++)
3648          {
3649             HYPRE_BigInt j_col;
3650             j_col = A_ext_j[j];
3651             if (j_col > col_0 && j_col < col_n)
3652             {
3653                A_ext_j[j] = j_col-col_0;
3654                index++;
3655             }
3656             else
3657             {
3658                jj = hypre_BigBinarySearch(col_map_offd,j_col,num_cols_offd);
3659                if (jj > -1 && (scale_ext[jj] > 0))
3660                {
3661                   A_ext_j[j] = num_variables+jj;
3662                   index++;
3663                }
3664                else
3665                {
3666                   A_ext_j[j] = -1;
3667                }
3668             }
3669          }
3670       }
3671       A_boundary_i[num_cols_offd] = index;
3672 
3673       A_boundary_j = NULL;
3674       A_boundary_data = NULL;
3675 
3676       if (index)
3677       {
3678          A_boundary_j = hypre_CTAlloc(HYPRE_Int, index, HYPRE_MEMORY_HOST);
3679          A_boundary_data = hypre_CTAlloc(HYPRE_Real, index, HYPRE_MEMORY_HOST);
3680       }
3681 
3682       index = 0;
3683       for (i=0; i < A_ext_i[num_cols_offd]; i++)
3684       {
3685          if (A_ext_j[i] > -1)
3686          {
3687             A_boundary_j[index] = (HYPRE_Int)A_ext_j[i];
3688             A_boundary_data[index] = A_ext_data[i];
3689             index++;
3690          }
3691       }
3692       A_boundary = hypre_CSRMatrixCreate(num_cols_offd,num_variables, index);
3693       hypre_CSRMatrixI(A_boundary) = A_boundary_i;
3694       hypre_CSRMatrixJ(A_boundary) = A_boundary_j;
3695       hypre_CSRMatrixData(A_boundary) = A_boundary_data;
3696       hypre_CSRMatrixDestroy(A_ext);
3697    }
3698 
3699    index = 0;
3700    for (i=0; i < num_sends; i++)
3701    {
3702       start = send_map_starts[i];
3703       for (j=start; j < send_map_starts[i+1]; j++)
3704          scale[send_map_elmts[j]] += scale_int[index++];
3705    }
3706 
3707    if (comm_pkg) hypre_TFree(scale_int, HYPRE_MEMORY_HOST);
3708    if (num_cols_offd)
3709    {
3710       hypre_TFree(scale_ext, HYPRE_MEMORY_HOST);
3711       hypre_TFree(index_ext, HYPRE_MEMORY_HOST);
3712    }
3713 
3714    for (i=0; i < num_variables; i++)
3715       scale[i] = 1.0/scale[i];
3716 
3717    *scale_pointer = scale;
3718    *A_boundary_pointer = A_boundary;
3719 
3720    return hypre_error_flag;
3721 }
3722 
3723