1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "_hypre_parcsr_ls.h"
9
10 /*--------------------------------------------------------------------------
11 * hypre_ParAMGBuildModMultipass
12 * This routine implements Stuben's direct interpolation with multiple passes.
13 * expressed with matrix matrix multiplications
14 *--------------------------------------------------------------------------*/
15
16 HYPRE_Int
hypre_BoomerAMGBuildModMultipassHost(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Real trunc_factor,HYPRE_Int P_max_elmts,HYPRE_Int interp_type,HYPRE_Int num_functions,HYPRE_Int * dof_func,hypre_ParCSRMatrix ** P_ptr)17 hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix *A,
18 HYPRE_Int *CF_marker,
19 hypre_ParCSRMatrix *S,
20 HYPRE_BigInt *num_cpts_global,
21 HYPRE_Real trunc_factor,
22 HYPRE_Int P_max_elmts,
23 HYPRE_Int interp_type,
24 HYPRE_Int num_functions,
25 HYPRE_Int *dof_func,
26 hypre_ParCSRMatrix **P_ptr )
27 {
28 #ifdef HYPRE_PROFILE
29 hypre_profile_times[HYPRE_TIMER_ID_MULTIPASS_INTERP] -= hypre_MPI_Wtime();
30 #endif
31
32 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
33 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
34 hypre_ParCSRCommHandle *comm_handle;
35
36 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
37 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
38 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
39 HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
40 HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
41
42 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
43 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
44 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
45 HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
46
47 HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
48
49 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
50 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
51 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
52
53 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
54 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
55 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
56
57 hypre_ParCSRMatrix **Pi;
58 hypre_ParCSRMatrix *P;
59 hypre_CSRMatrix *P_diag;
60 HYPRE_Real *P_diag_data;
61 HYPRE_Int *P_diag_i; /*at first counter of nonzero cols for each row,
62 finally will be pointer to start of row */
63 HYPRE_Int *P_diag_j;
64
65 hypre_CSRMatrix *P_offd;
66 HYPRE_Real *P_offd_data = NULL;
67 HYPRE_Int *P_offd_i; /*at first counter of nonzero cols for each row,
68 finally will be pointer to start of row */
69 HYPRE_Int *P_offd_j = NULL;
70 HYPRE_BigInt *col_map_offd_P = NULL;
71 HYPRE_Int num_cols_offd_P = 0;
72
73 HYPRE_Int num_sends = 0;
74 HYPRE_Int *int_buf_data = NULL;
75
76 HYPRE_Int *fine_to_coarse;
77 HYPRE_Int *points_left;
78 HYPRE_Int *pass_marker;
79 HYPRE_Int *pass_marker_offd = NULL;
80 HYPRE_Int *pass_order;
81 HYPRE_Int *pass_starts;
82
83 HYPRE_Int i, j, i1, i2, j1;
84 HYPRE_Int num_passes, p, remaining;
85 HYPRE_Int global_remaining;
86 HYPRE_Int cnt, cnt_old, cnt_rem, current_pass;
87 HYPRE_Int startc, index;
88
89 HYPRE_BigInt total_global_cpts;
90 HYPRE_Int my_id, num_procs;
91 HYPRE_Int P_offd_size = 0;
92
93 HYPRE_Int *dof_func_offd = NULL;
94 HYPRE_Real *row_sums = NULL;
95
96 /* MPI size and rank*/
97 hypre_MPI_Comm_size(comm, &num_procs);
98 hypre_MPI_Comm_rank(comm, &my_id);
99
100 if (num_procs > 1)
101 {
102 if (my_id == num_procs - 1)
103 {
104 total_global_cpts = num_cpts_global[1];
105 }
106 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
107 }
108 else
109 {
110 total_global_cpts = num_cpts_global[1];
111 }
112 /* Generate pass marker array */
113
114 pass_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
115 /* contains pass numbers for each variable according to original order */
116 pass_order = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
117 /* contains row numbers according to new order, pass 1 followed by pass 2 etc */
118 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
119 /* reverse of pass_order, keeps track where original numbers go */
120 points_left = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
121 /* contains row numbers of remaining points, auxiliary */
122 pass_starts = hypre_CTAlloc(HYPRE_Int, 10, HYPRE_MEMORY_HOST);
123 /* contains beginning for each pass in pass_order field, assume no more than 10 passes */
124
125 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
126 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
127
128 cnt = 0;
129 remaining = 0;
130 for (i=0; i < n_fine; i++)
131 {
132 if (CF_marker[i] == 1)
133 {
134 pass_marker[i] = 1;
135 P_diag_i[i+1] = 1;
136 P_offd_i[i+1] = 0;
137 fine_to_coarse[i] = cnt;
138 pass_order[cnt++] = i;
139 }
140 else
141 {
142 points_left[remaining++] = i;
143 }
144 }
145 pass_starts[0] = 0;
146 pass_starts[1] = cnt;
147
148 if (num_functions > 1)
149 {
150 dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_HOST);
151 index = 0;
152 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
153 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
154 for (i = 0; i < num_sends; i++)
155 {
156 startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
157 for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
158 {
159 int_buf_data[index++] = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
160 }
161 }
162
163 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, dof_func_offd);
164
165 hypre_ParCSRCommHandleDestroy(comm_handle);
166
167 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
168 }
169
170 if (num_procs > 1)
171 {
172 pass_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_HOST);
173 index = 0;
174 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
175 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
176 for (i = 0; i < num_sends; i++)
177 {
178 startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
179 for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
180 {
181 int_buf_data[index++] = pass_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
182 }
183 }
184
185 comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, pass_marker_offd);
186
187 hypre_ParCSRCommHandleDestroy(comm_handle);
188 }
189 current_pass = 1;
190 num_passes = 1;
191 /* color points according to pass number */
192 hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm);
193 while (global_remaining > 0)
194 {
195 HYPRE_Int remaining_pts = remaining;
196 cnt_rem = 0;
197 for (i=0; i < remaining_pts; i++)
198 {
199 i1 = points_left[i];
200 cnt_old = cnt;
201 for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
202 {
203 j1 = S_diag_j[j];
204 if (pass_marker[j1] == current_pass)
205 {
206 pass_marker[i1] = current_pass + 1;
207 pass_order[cnt++] = i1;
208 remaining--;
209 break;
210 }
211 }
212 if (cnt == cnt_old)
213 {
214 for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
215 {
216 j1 = S_offd_j[j];
217 if (pass_marker_offd[j1] == current_pass)
218 {
219 pass_marker[i1] = current_pass + 1;
220 pass_order[cnt++] = i1;
221 remaining--;
222 break;
223 }
224 }
225 }
226 if (cnt == cnt_old)
227 {
228 points_left[cnt_rem++] = i1;
229 }
230 }
231 remaining = cnt_rem;
232 current_pass++;
233 num_passes++;
234 if (num_passes > 9)
235 {
236 hypre_error_w_msg(HYPRE_ERROR_GENERIC," Warning!!! too many passes! out of range!\n");
237 break;
238 }
239 pass_starts[num_passes] = cnt;
240 /* update pass_marker_offd */
241 index = 0;
242 if (num_procs > 1)
243 {
244 for (i = 0; i < num_sends; i++)
245 {
246 startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
247 for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
248 {
249 int_buf_data[index++] = pass_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
250 }
251 }
252 comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, pass_marker_offd);
253
254 hypre_ParCSRCommHandleDestroy(comm_handle);
255 }
256 hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm);
257 }
258 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
259 hypre_TFree(points_left, HYPRE_MEMORY_HOST);
260
261 /* generate row sum of weak points and C-points to be ignored */
262
263 row_sums = hypre_CTAlloc(HYPRE_Real, n_fine, HYPRE_MEMORY_HOST);
264 if (num_functions > 1)
265 {
266 for (i=0; i < n_fine; i++)
267 {
268 if (CF_marker[i] < 0)
269 {
270 for (j = A_diag_i[i]+1; j < A_diag_i[i+1]; j++)
271 {
272 if (dof_func[i] == dof_func[A_diag_j[j]])
273 {
274 row_sums[i] += A_diag_data[j];
275 }
276 }
277 for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
278 {
279 if (dof_func[i] == dof_func_offd[A_offd_j[j]])
280 {
281 row_sums[i] += A_offd_data[j];
282 }
283 }
284 }
285 }
286 }
287 else
288 {
289 for (i=0; i < n_fine; i++)
290 {
291 if (CF_marker[i] < 0)
292 {
293 for (j = A_diag_i[i]+1; j < A_diag_i[i+1]; j++)
294 {
295 row_sums[i] += A_diag_data[j];
296 }
297 for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
298 {
299 row_sums[i] += A_offd_data[j];
300 }
301 }
302 }
303 }
304
305 Pi = hypre_CTAlloc(hypre_ParCSRMatrix*, num_passes, HYPRE_MEMORY_HOST);
306 hypre_GenerateMultipassPi(A, S, num_cpts_global, &pass_order[pass_starts[1]], pass_marker,
307 pass_marker_offd, pass_starts[2]-pass_starts[1], 1, row_sums, &Pi[0]);
308 if (interp_type == 8)
309 {
310 for (i=1; i<num_passes-1; i++)
311 {
312 hypre_ParCSRMatrix *Q;
313 HYPRE_BigInt *c_pts_starts = hypre_ParCSRMatrixRowStarts(Pi[i-1]);
314 hypre_GenerateMultipassPi(A, S, c_pts_starts, &pass_order[pass_starts[i+1]], pass_marker,
315 pass_marker_offd, pass_starts[i+2]-pass_starts[i+1], i+1, row_sums, &Q);
316 Pi[i] = hypre_ParMatmul(Q, Pi[i-1]);
317 hypre_ParCSRMatrixDestroy(Q);
318 }
319 }
320 else if (interp_type == 9)
321 {
322 for (i=1; i<num_passes-1; i++)
323 {
324 HYPRE_BigInt *c_pts_starts = hypre_ParCSRMatrixRowStarts(Pi[i-1]);
325 hypre_GenerateMultiPi(A, S, Pi[i-1], c_pts_starts, &pass_order[pass_starts[i+1]], pass_marker,
326 pass_marker_offd, pass_starts[i+2]-pass_starts[i+1], i+1,
327 num_functions, dof_func, dof_func_offd, &Pi[i]);
328 }
329 }
330
331 /* p pulate P_diag_i[i+1] with nnz of i-th row */
332 for (i = 0; i < num_passes-1; i++)
333 {
334 HYPRE_Int *Pi_diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(Pi[i]));
335 HYPRE_Int *Pi_offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(Pi[i]));
336 j1 = 0;
337 for (j=pass_starts[i+1]; j < pass_starts[i+2]; j++)
338 {
339 i1 = pass_order[j];
340 P_diag_i[i1+1] = Pi_diag_i[j1+1]-Pi_diag_i[j1];
341 P_offd_i[i1+1] = Pi_offd_i[j1+1]-Pi_offd_i[j1];
342 j1++;
343 }
344 }
345
346 for (i=0; i < n_fine; i++)
347 {
348 P_diag_i[i+1] += P_diag_i[i];
349 P_offd_i[i+1] += P_offd_i[i];
350 }
351
352 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_i[n_fine], HYPRE_MEMORY_HOST);
353 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_i[n_fine], HYPRE_MEMORY_HOST);
354 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_i[n_fine], HYPRE_MEMORY_HOST);
355 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_i[n_fine], HYPRE_MEMORY_HOST);
356
357 /* insert weights for coarse points */
358 for (i=0; i < pass_starts[1]; i++)
359 {
360 i1 = pass_order[i];
361 j = P_diag_i[i1];
362 P_diag_j[j] = fine_to_coarse[i1];
363 P_diag_data[j] = 1.0;
364 }
365
366 /* generate col_map_offd_P by combining all col_map_offd_Pi
367 * and reompute indices if needed */
368
369 /* insert remaining weights */
370 for (p=0; p < num_passes-1; p++)
371 {
372 HYPRE_Int *Pi_diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(Pi[p]));
373 HYPRE_Int *Pi_offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(Pi[p]));
374 HYPRE_Int *Pi_diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(Pi[p]));
375 HYPRE_Int *Pi_offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(Pi[p]));
376 HYPRE_Real *Pi_diag_data = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(Pi[p]));
377 HYPRE_Real *Pi_offd_data = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(Pi[p]));
378 j1 = 0;
379 for (i = pass_starts[p+1]; i < pass_starts[p+2]; i++)
380 {
381 i1 = pass_order[i];
382 i2 = Pi_diag_i[j1];
383 for (j = P_diag_i[i1]; j < P_diag_i[i1+1]; j++)
384 {
385 P_diag_j[j] = Pi_diag_j[i2];
386 P_diag_data[j] = Pi_diag_data[i2++];
387 }
388 i2 = Pi_offd_i[j1];
389 for (j = P_offd_i[i1]; j < P_offd_i[i1+1]; j++)
390 {
391 P_offd_j[j] = Pi_offd_j[i2];
392 P_offd_data[j] = Pi_offd_data[i2++];
393 }
394 j1++;
395 }
396 }
397 /* Note that col indices in P_offd_j probably not consistent,
398 this gets fixed after truncation */
399
400 P = hypre_ParCSRMatrixCreate(comm,
401 hypre_ParCSRMatrixGlobalNumRows(A),
402 total_global_cpts,
403 hypre_ParCSRMatrixRowStarts(A),
404 num_cpts_global,
405 num_cols_offd_P,
406 P_diag_i[n_fine],
407 P_offd_i[n_fine]);
408 P_diag = hypre_ParCSRMatrixDiag(P);
409 hypre_CSRMatrixData(P_diag) = P_diag_data;
410 hypre_CSRMatrixI(P_diag) = P_diag_i;
411 hypre_CSRMatrixJ(P_diag) = P_diag_j;
412 P_offd = hypre_ParCSRMatrixOffd(P);
413 hypre_CSRMatrixData(P_offd) = P_offd_data;
414 hypre_CSRMatrixI(P_offd) = P_offd_i;
415 hypre_CSRMatrixJ(P_offd) = P_offd_j;
416
417 /* Compress P, removing coefficients smaller than trunc_factor * Max */
418
419 if (trunc_factor != 0.0 || P_max_elmts > 0)
420 {
421 hypre_BoomerAMGInterpTruncation(P, trunc_factor, P_max_elmts);
422 P_diag_data = hypre_CSRMatrixData(P_diag);
423 P_diag_i = hypre_CSRMatrixI(P_diag);
424 P_diag_j = hypre_CSRMatrixJ(P_diag);
425 P_offd_data = hypre_CSRMatrixData(P_offd);
426 P_offd_i = hypre_CSRMatrixI(P_offd);
427 P_offd_j = hypre_CSRMatrixJ(P_offd);
428 }
429
430 num_cols_offd_P = 0;
431 P_offd_size = P_offd_i[n_fine];
432 if (P_offd_size)
433 {
434 HYPRE_BigInt *tmp_P_offd_j = hypre_CTAlloc(HYPRE_BigInt, P_offd_size, HYPRE_MEMORY_HOST);
435 HYPRE_BigInt *big_P_offd_j = hypre_CTAlloc(HYPRE_BigInt, P_offd_size, HYPRE_MEMORY_HOST);
436 for (p=0; p < num_passes-1; p++)
437 {
438 HYPRE_BigInt *col_map_offd_Pi = hypre_ParCSRMatrixColMapOffd(Pi[p]);
439 for (i = pass_starts[p+1]; i < pass_starts[p+2]; i++)
440 {
441 i1 = pass_order[i];
442 for (j = P_offd_i[i1]; j < P_offd_i[i1+1]; j++)
443 {
444 big_P_offd_j[j] = col_map_offd_Pi[P_offd_j[j]];
445 }
446 }
447 }
448
449 for (i=0; i < P_offd_size; i++)
450 {
451 tmp_P_offd_j[i] = big_P_offd_j[i];
452 }
453
454 hypre_BigQsort0(tmp_P_offd_j, 0, P_offd_size-1);
455
456 num_cols_offd_P = 1;
457 for (i=0; i < P_offd_size-1; i++)
458 {
459 if (tmp_P_offd_j[i+1] > tmp_P_offd_j[i])
460 {
461 tmp_P_offd_j[num_cols_offd_P++] = tmp_P_offd_j[i+1];
462 }
463 }
464
465 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_P, HYPRE_MEMORY_HOST);
466
467 for (i=0; i < num_cols_offd_P; i++)
468 {
469 col_map_offd_P[i] = tmp_P_offd_j[i];
470 }
471
472 for (i=0; i < P_offd_size; i++)
473 {
474 P_offd_j[i] = hypre_BigBinarySearch(col_map_offd_P,
475 big_P_offd_j[i],
476 num_cols_offd_P);
477 }
478 hypre_TFree(tmp_P_offd_j, HYPRE_MEMORY_HOST);
479 hypre_TFree(big_P_offd_j, HYPRE_MEMORY_HOST);
480 }
481
482 for (i=0; i < num_passes-1; i++)
483 {
484 hypre_ParCSRMatrixDestroy(Pi[i]);
485 }
486 hypre_TFree (Pi, HYPRE_MEMORY_HOST);
487 hypre_TFree (pass_marker, HYPRE_MEMORY_HOST);
488 hypre_TFree (pass_marker_offd, HYPRE_MEMORY_HOST);
489 hypre_TFree (pass_order, HYPRE_MEMORY_HOST);
490 hypre_TFree (pass_starts, HYPRE_MEMORY_HOST);
491 hypre_TFree (fine_to_coarse, HYPRE_MEMORY_HOST);
492 hypre_TFree (dof_func_offd, HYPRE_MEMORY_HOST);
493 hypre_TFree (row_sums, HYPRE_MEMORY_HOST);
494
495 for (i=0; i < n_fine; i++)
496 {
497 if (CF_marker[i] == -3) CF_marker[i] = -1;
498 }
499
500 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
501 hypre_CSRMatrixNumCols(P_offd) = num_cols_offd_P;
502
503 hypre_MatvecCommPkgCreate(P);
504
505 *P_ptr = P;
506
507 return hypre_error_flag;
508
509 }
510
511
512 HYPRE_Int
hypre_GenerateMultipassPi(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * S,HYPRE_BigInt * c_pts_starts,HYPRE_Int * pass_order,HYPRE_Int * pass_marker,HYPRE_Int * pass_marker_offd,HYPRE_Int num_points,HYPRE_Int color,HYPRE_Real * row_sums,hypre_ParCSRMatrix ** P_ptr)513 hypre_GenerateMultipassPi( hypre_ParCSRMatrix *A,
514 hypre_ParCSRMatrix *S,
515 HYPRE_BigInt *c_pts_starts,
516 HYPRE_Int *pass_order, /* array containing row numbers of rows in A and S to be considered */
517 HYPRE_Int *pass_marker,
518 HYPRE_Int *pass_marker_offd,
519 HYPRE_Int num_points,
520 HYPRE_Int color,
521 HYPRE_Real *row_sums,
522 hypre_ParCSRMatrix **P_ptr )
523 {
524 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
525 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
526 hypre_ParCSRCommHandle *comm_handle;
527
528 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
529 HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
530 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
531 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
532 HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
533
534 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
535 HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
536 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
537 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
538 HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
539
540 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
541 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
542 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
543
544 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
545 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
546 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
547 HYPRE_BigInt *col_map_offd_P = NULL;
548 HYPRE_Int num_cols_offd_P;
549 HYPRE_Int nnz_diag, nnz_offd;
550 HYPRE_Int n_cpts, i, j, i1, j1, j2;
551 HYPRE_Int startc, index;
552 HYPRE_Int cpt, cnt_diag, cnt_offd;
553
554 hypre_ParCSRMatrix *P;
555 hypre_CSRMatrix *P_diag;
556 HYPRE_Real *P_diag_data;
557 HYPRE_Int *P_diag_i; /*at first counter of nonzero cols for each row,
558 finally will be pointer to start of row */
559 HYPRE_Int *P_diag_j;
560
561 hypre_CSRMatrix *P_offd;
562 HYPRE_Real *P_offd_data = NULL;
563 HYPRE_Int *P_offd_i; /*at first counter of nonzero cols for each row,
564 finally will be pointer to start of row */
565 HYPRE_Int *P_offd_j = NULL;
566 HYPRE_Int *fine_to_coarse;
567 HYPRE_Int *fine_to_coarse_offd = NULL;
568 HYPRE_BigInt *f_pts_starts = NULL;
569 HYPRE_Int my_id, num_procs;
570 HYPRE_BigInt total_global_fpts;
571 HYPRE_BigInt total_global_cpts;
572 HYPRE_BigInt *big_convert;
573 HYPRE_BigInt *big_convert_offd = NULL;
574 HYPRE_BigInt *big_buf_data = NULL;
575 HYPRE_Int num_sends;
576 HYPRE_Real *row_sum_C;
577
578 /* MPI size and rank*/
579 hypre_MPI_Comm_size(comm, &num_procs);
580 hypre_MPI_Comm_rank(comm, &my_id);
581
582 /* define P matrices */
583
584 P_diag_i = hypre_CTAlloc(HYPRE_Int, num_points+1, HYPRE_MEMORY_HOST);
585 P_offd_i = hypre_CTAlloc(HYPRE_Int, num_points+1, HYPRE_MEMORY_HOST);
586 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
587
588 /* fill P */
589
590 n_cpts = 0;
591 for (i=0; i < n_fine; i++)
592 {
593 if (pass_marker[i] == color)
594 {
595 fine_to_coarse[i] = n_cpts++;
596 }
597 else
598 {
599 fine_to_coarse[i] = -1;
600 }
601 }
602
603 if (num_procs > 1)
604 {
605 HYPRE_BigInt big_Fpts;
606 big_Fpts = num_points;
607
608 f_pts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
609 hypre_MPI_Scan(&big_Fpts, f_pts_starts+1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
610 f_pts_starts[0] = f_pts_starts[1] - big_Fpts;
611 if (my_id == num_procs - 1)
612 {
613 total_global_fpts = f_pts_starts[1];
614 total_global_cpts = c_pts_starts[1];
615 }
616 hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
617 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
618 }
619 else
620 {
621 f_pts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
622 f_pts_starts[0] = 0;
623 f_pts_starts[1] = num_points;
624 total_global_fpts = f_pts_starts[1];
625 total_global_cpts = c_pts_starts[1];
626 }
627
628 {
629 big_convert = hypre_CTAlloc(HYPRE_BigInt, n_fine, HYPRE_MEMORY_HOST);
630 for (i=0; i < n_fine; i++)
631 {
632 if (pass_marker[i] == color)
633 {
634 big_convert[i] = (HYPRE_BigInt)fine_to_coarse[i] + c_pts_starts[0];
635 }
636 }
637
638 num_cols_offd_P = 0;
639 if (num_procs > 1)
640 {
641 big_convert_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_A, HYPRE_MEMORY_HOST);
642 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_HOST);
643 index = 0;
644 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
645 big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
646 for (i = 0; i < num_sends; i++)
647 {
648 startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
649 for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
650 {
651 big_buf_data[index++] = big_convert[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
652 }
653 }
654
655 comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data, big_convert_offd);
656
657 hypre_ParCSRCommHandleDestroy(comm_handle);
658
659 num_cols_offd_P = 0;
660 for (i=0; i < num_cols_offd_A; i++)
661 {
662 if (pass_marker_offd[i] == color)
663 {
664 fine_to_coarse_offd[i] = num_cols_offd_P++;
665 }
666 }
667
668 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_P, HYPRE_MEMORY_HOST);
669
670 cpt = 0;
671 for (i=0; i < num_cols_offd_A; i++)
672 {
673 if (pass_marker_offd[i] == color)
674 {
675 col_map_offd_P[cpt++] = big_convert_offd[i];
676 }
677 }
678 }
679 }
680
681 /* generate P_diag_i and P_offd_i */
682 nnz_diag = 0;
683 nnz_offd = 0;
684 for (i=0; i < num_points; i++)
685 {
686 i1 = pass_order[i];
687 for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
688 {
689 j1 = S_diag_j[j];
690 if (pass_marker[j1] == color)
691 {
692 P_diag_i[i+1]++;
693 nnz_diag++;
694 }
695 }
696 for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
697 {
698 j1 = S_offd_j[j];
699 if (pass_marker_offd[j1] == color)
700 {
701 P_offd_i[i+1]++;
702 nnz_offd++;
703 }
704 }
705 }
706
707 for (i=1; i < num_points+1; i++)
708 {
709 P_diag_i[i] += P_diag_i[i-1];
710 P_offd_i[i] += P_offd_i[i-1];
711 }
712
713 P_diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag, HYPRE_MEMORY_HOST);
714 P_diag_data = hypre_CTAlloc(HYPRE_Real, nnz_diag, HYPRE_MEMORY_HOST);
715 P_offd_j = hypre_CTAlloc(HYPRE_Int, nnz_offd, HYPRE_MEMORY_HOST);
716 P_offd_data = hypre_CTAlloc(HYPRE_Real, nnz_offd, HYPRE_MEMORY_HOST);
717
718 cnt_diag = 0;
719 cnt_offd = 0;
720 for (i=0; i < num_points; i++)
721 {
722 i1 = pass_order[i];
723 j2 = A_diag_i[i1];
724 for (j = S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
725 {
726 j1 = S_diag_j[j];
727 while (A_diag_j[j2] != j1) j2++;
728 if (pass_marker[j1] == color && A_diag_j[j2] == j1)
729 {
730 P_diag_j[cnt_diag] = fine_to_coarse[j1];
731 P_diag_data[cnt_diag++] = A_diag_data[j2];
732 }
733 }
734 j2 = A_offd_i[i1];
735 for (j = S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
736 {
737 j1 = S_offd_j[j];
738 while (A_offd_j[j2] != j1) j2++;
739 if (pass_marker_offd[j1] == color && A_offd_j[j2] == j1)
740 {
741 P_offd_j[cnt_offd] = fine_to_coarse_offd[j1];
742 P_offd_data[cnt_offd++] = A_offd_data[j2];
743 }
744 }
745 }
746
747 //row_sums = hypre_CTAlloc(HYPRE_Real, num_points, HYPRE_MEMORY_HOST);
748 row_sum_C = hypre_CTAlloc(HYPRE_Real, num_points, HYPRE_MEMORY_HOST);
749 for (i=0; i < num_points; i++)
750 {
751 HYPRE_Real diagonal, value;
752 i1 = pass_order[i];
753 diagonal = A_diag_data[A_diag_i[i1]];
754 /*for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++)
755 {
756 row_sums[i] += A_diag_data[j];
757 }
758 for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++)
759 {
760 row_sums[i] += A_offd_data[j];
761 }*/
762 for (j = P_diag_i[i]; j < P_diag_i[i+1]; j++)
763 {
764 row_sum_C[i] += P_diag_data[j];
765 }
766 for (j = P_offd_i[i]; j < P_offd_i[i+1]; j++)
767 {
768 row_sum_C[i] += P_offd_data[j];
769 }
770 value = row_sum_C[i]*diagonal;
771 if (value != 0)
772 {
773 row_sums[i1] /= value;
774 }
775 for (j = P_diag_i[i]; j < P_diag_i[i+1]; j++)
776 {
777 P_diag_data[j] = -P_diag_data[j]*row_sums[i1];
778 }
779 for (j = P_offd_i[i]; j < P_offd_i[i+1]; j++)
780 {
781 P_offd_data[j] = -P_offd_data[j]*row_sums[i1];
782 }
783 }
784
785
786 P = hypre_ParCSRMatrixCreate(comm,
787 total_global_fpts,
788 total_global_cpts,
789 f_pts_starts,
790 c_pts_starts,
791 num_cols_offd_P,
792 P_diag_i[num_points],
793 P_offd_i[num_points]);
794
795 P_diag = hypre_ParCSRMatrixDiag(P);
796 hypre_CSRMatrixData(P_diag) = P_diag_data;
797 hypre_CSRMatrixI(P_diag) = P_diag_i;
798 hypre_CSRMatrixJ(P_diag) = P_diag_j;
799 P_offd = hypre_ParCSRMatrixOffd(P);
800 hypre_CSRMatrixData(P_offd) = P_offd_data;
801 hypre_CSRMatrixI(P_offd) = P_offd_i;
802 hypre_CSRMatrixJ(P_offd) = P_offd_j;
803 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
804
805 hypre_CSRMatrixMemoryLocation(P_diag) = HYPRE_MEMORY_HOST;
806 hypre_CSRMatrixMemoryLocation(P_offd) = HYPRE_MEMORY_HOST;
807
808 /* free stuff */
809 hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
810 hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
811 //hypre_TFree(row_sums, HYPRE_MEMORY_HOST);
812 hypre_TFree(row_sum_C, HYPRE_MEMORY_HOST);
813 hypre_TFree(big_convert, HYPRE_MEMORY_HOST);
814 hypre_TFree(big_convert_offd, HYPRE_MEMORY_HOST);
815 hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
816 hypre_TFree(f_pts_starts, HYPRE_MEMORY_HOST);
817
818 hypre_MatvecCommPkgCreate(P);
819 *P_ptr = P;
820
821 return hypre_error_flag;
822 }
823
824 HYPRE_Int
hypre_GenerateMultiPi(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * S,hypre_ParCSRMatrix * P,HYPRE_BigInt * c_pts_starts,HYPRE_Int * pass_order,HYPRE_Int * pass_marker,HYPRE_Int * pass_marker_offd,HYPRE_Int num_points,HYPRE_Int color,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int * dof_func_offd,hypre_ParCSRMatrix ** Pi_ptr)825 hypre_GenerateMultiPi( hypre_ParCSRMatrix *A,
826 hypre_ParCSRMatrix *S,
827 hypre_ParCSRMatrix *P,
828 HYPRE_BigInt *c_pts_starts,
829 HYPRE_Int *pass_order, /* array containing row numbers of rows in A and S to be considered */
830 HYPRE_Int *pass_marker,
831 HYPRE_Int *pass_marker_offd,
832 HYPRE_Int num_points,
833 HYPRE_Int color,
834 HYPRE_Int num_functions,
835 HYPRE_Int *dof_func,
836 HYPRE_Int *dof_func_offd,
837 hypre_ParCSRMatrix **Pi_ptr )
838 {
839 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
840 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
841 hypre_ParCSRCommHandle *comm_handle;
842
843 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
844 HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
845 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
846 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
847 HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
848
849 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
850 HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
851 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
852 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
853 HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
854
855 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
856 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
857 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
858
859 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
860 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
861 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
862 HYPRE_BigInt *col_map_offd_Q = NULL;
863 HYPRE_Int num_cols_offd_Q;
864
865 hypre_ParCSRMatrix *Pi;
866 hypre_CSRMatrix *Pi_diag;
867 HYPRE_Int *Pi_diag_i;
868 HYPRE_Real *Pi_diag_data;
869
870 hypre_CSRMatrix *Pi_offd;
871 HYPRE_Int *Pi_offd_i;
872 HYPRE_Real *Pi_offd_data;
873
874 HYPRE_Int nnz_diag, nnz_offd;
875 HYPRE_Int n_cpts, i, j, i1, j1, j2;
876 HYPRE_Int startc, index;
877 HYPRE_Int cpt, cnt_diag, cnt_offd;
878
879 hypre_ParCSRMatrix *Q;
880 hypre_CSRMatrix *Q_diag;
881 HYPRE_Real *Q_diag_data;
882 HYPRE_Int *Q_diag_i; /*at first counter of nonzero cols for each row,
883 finally will be pointer to start of row */
884 HYPRE_Int *Q_diag_j;
885
886 hypre_CSRMatrix *Q_offd;
887 HYPRE_Real *Q_offd_data = NULL;
888 HYPRE_Int *Q_offd_i; /*at first counter of nonzero cols for each row,
889 finally will be pointer to start of row */
890 HYPRE_Int *Q_offd_j = NULL;
891 HYPRE_Int *fine_to_coarse;
892 HYPRE_Int *fine_to_coarse_offd = NULL;
893 HYPRE_BigInt *f_pts_starts = NULL;
894 HYPRE_Int my_id, num_procs;
895 HYPRE_BigInt total_global_fpts;
896 HYPRE_BigInt total_global_cpts;
897 HYPRE_BigInt *big_convert;
898 HYPRE_BigInt *big_convert_offd = NULL;
899 HYPRE_BigInt *big_buf_data = NULL;
900 HYPRE_Int num_sends;
901 //HYPRE_Real *row_sums;
902 HYPRE_Real *row_sums_C;
903 HYPRE_Real *w_row_sum;
904
905 /* MPI size and rank*/
906 hypre_MPI_Comm_size(comm, &num_procs);
907 hypre_MPI_Comm_rank(comm, &my_id);
908
909 /* define P matrices */
910
911 Q_diag_i = hypre_CTAlloc(HYPRE_Int, num_points+1, HYPRE_MEMORY_HOST);
912 Q_offd_i = hypre_CTAlloc(HYPRE_Int, num_points+1, HYPRE_MEMORY_HOST);
913 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
914
915 /* fill P */
916
917 n_cpts = 0;
918 for (i=0; i < n_fine; i++)
919 {
920 if (pass_marker[i] == color)
921 {
922 fine_to_coarse[i] = n_cpts++;
923 }
924 else
925 {
926 fine_to_coarse[i] = -1;
927 }
928 }
929
930 if (num_procs > 1)
931 {
932 HYPRE_BigInt big_Fpts;
933 big_Fpts = num_points;
934
935 f_pts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
936 hypre_MPI_Scan(&big_Fpts, f_pts_starts+1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
937 f_pts_starts[0] = f_pts_starts[1] - big_Fpts;
938 if (my_id == num_procs - 1)
939 {
940 total_global_fpts = f_pts_starts[1];
941 total_global_cpts = c_pts_starts[1];
942 }
943 hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
944 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
945 }
946 else
947 {
948 f_pts_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
949 f_pts_starts[0] = 0;
950 f_pts_starts[1] = num_points;
951 total_global_fpts = f_pts_starts[1];
952 total_global_cpts = c_pts_starts[1];
953 }
954
955 {
956 big_convert = hypre_CTAlloc(HYPRE_BigInt, n_fine, HYPRE_MEMORY_HOST);
957 for (i=0; i < n_fine; i++)
958 {
959 if (pass_marker[i] == color)
960 {
961 big_convert[i] = (HYPRE_BigInt)fine_to_coarse[i] + c_pts_starts[0];
962 }
963 }
964
965 num_cols_offd_Q = 0;
966 if (num_procs > 1)
967 {
968 big_convert_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_A, HYPRE_MEMORY_HOST);
969 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_HOST);
970 index = 0;
971 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
972 big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
973 for (i = 0; i < num_sends; i++)
974 {
975 startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
976 for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
977 {
978 big_buf_data[index++] = big_convert[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
979 }
980 }
981
982 comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data, big_convert_offd);
983
984 hypre_ParCSRCommHandleDestroy(comm_handle);
985
986 num_cols_offd_Q = 0;
987 for (i=0; i < num_cols_offd_A; i++)
988 {
989 if (pass_marker_offd[i] == color)
990 {
991 fine_to_coarse_offd[i] = num_cols_offd_Q++;
992 }
993 }
994
995 col_map_offd_Q = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_Q, HYPRE_MEMORY_HOST);
996
997 cpt = 0;
998 for (i=0; i < num_cols_offd_A; i++)
999 {
1000 if (pass_marker_offd[i] == color)
1001 {
1002 col_map_offd_Q[cpt++] = big_convert_offd[i];
1003 }
1004 }
1005 }
1006 }
1007
1008 /* generate Q_diag_i and Q_offd_i */
1009 nnz_diag = 0;
1010 nnz_offd = 0;
1011 for (i=0; i < num_points; i++)
1012 {
1013 i1 = pass_order[i];
1014 for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
1015 {
1016 j1 = S_diag_j[j];
1017 if (pass_marker[j1] == color)
1018 {
1019 Q_diag_i[i+1]++;
1020 nnz_diag++;
1021 }
1022 }
1023 for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
1024 {
1025 j1 = S_offd_j[j];
1026 if (pass_marker_offd[j1] == color)
1027 {
1028 Q_offd_i[i+1]++;
1029 nnz_offd++;
1030 }
1031 }
1032 }
1033
1034 for (i=1; i < num_points+1; i++)
1035 {
1036 Q_diag_i[i] += Q_diag_i[i-1];
1037 Q_offd_i[i] += Q_offd_i[i-1];
1038 }
1039
1040 Q_diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag, HYPRE_MEMORY_HOST);
1041 Q_diag_data = hypre_CTAlloc(HYPRE_Real, nnz_diag, HYPRE_MEMORY_HOST);
1042 Q_offd_j = hypre_CTAlloc(HYPRE_Int, nnz_offd, HYPRE_MEMORY_HOST);
1043 Q_offd_data = hypre_CTAlloc(HYPRE_Real, nnz_offd, HYPRE_MEMORY_HOST);
1044 w_row_sum = hypre_CTAlloc(HYPRE_Real, num_points, HYPRE_MEMORY_HOST);
1045
1046 cnt_diag = 0;
1047 cnt_offd = 0;
1048 if (num_functions > 1)
1049 {
1050 for (i=0; i < num_points; i++)
1051 {
1052 i1 = pass_order[i];
1053 j2 = A_diag_i[i1]+1;
1054 //if (w_row_minus) w_row_sum[i] = -w_row_minus[i1];
1055 for (j = S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
1056 {
1057 j1 = S_diag_j[j];
1058 while (A_diag_j[j2] != j1)
1059 {
1060 if (dof_func[i1] == dof_func[A_diag_j[j2]])
1061 {
1062 w_row_sum[i] += A_diag_data[j2];
1063 }
1064 j2++;
1065 }
1066 if (pass_marker[j1] == color && A_diag_j[j2] == j1)
1067 {
1068 Q_diag_j[cnt_diag] = fine_to_coarse[j1];
1069 Q_diag_data[cnt_diag++] = A_diag_data[j2++];
1070 }
1071 else
1072 {
1073 if (dof_func[i1] == dof_func[A_diag_j[j2]])
1074 {
1075 w_row_sum[i] += A_diag_data[j2];
1076 }
1077 j2++;
1078 }
1079 }
1080 while (j2 < A_diag_i[i1+1])
1081 {
1082 if (dof_func[i1] == dof_func[A_diag_j[j2]])
1083 {
1084 w_row_sum[i] += A_diag_data[j2];
1085 }
1086 j2++;
1087 }
1088 j2 = A_offd_i[i1];
1089 for (j = S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
1090 {
1091 j1 = S_offd_j[j];
1092 while (A_offd_j[j2] != j1)
1093 {
1094 if (dof_func[i1] == dof_func_offd[A_offd_j[j2]])
1095 {
1096 w_row_sum[i] += A_offd_data[j2];
1097 }
1098 j2++;
1099 }
1100 if (pass_marker_offd[j1] == color && A_offd_j[j2] == j1)
1101 {
1102 Q_offd_j[cnt_offd] = fine_to_coarse_offd[j1];
1103 Q_offd_data[cnt_offd++] = A_offd_data[j2++];
1104 }
1105 else
1106 {
1107 if (dof_func[i1] == dof_func_offd[A_offd_j[j2]])
1108 {
1109 w_row_sum[i] += A_offd_data[j2];
1110 }
1111 j2++;
1112 }
1113 }
1114 while (j2 < A_offd_i[i1+1])
1115 {
1116 if (dof_func[i1] == dof_func_offd[A_offd_j[j2]])
1117 {
1118 w_row_sum[i] += A_offd_data[j2];
1119 }
1120 j2++;
1121 }
1122 }
1123 }
1124 else
1125 {
1126 for (i=0; i < num_points; i++)
1127 {
1128 i1 = pass_order[i];
1129 j2 = A_diag_i[i1]+1;
1130 for (j = S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
1131 {
1132 j1 = S_diag_j[j];
1133 while (A_diag_j[j2] != j1)
1134 {
1135 w_row_sum[i] += A_diag_data[j2];
1136 j2++;
1137 }
1138 if (pass_marker[j1] == color && A_diag_j[j2] == j1)
1139 {
1140 Q_diag_j[cnt_diag] = fine_to_coarse[j1];
1141 Q_diag_data[cnt_diag++] = A_diag_data[j2++];
1142 }
1143 else
1144 {
1145 w_row_sum[i] += A_diag_data[j2];
1146 j2++;
1147 }
1148 }
1149 while (j2 < A_diag_i[i1+1])
1150 {
1151 w_row_sum[i] += A_diag_data[j2];
1152 j2++;
1153 }
1154 j2 = A_offd_i[i1];
1155 for (j = S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
1156 {
1157 j1 = S_offd_j[j];
1158 while (A_offd_j[j2] != j1)
1159 {
1160 w_row_sum[i] += A_offd_data[j2];
1161 j2++;
1162 }
1163 if (pass_marker_offd[j1] == color && A_offd_j[j2] == j1)
1164 {
1165 Q_offd_j[cnt_offd] = fine_to_coarse_offd[j1];
1166 Q_offd_data[cnt_offd++] = A_offd_data[j2++];
1167 }
1168 else
1169 {
1170 w_row_sum[i] += A_offd_data[j2];
1171 j2++;
1172 }
1173 }
1174 while (j2 < A_offd_i[i1+1])
1175 {
1176 w_row_sum[i] += A_offd_data[j2];
1177 j2++;
1178 }
1179 }
1180 }
1181
1182 Q = hypre_ParCSRMatrixCreate(comm,
1183 total_global_fpts,
1184 total_global_cpts,
1185 f_pts_starts,
1186 c_pts_starts,
1187 num_cols_offd_Q,
1188 Q_diag_i[num_points],
1189 Q_offd_i[num_points]);
1190
1191 Q_diag = hypre_ParCSRMatrixDiag(Q);
1192 hypre_CSRMatrixData(Q_diag) = Q_diag_data;
1193 hypre_CSRMatrixI(Q_diag) = Q_diag_i;
1194 hypre_CSRMatrixJ(Q_diag) = Q_diag_j;
1195 Q_offd = hypre_ParCSRMatrixOffd(Q);
1196 hypre_CSRMatrixData(Q_offd) = Q_offd_data;
1197 hypre_CSRMatrixI(Q_offd) = Q_offd_i;
1198 hypre_CSRMatrixJ(Q_offd) = Q_offd_j;
1199 hypre_ParCSRMatrixColMapOffd(Q) = col_map_offd_Q;
1200
1201 hypre_CSRMatrixMemoryLocation(Q_diag) = HYPRE_MEMORY_HOST;
1202 hypre_CSRMatrixMemoryLocation(Q_offd) = HYPRE_MEMORY_HOST;
1203
1204 /* free stuff */
1205 hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
1206 hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
1207 hypre_TFree(big_convert, HYPRE_MEMORY_HOST);
1208 hypre_TFree(big_convert_offd, HYPRE_MEMORY_HOST);
1209 hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
1210
1211 hypre_MatvecCommPkgCreate(Q);
1212
1213 Pi = hypre_ParMatmul(Q, P);
1214
1215 Pi_diag = hypre_ParCSRMatrixDiag(Pi);
1216 Pi_diag_data = hypre_CSRMatrixData(Pi_diag);
1217 Pi_diag_i = hypre_CSRMatrixI(Pi_diag);
1218 Pi_offd = hypre_ParCSRMatrixOffd(Pi);
1219 Pi_offd_data = hypre_CSRMatrixData(Pi_offd);
1220 Pi_offd_i = hypre_CSRMatrixI(Pi_offd);
1221
1222 row_sums_C = hypre_CTAlloc(HYPRE_Real, num_points, HYPRE_MEMORY_HOST);
1223 for (i=0; i < num_points; i++)
1224 {
1225 HYPRE_Real diagonal, value;
1226 i1 = pass_order[i];
1227 diagonal = A_diag_data[A_diag_i[i1]];
1228 for (j=Pi_diag_i[i]; j < Pi_diag_i[i+1]; j++)
1229 {
1230 row_sums_C[i] += Pi_diag_data[j];
1231 }
1232 for (j=Pi_offd_i[i]; j < Pi_offd_i[i+1]; j++)
1233 {
1234 row_sums_C[i] += Pi_offd_data[j];
1235 }
1236 value = row_sums_C[i]*diagonal;
1237 row_sums_C[i] += w_row_sum[i];
1238 if (value != 0)
1239 {
1240 row_sums_C[i] /= value;
1241 }
1242 for (j = Pi_diag_i[i]; j < Pi_diag_i[i+1]; j++)
1243 {
1244 Pi_diag_data[j] = -Pi_diag_data[j]*row_sums_C[i];
1245 }
1246 for (j = Pi_offd_i[i]; j < Pi_offd_i[i+1]; j++)
1247 {
1248 Pi_offd_data[j] = -Pi_offd_data[j]*row_sums_C[i];
1249 }
1250 }
1251
1252 hypre_ParCSRMatrixDestroy(Q);
1253 //hypre_TFree(row_sums, HYPRE_MEMORY_HOST);
1254 hypre_TFree(row_sums_C, HYPRE_MEMORY_HOST);
1255 hypre_TFree(w_row_sum, HYPRE_MEMORY_HOST);
1256 hypre_TFree(f_pts_starts, HYPRE_MEMORY_HOST);
1257
1258 *Pi_ptr = Pi;
1259
1260 return hypre_error_flag;
1261 }
1262
1263
1264 HYPRE_Int
hypre_BoomerAMGBuildModMultipass(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Real trunc_factor,HYPRE_Int P_max_elmts,HYPRE_Int interp_type,HYPRE_Int num_functions,HYPRE_Int * dof_func,hypre_ParCSRMatrix ** P_ptr)1265 hypre_BoomerAMGBuildModMultipass( hypre_ParCSRMatrix *A,
1266 HYPRE_Int *CF_marker,
1267 hypre_ParCSRMatrix *S,
1268 HYPRE_BigInt *num_cpts_global,
1269 HYPRE_Real trunc_factor,
1270 HYPRE_Int P_max_elmts,
1271 HYPRE_Int interp_type,
1272 HYPRE_Int num_functions,
1273 HYPRE_Int *dof_func,
1274 hypre_ParCSRMatrix **P_ptr )
1275 {
1276 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1277 hypre_GpuProfilingPushRange("ModMultipass");
1278 #endif
1279
1280 HYPRE_Int ierr = 0;
1281
1282 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1283 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_ParCSRMatrixMemoryLocation(A),
1284 hypre_ParCSRMatrixMemoryLocation(S) );
1285 if (exec == HYPRE_EXEC_DEVICE)
1286 {
1287 ierr = hypre_BoomerAMGBuildModMultipassDevice( A, CF_marker, S, num_cpts_global,
1288 trunc_factor, P_max_elmts,
1289 interp_type, num_functions,
1290 dof_func, P_ptr);
1291 }
1292 else
1293 #endif
1294 {
1295 ierr = hypre_BoomerAMGBuildModMultipassHost( A, CF_marker, S, num_cpts_global,
1296 trunc_factor, P_max_elmts,
1297 interp_type, num_functions,
1298 dof_func, P_ptr);
1299 }
1300
1301 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1302 hypre_GpuProfilingPopRange();
1303 #endif
1304
1305 return ierr;
1306 }
1307
1308