1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "_hypre_parcsr_ls.h"
9
10 /*---------------------------------------------------------------------------
11 * hypre_BoomerAMGBuildInterp
12 *--------------------------------------------------------------------------*/
13
/**
 * Builds the classical (Ruge-Stueben style) interpolation operator P for
 * BoomerAMG from the system matrix A and the strength matrix S.
 *
 * C-points interpolate by identity; each F-point i interpolates from the
 * C-points that strongly influence it.  Connections to strong F-neighbors
 * are distributed to the common strong C-neighbors (sign-filtered), and
 * weak connections are lumped into the diagonal.
 *
 * \param A               fine-grid matrix (ParCSR)
 * \param CF_marker       per-row flag: >= 0 C-point, < 0 F-point
 *                        (-3 marks special F-points excluded from
 *                        distribution; reset to -1 on exit)
 * \param S               strength-of-connection matrix (pattern only)
 * \param num_cpts_global num_cpts_global[1] on the last rank holds the
 *                        global number of coarse points (broadcast below)
 * \param num_functions   number of unknowns per node (> 1 enables
 *                        dof_func filtering of weak-connection lumping)
 * \param dof_func        function index per fine dof (used when
 *                        num_functions > 1)
 * \param debug_flag      4 enables timing prints; negative value enables
 *                        warning printing (print_level) and is negated
 * \param trunc_factor    relative truncation threshold for P entries
 * \param max_elmts       max nonzeros kept per row of P (0 = no limit)
 * \param P_ptr           output: the assembled interpolation matrix
 *
 * \return hypre_error_flag
 */
HYPRE_Int
hypre_BoomerAMGBuildInterp( hypre_ParCSRMatrix *A,
                            HYPRE_Int          *CF_marker,
                            hypre_ParCSRMatrix *S,
                            HYPRE_BigInt       *num_cpts_global,
                            HYPRE_Int           num_functions,
                            HYPRE_Int          *dof_func,
                            HYPRE_Int           debug_flag,
                            HYPRE_Real          trunc_factor,
                            HYPRE_Int           max_elmts,
                            hypre_ParCSRMatrix **P_ptr)
{
   MPI_Comm                comm = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_ParCSRCommHandle *comm_handle;

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   HYPRE_Real      *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Real      *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);
   HYPRE_Int        num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
   HYPRE_BigInt    *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);

   hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
   HYPRE_Int       *S_diag_i = hypre_CSRMatrixI(S_diag);
   HYPRE_Int       *S_diag_j = hypre_CSRMatrixJ(S_diag);

   hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
   HYPRE_Int       *S_offd_i = hypre_CSRMatrixI(S_offd);
   HYPRE_Int       *S_offd_j = hypre_CSRMatrixJ(S_offd);

   hypre_ParCSRMatrix *P;
   HYPRE_BigInt       *col_map_offd_P;
   HYPRE_Int          *tmp_map_offd = NULL;

   HYPRE_Int       *CF_marker_offd = NULL;
   HYPRE_Int       *dof_func_offd = NULL;

   /* ghost rows of A (rows owned by neighbors that this rank's offd
      columns refer to); only built when num_procs > 1 */
   hypre_CSRMatrix *A_ext;

   HYPRE_Real      *A_ext_data = NULL;
   HYPRE_Int       *A_ext_i = NULL;
   HYPRE_BigInt    *A_ext_j = NULL;

   hypre_CSRMatrix *P_diag;
   hypre_CSRMatrix *P_offd;

   HYPRE_Real      *P_diag_data;
   HYPRE_Int       *P_diag_i;
   HYPRE_Int       *P_diag_j;
   HYPRE_Real      *P_offd_data;
   HYPRE_Int       *P_offd_i;
   HYPRE_Int       *P_offd_j;

   HYPRE_Int        P_diag_size, P_offd_size;

   /* per-thread scratch: P_marker[c] holds the P_diag_j slot of C-point c
      for the current row, or strong_f_marker for strong F-neighbors */
   HYPRE_Int       *P_marker, *P_marker_offd;

   HYPRE_Int        jj_counter, jj_counter_offd;
   HYPRE_Int       *jj_count, *jj_count_offd;
   HYPRE_Int        jj_begin_row, jj_begin_row_offd;
   HYPRE_Int        jj_end_row, jj_end_row_offd;

   HYPRE_Int        start_indexing = 0; /* start indexing for P_data at 0 */

   HYPRE_Int        n_fine = hypre_CSRMatrixNumRows(A_diag);

   HYPRE_Int        strong_f_marker;

   HYPRE_Int       *fine_to_coarse;
   //HYPRE_Int      *fine_to_coarse_offd;
   HYPRE_Int       *coarse_counter;
   HYPRE_Int        coarse_shift;
   HYPRE_BigInt     total_global_cpts;
   //HYPRE_BigInt   my_first_cpt;
   HYPRE_Int        num_cols_P_offd;

   HYPRE_Int        i, i1, i2;
   HYPRE_Int        j, jl, jj, jj1;
   HYPRE_Int        kc;
   HYPRE_BigInt     big_k;
   HYPRE_Int        start;
   HYPRE_Int        sgn;
   HYPRE_Int        c_num;

   HYPRE_Real       diagonal;
   HYPRE_Real       sum;
   HYPRE_Real       distribute;

   HYPRE_Real       zero = 0.0;
   HYPRE_Real       one = 1.0;

   HYPRE_Int        my_id;
   HYPRE_Int        num_procs;
   HYPRE_Int        num_threads;
   HYPRE_Int        num_sends;
   HYPRE_Int        index;
   HYPRE_Int        ns, ne, size, rest;
   HYPRE_Int        print_level = 0;
   HYPRE_Int       *int_buf_data;

   /* [col_1, col_n) is this rank's owned global column range */
   HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
   HYPRE_Int local_numrows = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt)local_numrows;

   HYPRE_Real       wall_time;  /* for debugging instrumentation  */

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);
   num_threads = hypre_NumThreads();

   //my_first_cpt = num_cpts_global[0];
   /* only the last rank knows the global C-point count; broadcast it */
   if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
   hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);

   /*-------------------------------------------------------------------
    * Get the CF_marker data for the off-processor columns
    *-------------------------------------------------------------------*/

   /* a negative debug_flag doubles as a request for warning output */
   if (debug_flag < 0)
   {
      debug_flag = -debug_flag;
      print_level = 1;
   }

   if (debug_flag==4) wall_time = time_getWallclockSeconds();

   if (num_cols_A_offd) CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
   if (num_functions > 1 && num_cols_A_offd)
   {
      dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
   }

   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
                                HYPRE_MEMORY_HOST);

   /* exchange CF_marker for the ghost (offd) columns */
   index = 0;
   for (i = 0; i < num_sends; i++)
   {
      start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
      for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
      {
         int_buf_data[index++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
   }

   comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, CF_marker_offd);

   hypre_ParCSRCommHandleDestroy(comm_handle);
   /* same exchange for dof_func when running in systems mode */
   if (num_functions > 1)
   {
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            int_buf_data[index++] = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }

      comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, dof_func_offd);

      hypre_ParCSRCommHandleDestroy(comm_handle);
   }

   if (debug_flag==4)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d     Interp: Comm 1 CF_marker =    %f\n",
                   my_id, wall_time);
      fflush(NULL);
   }

   /*----------------------------------------------------------------------
    * Get the ghost rows of A
    *---------------------------------------------------------------------*/

   if (debug_flag==4) wall_time = time_getWallclockSeconds();

   if (num_procs > 1)
   {
      A_ext      = hypre_ParCSRMatrixExtractBExt(A,A,1);
      A_ext_i    = hypre_CSRMatrixI(A_ext);
      A_ext_j    = hypre_CSRMatrixBigJ(A_ext);
      A_ext_data = hypre_CSRMatrixData(A_ext);
   }

   /* Compress A_ext in place, re-encoding global column indices:
    *   - columns owned locally       -> local diag index (>= 0)
    *   - columns in col_map_offd     -> -(offd index) - 1   (< 0)
    *   - columns known to neither    -> dropped.
    * A_ext_i[i] temporarily records the end of compressed row i and is
    * shifted back to a proper row-pointer array right after the loop.
    * (Loop body is never entered when num_procs == 1: num_cols_A_offd
    * is 0 then, so the NULL A_ext arrays are not touched.) */
   index = 0;
   for (i=0; i < num_cols_A_offd; i++)
   {
      for (j=A_ext_i[i]; j < A_ext_i[i+1]; j++)
      {
         big_k = A_ext_j[j];
         if (big_k >= col_1 && big_k < col_n)
         {
            A_ext_j[index] = big_k - col_1;
            A_ext_data[index++] = A_ext_data[j];
         }
         else
         {
            kc = hypre_BigBinarySearch(col_map_offd,big_k,num_cols_A_offd);
            if (kc > -1)
            {
               A_ext_j[index] = (HYPRE_BigInt)(-kc-1);
               A_ext_data[index++] = A_ext_data[j];
            }
         }
      }
      A_ext_i[i] = index;
   }
   for (i = num_cols_A_offd; i > 0; i--)
      A_ext_i[i] = A_ext_i[i-1];
   if (num_procs > 1) A_ext_i[0] = 0;

   if (debug_flag==4)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d  Interp: Comm 2   Get A_ext =  %f\n",
                   my_id, wall_time);
      fflush(NULL);
   }


   /*-----------------------------------------------------------------------
    *  First Pass: Determine size of P and fill in fine_to_coarse mapping.
    *-----------------------------------------------------------------------*/

   /*-----------------------------------------------------------------------
    *  Intialize counters and allocate mapping vector.
    *-----------------------------------------------------------------------*/

   coarse_counter = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
   jj_count = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
   jj_count_offd = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);

   fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
   for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1;

   jj_counter = start_indexing;
   jj_counter_offd = start_indexing;

   /*-----------------------------------------------------------------------
    *  Loop over fine grid.
    *-----------------------------------------------------------------------*/

   /* Rows are partitioned into contiguous chunks, one per thread; the
    * first `rest` threads take one extra row.  Each thread counts its
    * local C-points and P nonzeros; the counts are prefix-summed below. */
   /* RDF: this looks a little tricky, but doable */
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,j,i1,jj,ns,ne,size,rest) HYPRE_SMP_SCHEDULE
#endif
   for (j = 0; j < num_threads; j++)
   {
      size = n_fine/num_threads;
      rest = n_fine - size*num_threads;
      if (j < rest)
      {
         ns = j*size+j;
         ne = (j+1)*size+j+1;
      }
      else
      {
         ns = j*size+rest;
         ne = (j+1)*size+rest;
      }
      for (i = ns; i < ne; i++)
      {

         /*--------------------------------------------------------------------
          *  If i is a C-point, interpolation is the identity. Also set up
          *  mapping vector.
          *--------------------------------------------------------------------*/

         if (CF_marker[i] >= 0)
         {
            jj_count[j]++;
            fine_to_coarse[i] = coarse_counter[j];
            coarse_counter[j]++;
         }

         /*--------------------------------------------------------------------
          *  If i is an F-point, interpolation is from the C-points that
          *  strongly influence i.
          *--------------------------------------------------------------------*/

         else
         {
            for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
            {
               i1 = S_diag_j[jj];
               if (CF_marker[i1] >= 0)
               {
                  jj_count[j]++;
               }
            }

            if (num_procs > 1)
            {
               for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
               {
                  i1 = S_offd_j[jj];
                  if (CF_marker_offd[i1] >= 0)
                  {
                     jj_count_offd[j]++;
                  }
               }
            }
         }
      }
   }

   /*-----------------------------------------------------------------------
    *  Allocate  arrays.
    *-----------------------------------------------------------------------*/

   /* serial prefix sum of the per-thread counts */
   for (i=0; i < num_threads-1; i++)
   {
      coarse_counter[i+1] += coarse_counter[i];
      jj_count[i+1] += jj_count[i];
      jj_count_offd[i+1] += jj_count_offd[i];
   }
   i = num_threads-1;
   jj_counter = jj_count[i];
   jj_counter_offd = jj_count_offd[i];

   P_diag_size = jj_counter;

   P_diag_i    = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_DEVICE);
   P_diag_j    = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_DEVICE);
   P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, HYPRE_MEMORY_DEVICE);

   P_diag_i[n_fine] = jj_counter;


   P_offd_size = jj_counter_offd;

   P_offd_i    = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_DEVICE);
   P_offd_j    = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_DEVICE);
   P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, HYPRE_MEMORY_DEVICE);

   /*-----------------------------------------------------------------------
    *  Intialize some stuff.
    *-----------------------------------------------------------------------*/

   jj_counter = start_indexing;
   jj_counter_offd = start_indexing;

   if (debug_flag==4)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d     Interp: Internal work 1 =     %f\n",
                   my_id, wall_time);
      fflush(NULL);
   }

   /*-----------------------------------------------------------------------
    *  Send and receive fine_to_coarse info.
    *-----------------------------------------------------------------------*/

   if (debug_flag==4) wall_time = time_getWallclockSeconds();

   //fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);

   /* turn per-thread-local coarse indices into process-local ones by
    * shifting each chunk by the number of C-points in earlier chunks */
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,j,ns,ne,size,rest,coarse_shift) HYPRE_SMP_SCHEDULE
#endif
   for (j = 0; j < num_threads; j++)
   {
      coarse_shift = 0;
      if (j > 0) coarse_shift = coarse_counter[j-1];
      size = n_fine/num_threads;
      rest = n_fine - size*num_threads;
      if (j < rest)
      {
         ns = j*size+j;
         ne = (j+1)*size+j+1;
      }
      else
      {
         ns = j*size+rest;
         ne = (j+1)*size+rest;
      }
      for (i = ns; i < ne; i++)
      {
         fine_to_coarse[i] += coarse_shift;
      }
      //fine_to_coarse[i] += my_first_cpt+coarse_shift;
   }
   /*index = 0;
   for (i = 0; i < num_sends; i++)
   {
      start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
      for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
         int_buf_data[index++]
            = fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
   }

   comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, fine_to_coarse_offd);

   hypre_ParCSRCommHandleDestroy(comm_handle);

   if (debug_flag==4)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d     Interp: Comm 4 FineToCoarse = %f\n",
                   my_id, wall_time);
      fflush(NULL);
   }*/

   if (debug_flag==4) wall_time = time_getWallclockSeconds();

   /*#ifdef HYPRE_USING_OPENMP
   #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
   #endif
   for (i = 0; i < n_fine; i++) fine_to_coarse[i] -= my_first_cpt; */

   /*-----------------------------------------------------------------------
    *  Loop over fine grid points: second pass, fill P_diag / P_offd.
    *-----------------------------------------------------------------------*/

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,j,jl,i1,i2,jj,jj1,ns,ne,size,rest,sum,diagonal,distribute,P_marker,P_marker_offd,strong_f_marker,jj_counter,jj_counter_offd,sgn,c_num,jj_begin_row,jj_end_row,jj_begin_row_offd,jj_end_row_offd) HYPRE_SMP_SCHEDULE
#endif
   for (jl = 0; jl < num_threads; jl++)
   {
      size = n_fine/num_threads;
      rest = n_fine - size*num_threads;
      if (jl < rest)
      {
         ns = jl*size+jl;
         ne = (jl+1)*size+jl+1;
      }
      else
      {
         ns = jl*size+rest;
         ne = (jl+1)*size+rest;
      }
      /* each thread resumes writing at the offset found in pass 1 */
      jj_counter = 0;
      if (jl > 0) jj_counter = jj_count[jl-1];
      jj_counter_offd = 0;
      if (jl > 0) jj_counter_offd = jj_count_offd[jl-1];

      P_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
      if (num_cols_A_offd)
         P_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
      else
         P_marker_offd = NULL;

      for (i = 0; i < n_fine; i++)
      {
         P_marker[i] = -1;
      }
      for (i = 0; i < num_cols_A_offd; i++)
      {
         P_marker_offd[i] = -1;
      }
      strong_f_marker = -2;

      for (i = ns; i < ne; i++)
      {

         /*--------------------------------------------------------------------
          *  If i is a c-point, interpolation is the identity.
          *--------------------------------------------------------------------*/

         if (CF_marker[i] >= 0)
         {
            P_diag_i[i] = jj_counter;
            P_diag_j[jj_counter]    = fine_to_coarse[i];
            P_diag_data[jj_counter] = one;
            jj_counter++;
         }

         /*--------------------------------------------------------------------
          *  If i is an F-point, build interpolation.
          *--------------------------------------------------------------------*/

         else
         {
            /* Diagonal part of P: reserve a slot per strong C-neighbor and
             * tag strong F-neighbors for later distribution */
            P_diag_i[i] = jj_counter;
            jj_begin_row = jj_counter;

            for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
            {
               i1 = S_diag_j[jj];

               /*--------------------------------------------------------------
                * If neighbor i1 is a C-point, set column number in P_diag_j
                * and initialize interpolation weight to zero.
                *--------------------------------------------------------------*/

               if (CF_marker[i1] >= 0)
               {
                  P_marker[i1] = jj_counter;
                  P_diag_j[jj_counter]    = fine_to_coarse[i1];
                  P_diag_data[jj_counter] = zero;
                  jj_counter++;
               }

               /*--------------------------------------------------------------
                * If neighbor i1 is an F-point, mark it as a strong F-point
                * whose connection needs to be distributed.
                *--------------------------------------------------------------*/

               else if (CF_marker[i1] != -3)
               {
                  P_marker[i1] = strong_f_marker;
               }
            }
            jj_end_row = jj_counter;

            /* Off-Diagonal part of P */
            P_offd_i[i] = jj_counter_offd;
            jj_begin_row_offd = jj_counter_offd;


            if (num_procs > 1)
            {
               for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
               {
                  i1 = S_offd_j[jj];

                  /*-----------------------------------------------------------
                   * If neighbor i1 is a C-point, set column number in P_offd_j
                   * and initialize interpolation weight to zero.
                   *-----------------------------------------------------------*/

                  if (CF_marker_offd[i1] >= 0)
                  {
                     P_marker_offd[i1] = jj_counter_offd;
                     /*P_offd_j[jj_counter_offd]  = fine_to_coarse_offd[i1];*/
                     /* store the local offd index for now; it is remapped to
                      * compressed coarse column numbering after assembly */
                     P_offd_j[jj_counter_offd]  = i1;
                     P_offd_data[jj_counter_offd] = zero;
                     jj_counter_offd++;
                  }

                  /*-----------------------------------------------------------
                   * If neighbor i1 is an F-point, mark it as a strong F-point
                   * whose connection needs to be distributed.
                   *-----------------------------------------------------------*/

                  else if (CF_marker_offd[i1] != -3)
                  {
                     P_marker_offd[i1] = strong_f_marker;
                  }
               }
            }

            jj_end_row_offd = jj_counter_offd;

            /* first entry of each CSR row of A is assumed to be the diagonal */
            diagonal = A_diag_data[A_diag_i[i]];


            /* Loop over ith row of A.  First, the diagonal part of A */

            for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
            {
               i1 = A_diag_j[jj];

               /*--------------------------------------------------------------
                * Case 1: neighbor i1 is a C-point and strongly influences i,
                * accumulate a_{i,i1} into the interpolation weight.
                *--------------------------------------------------------------*/

               if (P_marker[i1] >= jj_begin_row)
               {
                  P_diag_data[P_marker[i1]] += A_diag_data[jj];
               }

               /*--------------------------------------------------------------
                * Case 2: neighbor i1 is an F-point and strongly influences i,
                * distribute a_{i,i1} to C-points that strongly infuence i.
                * Note: currently no distribution to the diagonal in this case.
                *--------------------------------------------------------------*/

               else if (P_marker[i1] == strong_f_marker)
               {
                  sum = zero;

                  /*-----------------------------------------------------------
                   * Loop over row of A for point i1 and calculate the sum
                   * of the connections to c-points that strongly influence i.
                   *-----------------------------------------------------------*/
                  /* only entries with sign opposite to i1's diagonal count */
                  sgn = 1;
                  if (A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
                  /* Diagonal block part of row i1 */
                  for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
                  {
                     i2 = A_diag_j[jj1];
                     if (P_marker[i2] >= jj_begin_row &&
                         (sgn*A_diag_data[jj1]) < 0)
                     {
                        sum += A_diag_data[jj1];
                     }
                  }

                  /* Off-Diagonal block part of row i1 */
                  if (num_procs > 1)
                  {
                     for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
                     {
                        i2 = A_offd_j[jj1];
                        if (P_marker_offd[i2] >= jj_begin_row_offd
                            && (sgn*A_offd_data[jj1]) < 0)
                        {
                           sum += A_offd_data[jj1];
                        }
                     }
                  }

                  if (sum != 0)
                  {
                     distribute = A_diag_data[jj] / sum;

                     /*-----------------------------------------------------------
                      * Loop over row of A for point i1 and do the distribution.
                      *-----------------------------------------------------------*/

                     /* Diagonal block part of row i1 */
                     for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
                     {
                        i2 = A_diag_j[jj1];
                        if (P_marker[i2] >= jj_begin_row
                            && (sgn*A_diag_data[jj1]) < 0)
                        {
                           P_diag_data[P_marker[i2]]
                              += distribute * A_diag_data[jj1];
                        }
                     }

                     /* Off-Diagonal block part of row i1 */
                     if (num_procs > 1)
                     {
                        for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
                        {
                           i2 = A_offd_j[jj1];
                           if (P_marker_offd[i2] >= jj_begin_row_offd
                               && (sgn*A_offd_data[jj1]) < 0)
                           {
                              P_offd_data[P_marker_offd[i2]]
                                 += distribute * A_offd_data[jj1];
                           }
                        }
                     }
                  }
                  else
                  {
                     /* no common strong C-neighbor: lump into diagonal
                      * (only within the same function in systems mode) */
                     if (num_functions == 1 || dof_func[i] == dof_func[i1])
                     {
                        diagonal += A_diag_data[jj];
                     }
                  }
               }

               /*--------------------------------------------------------------
                * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
                * into the diagonal.
                *--------------------------------------------------------------*/

               else if (CF_marker[i1] != -3)
               {
                  if (num_functions == 1 || dof_func[i] == dof_func[i1])
                  {
                     diagonal += A_diag_data[jj];
                  }
               }

            }


            /*----------------------------------------------------------------
             * Still looping over ith row of A. Next, loop over the
             * off-diagonal part of A
             *---------------------------------------------------------------*/

            if (num_procs > 1)
            {
               for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
               {
                  i1 = A_offd_j[jj];

                  /*--------------------------------------------------------------
                   * Case 1: neighbor i1 is a C-point and strongly influences i,
                   * accumulate a_{i,i1} into the interpolation weight.
                   *--------------------------------------------------------------*/

                  if (P_marker_offd[i1] >= jj_begin_row_offd)
                  {
                     P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
                  }

                  /*------------------------------------------------------------
                   * Case 2: neighbor i1 is an F-point and strongly influences i,
                   * distribute a_{i,i1} to C-points that strongly infuence i.
                   * Note: currently no distribution to the diagonal in this case.
                   *-----------------------------------------------------------*/

                  else if (P_marker_offd[i1] == strong_f_marker)
                  {
                     sum = zero;

                     /*---------------------------------------------------------
                      * Loop over row of A_ext for point i1 and calculate the sum
                      * of the connections to c-points that strongly influence i.
                      *---------------------------------------------------------*/

                     /* find row number */
                     c_num = A_offd_j[jj];

                     sgn = 1;
                     if (A_ext_data[A_ext_i[c_num]] < 0) sgn = -1;
                     /* A_ext_j encoding: i2 >= 0 is a local diag column,
                      * i2 < 0 encodes offd column -i2-1 (see compression
                      * of A_ext above) */
                     for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
                     {
                        i2 = (HYPRE_Int)A_ext_j[jj1];

                        if (i2 > -1)
                        {
                           /* in the diagonal block */
                           if (P_marker[i2] >= jj_begin_row
                               && (sgn*A_ext_data[jj1]) < 0)
                           {
                              sum += A_ext_data[jj1];
                           }
                        }
                        else
                        {
                           /* in the off_diagonal block  */
                           if (P_marker_offd[-i2-1] >= jj_begin_row_offd
                               && (sgn*A_ext_data[jj1]) < 0)
                           {
                              sum += A_ext_data[jj1];
                           }

                        }

                     }

                     if (sum != 0)
                     {
                        distribute = A_offd_data[jj] / sum;
                        /*---------------------------------------------------------
                         * Loop over row of A_ext for point i1 and do
                         * the distribution.
                         *--------------------------------------------------------*/

                        /* Diagonal block part of row i1 */

                        for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
                        {
                           i2 = (HYPRE_Int)A_ext_j[jj1];

                           if (i2 > -1) /* in the diagonal block */
                           {
                              if (P_marker[i2] >= jj_begin_row
                                  && (sgn*A_ext_data[jj1]) < 0)
                              {
                                 P_diag_data[P_marker[i2]]
                                    += distribute * A_ext_data[jj1];
                              }
                           }
                           else
                           {
                              /* in the off_diagonal block  */
                              if (P_marker_offd[-i2-1] >= jj_begin_row_offd
                                  && (sgn*A_ext_data[jj1]) < 0)
                                 P_offd_data[P_marker_offd[-i2-1]]
                                    += distribute * A_ext_data[jj1];
                           }
                        }
                     }
                     else
                     {
                        if (num_functions == 1 || dof_func[i] == dof_func_offd[i1])
                        {
                           diagonal += A_offd_data[jj];
                        }
                     }
                  }

                  /*-----------------------------------------------------------
                   * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
                   * into the diagonal.
                   *-----------------------------------------------------------*/

                  else if (CF_marker_offd[i1] != -3)
                  {
                     if (num_functions == 1 || dof_func[i] == dof_func_offd[i1])
                     {
                        diagonal += A_offd_data[jj];
                     }
                  }

               }
            }

            /*-----------------------------------------------------------------
             * Set interpolation weight by dividing by the diagonal.
             *-----------------------------------------------------------------*/

            if (diagonal == 0.0)
            {
               if (print_level)
               {
                  hypre_printf(" Warning! zero diagonal! Proc id %d row %d\n", my_id,i);
               }
               for (jj = jj_begin_row; jj < jj_end_row; jj++)
               {
                  P_diag_data[jj] = 0.0;
               }
               for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
               {
                  P_offd_data[jj] = 0.0;
               }
            }
            else
            {
               for (jj = jj_begin_row; jj < jj_end_row; jj++)
               {
                  P_diag_data[jj] /= -diagonal;
               }
               for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
               {
                  P_offd_data[jj] /= -diagonal;
               }
            }

         }

         /* make the strong-F tag unique per row so stale marks from the
          * previous row are never mistaken for this row's */
         strong_f_marker--;

         P_offd_i[i+1] = jj_counter_offd;
      }
      hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
      hypre_TFree(P_marker_offd, HYPRE_MEMORY_HOST);
   }

   P = hypre_ParCSRMatrixCreate(comm,
                                hypre_ParCSRMatrixGlobalNumRows(A),
                                total_global_cpts,
                                hypre_ParCSRMatrixColStarts(A),
                                num_cpts_global,
                                0,
                                P_diag_i[n_fine],
                                P_offd_i[n_fine]);

   P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrixData(P_diag) = P_diag_data;
   hypre_CSRMatrixI(P_diag) = P_diag_i;
   hypre_CSRMatrixJ(P_diag) = P_diag_j;
   P_offd = hypre_ParCSRMatrixOffd(P);
   hypre_CSRMatrixData(P_offd) = P_offd_data;
   hypre_CSRMatrixI(P_offd) = P_offd_i;
   hypre_CSRMatrixJ(P_offd) = P_offd_j;

   /* Compress P, removing coefficients smaller than trunc_factor * Max
      (truncation may reallocate the arrays, so re-fetch the pointers) */

   if (trunc_factor != 0.0 || max_elmts > 0)
   {
      hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
      P_diag_data = hypre_CSRMatrixData(P_diag);
      P_diag_i = hypre_CSRMatrixI(P_diag);
      P_diag_j = hypre_CSRMatrixJ(P_diag);
      P_offd_data = hypre_CSRMatrixData(P_offd);
      P_offd_i = hypre_CSRMatrixI(P_offd);
      P_offd_j = hypre_CSRMatrixJ(P_offd);
      P_diag_size = P_diag_i[n_fine];
      P_offd_size = P_offd_i[n_fine];
   }

   /* Build the compressed offd column map of P: find which offd columns
    * of A are actually used, then remap P_offd_j from A-offd indices to
    * 0..num_cols_P_offd-1 */
   num_cols_P_offd = 0;
   if (P_offd_size)
   {
      P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
      for (i=0; i < num_cols_A_offd; i++)
      {
         P_marker[i] = 0;
      }

      num_cols_P_offd = 0;
      for (i=0; i < P_offd_size; i++)
      {
         index = P_offd_j[i];
         if (!P_marker[index])
         {
            num_cols_P_offd++;
            P_marker[index] = 1;
         }
      }

      col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
      tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);

      index = 0;
      for (i=0; i < num_cols_P_offd; i++)
      {
         while (P_marker[index]==0) index++;
         tmp_map_offd[i] = index++;
      }

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
      for (i=0; i < P_offd_size; i++)
         P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
                                          P_offd_j[i],
                                          num_cols_P_offd);
      hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
   }

   /* restore the special -3 F-point marks to plain F-points */
   for (i=0; i < n_fine; i++)
   {
      if (CF_marker[i] == -3) CF_marker[i] = -1;
   }

   if (num_cols_P_offd)
   {
      hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
      hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
   }

   /* fills col_map_offd_P with global coarse indices and builds P's
    * communication package from A's */
   hypre_GetCommPkgRTFromCommPkgA(P,A, fine_to_coarse, tmp_map_offd);


   *P_ptr = P;

   hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
   hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
   hypre_TFree(dof_func_offd, HYPRE_MEMORY_HOST);
   hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
   hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
   //hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
   hypre_TFree(coarse_counter, HYPRE_MEMORY_HOST);
   hypre_TFree(jj_count, HYPRE_MEMORY_HOST);
   hypre_TFree(jj_count_offd, HYPRE_MEMORY_HOST);

   if (num_procs > 1) hypre_CSRMatrixDestroy(A_ext);

   return hypre_error_flag;
}
970
971
972 /*---------------------------------------------------------------------------
973 * hypre_BoomerAMGBuildInterpHE
974 * interpolation routine for hyperbolic PDEs
975 * treats weak fine connections like strong fine connections
976 *--------------------------------------------------------------------------*/
977
978 HYPRE_Int
hypre_BoomerAMGBuildInterpHE(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRMatrix ** P_ptr)979 hypre_BoomerAMGBuildInterpHE( hypre_ParCSRMatrix *A,
980 HYPRE_Int *CF_marker,
981 hypre_ParCSRMatrix *S,
982 HYPRE_BigInt *num_cpts_global,
983 HYPRE_Int num_functions,
984 HYPRE_Int *dof_func,
985 HYPRE_Int debug_flag,
986 HYPRE_Real trunc_factor,
987 HYPRE_Int max_elmts,
988 hypre_ParCSRMatrix **P_ptr)
989 {
990
991 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
992 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
993 hypre_ParCSRCommHandle *comm_handle;
994
995 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
996 HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
997 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
998 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
999
1000 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1001 HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
1002 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
1003 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
1004 HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
1005 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
1006
1007 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
1008 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
1009 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
1010
1011 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
1012 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
1013 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
1014
1015 hypre_ParCSRMatrix *P;
1016 HYPRE_BigInt *col_map_offd_P;
1017 HYPRE_Int *tmp_map_offd = NULL;
1018
1019 HYPRE_Int *CF_marker_offd = NULL;
1020 HYPRE_Int *dof_func_offd = NULL;
1021
1022 hypre_CSRMatrix *A_ext;
1023
1024 HYPRE_Real *A_ext_data = NULL;
1025 HYPRE_Int *A_ext_i = NULL;
1026 HYPRE_BigInt *A_ext_j = NULL;
1027
1028 hypre_CSRMatrix *P_diag;
1029 hypre_CSRMatrix *P_offd;
1030
1031 HYPRE_Real *P_diag_data;
1032 HYPRE_Int *P_diag_i;
1033 HYPRE_Int *P_diag_j;
1034 HYPRE_Real *P_offd_data;
1035 HYPRE_Int *P_offd_i;
1036 HYPRE_Int *P_offd_j;
1037
1038 HYPRE_Int P_diag_size, P_offd_size;
1039
1040 HYPRE_Int *P_marker, *P_marker_offd;
1041
1042 HYPRE_Int jj_counter,jj_counter_offd;
1043 HYPRE_Int *jj_count, *jj_count_offd;
1044 HYPRE_Int jj_begin_row,jj_begin_row_offd;
1045 HYPRE_Int jj_end_row,jj_end_row_offd;
1046
1047 HYPRE_Int start_indexing = 0; /* start indexing for P_data at 0 */
1048
1049 HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
1050
1051 HYPRE_Int *fine_to_coarse;
1052 //HYPRE_Int *fine_to_coarse_offd;
1053 HYPRE_Int *coarse_counter;
1054 HYPRE_Int coarse_shift;
1055 HYPRE_BigInt total_global_cpts;
1056 //HYPRE_BigInt my_first_cpt;
1057 HYPRE_Int num_cols_P_offd;
1058
1059 HYPRE_Int i,i1,i2;
1060 HYPRE_Int j,jl,jj,jj1;
1061 HYPRE_Int kc;
1062 HYPRE_BigInt big_k;
1063 HYPRE_Int start;
1064 HYPRE_Int sgn;
1065 HYPRE_Int c_num;
1066
1067 HYPRE_Real diagonal;
1068 HYPRE_Real sum;
1069 HYPRE_Real distribute;
1070
1071 HYPRE_Real zero = 0.0;
1072 HYPRE_Real one = 1.0;
1073
1074 HYPRE_Int my_id;
1075 HYPRE_Int num_procs;
1076 HYPRE_Int num_threads;
1077 HYPRE_Int num_sends;
1078 HYPRE_Int index;
1079 HYPRE_Int ns, ne, size, rest;
1080 HYPRE_Int *int_buf_data;
1081
1082 HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
1083 HYPRE_Int local_numrows = hypre_CSRMatrixNumRows(A_diag);
1084 HYPRE_BigInt col_n = col_1 + local_numrows;
1085
1086 HYPRE_Real wall_time; /* for debugging instrumentation */
1087
1088 hypre_MPI_Comm_size(comm, &num_procs);
1089 hypre_MPI_Comm_rank(comm,&my_id);
1090 num_threads = hypre_NumThreads();
1091
1092
1093 //my_first_cpt = num_cpts_global[0];
1094 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
1095 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
1096
1097 /*-------------------------------------------------------------------
1098 * Get the CF_marker data for the off-processor columns
1099 *-------------------------------------------------------------------*/
1100
1101 if (debug_flag==4) wall_time = time_getWallclockSeconds();
1102
1103 if (num_cols_A_offd) CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
1104 if (num_functions > 1 && num_cols_A_offd)
1105 dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
1106
1107 if (!comm_pkg)
1108 {
1109 hypre_MatvecCommPkgCreate(A);
1110 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1111 }
1112
1113 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1114 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
1115 HYPRE_MEMORY_HOST);
1116
1117 index = 0;
1118 for (i = 0; i < num_sends; i++)
1119 {
1120 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1121 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1122 int_buf_data[index++]
1123 = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1124 }
1125
1126 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, CF_marker_offd);
1127
1128 hypre_ParCSRCommHandleDestroy(comm_handle);
1129 if (num_functions > 1)
1130 {
1131 index = 0;
1132 for (i = 0; i < num_sends; i++)
1133 {
1134 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1135 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1136 int_buf_data[index++]
1137 = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1138 }
1139
1140 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, dof_func_offd);
1141
1142 hypre_ParCSRCommHandleDestroy(comm_handle);
1143 }
1144
1145 if (debug_flag==4)
1146 {
1147 wall_time = time_getWallclockSeconds() - wall_time;
1148 hypre_printf("Proc = %d Interp: Comm 1 CF_marker = %f\n",
1149 my_id, wall_time);
1150 fflush(NULL);
1151 }
1152
1153 /*----------------------------------------------------------------------
1154 * Get the ghost rows of A
1155 *---------------------------------------------------------------------*/
1156
1157 if (debug_flag==4) wall_time = time_getWallclockSeconds();
1158
1159 if (num_procs > 1)
1160 {
1161 A_ext = hypre_ParCSRMatrixExtractBExt(A,A,1);
1162 A_ext_i = hypre_CSRMatrixI(A_ext);
1163 A_ext_j = hypre_CSRMatrixBigJ(A_ext);
1164 A_ext_data = hypre_CSRMatrixData(A_ext);
1165 }
1166
1167 index = 0;
1168 for (i=0; i < num_cols_A_offd; i++)
1169 {
1170 for (j=A_ext_i[i]; j < A_ext_i[i+1]; j++)
1171 {
1172 big_k = A_ext_j[j];
1173 if (big_k >= col_1 && big_k < col_n)
1174 {
1175 A_ext_j[index] = big_k - col_1;
1176 A_ext_data[index++] = A_ext_data[j];
1177 }
1178 else
1179 {
1180 kc = hypre_BigBinarySearch(col_map_offd,big_k,num_cols_A_offd);
1181 if (kc > -1)
1182 {
1183 A_ext_j[index] = (HYPRE_BigInt)(-kc-1);
1184 A_ext_data[index++] = A_ext_data[j];
1185 }
1186 }
1187 }
1188 A_ext_i[i] = index;
1189 }
1190 for (i = num_cols_A_offd; i > 0; i--)
1191 A_ext_i[i] = A_ext_i[i-1];
1192 if (num_procs > 1) A_ext_i[0] = 0;
1193
1194 if (debug_flag==4)
1195 {
1196 wall_time = time_getWallclockSeconds() - wall_time;
1197 hypre_printf("Proc = %d Interp: Comm 2 Get A_ext = %f\n",
1198 my_id, wall_time);
1199 fflush(NULL);
1200 }
1201
1202 /*-----------------------------------------------------------------------
1203 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
1204 *-----------------------------------------------------------------------*/
1205
1206 /*-----------------------------------------------------------------------
1207     * Initialize counters and allocate mapping vector.
1208 *-----------------------------------------------------------------------*/
1209
1210 coarse_counter = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
1211 jj_count = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
1212 jj_count_offd = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
1213
1214 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
1215 #ifdef HYPRE_USING_OPENMP
1216 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
1217 #endif
1218 for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1;
1219
1220 jj_counter = start_indexing;
1221 jj_counter_offd = start_indexing;
1222
1223 /*-----------------------------------------------------------------------
1224 * Loop over fine grid.
1225 *-----------------------------------------------------------------------*/
1226
1227 /* RDF: this looks a little tricky, but doable */
1228 #ifdef HYPRE_USING_OPENMP
1229 #pragma omp parallel for private(i,j,i1,jj,ns,ne,size,rest) HYPRE_SMP_SCHEDULE
1230 #endif
1231 for (j = 0; j < num_threads; j++)
1232 {
1233 size = n_fine/num_threads;
1234 rest = n_fine - size*num_threads;
1235 if (j < rest)
1236 {
1237 ns = j*size+j;
1238 ne = (j+1)*size+j+1;
1239 }
1240 else
1241 {
1242 ns = j*size+rest;
1243 ne = (j+1)*size+rest;
1244 }
1245 for (i = ns; i < ne; i++)
1246 {
1247
1248 /*--------------------------------------------------------------------
1249 * If i is a C-point, interpolation is the identity. Also set up
1250 * mapping vector.
1251 *--------------------------------------------------------------------*/
1252
1253 if (CF_marker[i] >= 0)
1254 {
1255 jj_count[j]++;
1256 fine_to_coarse[i] = coarse_counter[j];
1257 coarse_counter[j]++;
1258 }
1259
1260 /*--------------------------------------------------------------------
1261 * If i is an F-point, interpolation is from the C-points that
1262 * strongly influence i.
1263 *--------------------------------------------------------------------*/
1264
1265 else
1266 {
1267 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
1268 {
1269 i1 = S_diag_j[jj];
1270 if (CF_marker[i1] >= 0)
1271 {
1272 jj_count[j]++;
1273 }
1274 }
1275
1276 if (num_procs > 1)
1277 {
1278 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
1279 {
1280 i1 = S_offd_j[jj];
1281 if (CF_marker_offd[i1] >= 0)
1282 {
1283 jj_count_offd[j]++;
1284 }
1285 }
1286 }
1287 }
1288 }
1289 }
1290
1291 /*-----------------------------------------------------------------------
1292 * Allocate arrays.
1293 *-----------------------------------------------------------------------*/
1294
1295 for (i=0; i < num_threads-1; i++)
1296 {
1297 coarse_counter[i+1] += coarse_counter[i];
1298 jj_count[i+1] += jj_count[i];
1299 jj_count_offd[i+1] += jj_count_offd[i];
1300 }
1301 i = num_threads-1;
1302 jj_counter = jj_count[i];
1303 jj_counter_offd = jj_count_offd[i];
1304
1305 P_diag_size = jj_counter;
1306
1307 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
1308 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
1309 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, HYPRE_MEMORY_HOST);
1310
1311 P_diag_i[n_fine] = jj_counter;
1312
1313
1314 P_offd_size = jj_counter_offd;
1315
1316 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
1317 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
1318 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, HYPRE_MEMORY_HOST);
1319
1320 /*-----------------------------------------------------------------------
1321     * Initialize some stuff.
1322 *-----------------------------------------------------------------------*/
1323
1324 jj_counter = start_indexing;
1325 jj_counter_offd = start_indexing;
1326
1327 if (debug_flag==4)
1328 {
1329 wall_time = time_getWallclockSeconds() - wall_time;
1330 hypre_printf("Proc = %d Interp: Internal work 1 = %f\n",
1331 my_id, wall_time);
1332 fflush(NULL);
1333 }
1334
1335 /*-----------------------------------------------------------------------
1336 * Send and receive fine_to_coarse info.
1337 *-----------------------------------------------------------------------*/
1338
1339 if (debug_flag==4) wall_time = time_getWallclockSeconds();
1340
1341 //fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
1342
1343 #ifdef HYPRE_USING_OPENMP
1344 #pragma omp parallel for private(i,j,ns,ne,size,rest,coarse_shift) HYPRE_SMP_SCHEDULE
1345 #endif
1346 for (j = 0; j < num_threads; j++)
1347 {
1348 coarse_shift = 0;
1349 if (j > 0) coarse_shift = coarse_counter[j-1];
1350 size = n_fine/num_threads;
1351 rest = n_fine - size*num_threads;
1352 if (j < rest)
1353 {
1354 ns = j*size+j;
1355 ne = (j+1)*size+j+1;
1356 }
1357 else
1358 {
1359 ns = j*size+rest;
1360 ne = (j+1)*size+rest;
1361 }
1362 for (i = ns; i < ne; i++)
1363 fine_to_coarse[i] += coarse_shift;
1364 }
1365 /*index = 0;
1366 for (i = 0; i < num_sends; i++)
1367 {
1368 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1369 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1370 int_buf_data[index++]
1371 = fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1372 }
1373
1374 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
1375 fine_to_coarse_offd);
1376
1377 hypre_ParCSRCommHandleDestroy(comm_handle);
1378
1379 if (debug_flag==4)
1380 {
1381 wall_time = time_getWallclockSeconds() - wall_time;
1382 hypre_printf("Proc = %d Interp: Comm 4 FineToCoarse = %f\n",
1383 my_id, wall_time);
1384 fflush(NULL);
1385 }*/
1386
1387 if (debug_flag==4) wall_time = time_getWallclockSeconds();
1388
1389 /*#ifdef HYPRE_USING_OPENMP
1390 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
1391 #endif
1392 for (i = 0; i < n_fine; i++) fine_to_coarse[i] -= my_first_cpt;*/
1393
1394 /*-----------------------------------------------------------------------
1395 * Loop over fine grid points.
1396 *-----------------------------------------------------------------------*/
1397
1398 #ifdef HYPRE_USING_OPENMP
1399 #pragma omp parallel for private(i,j,jl,i1,i2,jj,jj1,ns,ne,size,rest,sum,diagonal,distribute,P_marker,P_marker_offd,jj_counter,jj_counter_offd,sgn,c_num,jj_begin_row,jj_end_row,jj_begin_row_offd,jj_end_row_offd) HYPRE_SMP_SCHEDULE
1400 #endif
1401 for (jl = 0; jl < num_threads; jl++)
1402 {
1403 size = n_fine/num_threads;
1404 rest = n_fine - size*num_threads;
1405 if (jl < rest)
1406 {
1407 ns = jl*size+jl;
1408 ne = (jl+1)*size+jl+1;
1409 }
1410 else
1411 {
1412 ns = jl*size+rest;
1413 ne = (jl+1)*size+rest;
1414 }
1415 jj_counter = 0;
1416 if (jl > 0) jj_counter = jj_count[jl-1];
1417 jj_counter_offd = 0;
1418 if (jl > 0) jj_counter_offd = jj_count_offd[jl-1];
1419
1420 P_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
1421 if (num_cols_A_offd)
1422 P_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
1423 else
1424 P_marker_offd = NULL;
1425
1426 for (i = 0; i < n_fine; i++)
1427 {
1428 P_marker[i] = -1;
1429 }
1430 for (i = 0; i < num_cols_A_offd; i++)
1431 {
1432 P_marker_offd[i] = -1;
1433 }
1434
1435 for (i = ns; i < ne; i++)
1436 {
1437
1438 /*--------------------------------------------------------------------
1439 * If i is a c-point, interpolation is the identity.
1440 *--------------------------------------------------------------------*/
1441
1442 if (CF_marker[i] >= 0)
1443 {
1444 P_diag_i[i] = jj_counter;
1445 P_diag_j[jj_counter] = fine_to_coarse[i];
1446 P_diag_data[jj_counter] = one;
1447 jj_counter++;
1448 }
1449
1450 /*--------------------------------------------------------------------
1451 * If i is an F-point, build interpolation.
1452 *--------------------------------------------------------------------*/
1453
1454 else
1455 {
1456 /* Diagonal part of P */
1457 P_diag_i[i] = jj_counter;
1458 jj_begin_row = jj_counter;
1459
1460 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
1461 {
1462 i1 = S_diag_j[jj];
1463
1464 /*--------------------------------------------------------------
1465 * If neighbor i1 is a C-point, set column number in P_diag_j
1466 * and initialize interpolation weight to zero.
1467 *--------------------------------------------------------------*/
1468
1469 if (CF_marker[i1] >= 0)
1470 {
1471 P_marker[i1] = jj_counter;
1472 P_diag_j[jj_counter] = fine_to_coarse[i1];
1473 P_diag_data[jj_counter] = zero;
1474 jj_counter++;
1475 }
1476
1477 }
1478 jj_end_row = jj_counter;
1479
1480 /* Off-Diagonal part of P */
1481 P_offd_i[i] = jj_counter_offd;
1482 jj_begin_row_offd = jj_counter_offd;
1483
1484
1485 if (num_procs > 1)
1486 {
1487 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
1488 {
1489 i1 = S_offd_j[jj];
1490
1491 /*-----------------------------------------------------------
1492 * If neighbor i1 is a C-point, set column number in P_offd_j
1493 * and initialize interpolation weight to zero.
1494 *-----------------------------------------------------------*/
1495
1496 if (CF_marker_offd[i1] >= 0)
1497 {
1498 P_marker_offd[i1] = jj_counter_offd;
1499 P_offd_j[jj_counter_offd] = i1;
1500 P_offd_data[jj_counter_offd] = zero;
1501 jj_counter_offd++;
1502 }
1503 }
1504 }
1505
1506 jj_end_row_offd = jj_counter_offd;
1507
1508 diagonal = A_diag_data[A_diag_i[i]];
1509
1510
1511 /* Loop over ith row of A. First, the diagonal part of A */
1512
1513 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1514 {
1515 i1 = A_diag_j[jj];
1516
1517 /*--------------------------------------------------------------
1518 * Case 1: neighbor i1 is a C-point and strongly influences i,
1519 * accumulate a_{i,i1} into the interpolation weight.
1520 *--------------------------------------------------------------*/
1521
1522 if (P_marker[i1] >= jj_begin_row)
1523 {
1524 P_diag_data[P_marker[i1]] += A_diag_data[jj];
1525 }
1526
1527 /*--------------------------------------------------------------
1528 * Case 2: neighbor i1 is an F-point and influences i,
1529 * distribute a_{i,i1} to C-points that strongly influence i.
1530 * Note: currently no distribution to the diagonal in this case.
1531 *--------------------------------------------------------------*/
1532
1533 else
1534 {
1535 sum = zero;
1536
1537 /*-----------------------------------------------------------
1538 * Loop over row of A for point i1 and calculate the sum
1539 * of the connections to c-points that strongly influence i.
1540 *-----------------------------------------------------------*/
1541 sgn = 1;
1542 if (A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
1543 /* Diagonal block part of row i1 */
1544 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
1545 {
1546 i2 = A_diag_j[jj1];
1547 if (P_marker[i2] >= jj_begin_row &&
1548 (sgn*A_diag_data[jj1]) < 0)
1549 {
1550 sum += A_diag_data[jj1];
1551 }
1552 }
1553
1554 /* Off-Diagonal block part of row i1 */
1555 if (num_procs > 1)
1556 {
1557 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
1558 {
1559 i2 = A_offd_j[jj1];
1560 if (P_marker_offd[i2] >= jj_begin_row_offd
1561 && (sgn*A_offd_data[jj1]) < 0)
1562 {
1563 sum += A_offd_data[jj1];
1564 }
1565 }
1566 }
1567
1568 if (sum != 0)
1569 {
1570 distribute = A_diag_data[jj] / sum;
1571
1572 /*-----------------------------------------------------------
1573 * Loop over row of A for point i1 and do the distribution.
1574 *-----------------------------------------------------------*/
1575
1576 /* Diagonal block part of row i1 */
1577 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
1578 {
1579 i2 = A_diag_j[jj1];
1580 if (P_marker[i2] >= jj_begin_row
1581 && (sgn*A_diag_data[jj1]) < 0)
1582 {
1583 P_diag_data[P_marker[i2]]
1584 += distribute * A_diag_data[jj1];
1585 }
1586 }
1587
1588 /* Off-Diagonal block part of row i1 */
1589 if (num_procs > 1)
1590 {
1591 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
1592 {
1593 i2 = A_offd_j[jj1];
1594 if (P_marker_offd[i2] >= jj_begin_row_offd
1595 && (sgn*A_offd_data[jj1]) < 0)
1596 {
1597 P_offd_data[P_marker_offd[i2]]
1598 += distribute * A_offd_data[jj1];
1599 }
1600 }
1601 }
1602 }
1603 else
1604 {
1605 if (num_functions == 1 || dof_func[i] == dof_func[i1])
1606 diagonal += A_diag_data[jj];
1607 }
1608 }
1609
1610 }
1611
1612
1613 /*----------------------------------------------------------------
1614 * Still looping over ith row of A. Next, loop over the
1615 * off-diagonal part of A
1616 *---------------------------------------------------------------*/
1617
1618 if (num_procs > 1)
1619 {
1620 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1621 {
1622 i1 = A_offd_j[jj];
1623
1624 /*--------------------------------------------------------------
1625 * Case 1: neighbor i1 is a C-point and strongly influences i,
1626 * accumulate a_{i,i1} into the interpolation weight.
1627 *--------------------------------------------------------------*/
1628
1629 if (P_marker_offd[i1] >= jj_begin_row_offd)
1630 {
1631 P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
1632 }
1633
1634 /*------------------------------------------------------------
1635 * Case 2: neighbor i1 is an F-point and influences i,
1636                   * distribute a_{i,i1} to C-points that strongly influence i.
1637 * Note: currently no distribution to the diagonal in this case.
1638 *-----------------------------------------------------------*/
1639
1640 else
1641 {
1642 sum = zero;
1643
1644 /*---------------------------------------------------------
1645 * Loop over row of A_ext for point i1 and calculate the sum
1646 * of the connections to c-points that strongly influence i.
1647 *---------------------------------------------------------*/
1648
1649 /* find row number */
1650 c_num = A_offd_j[jj];
1651
1652 sgn = 1;
1653 if (A_ext_data[A_ext_i[c_num]] < 0) sgn = -1;
1654 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
1655 {
1656 i2 = (HYPRE_Int)A_ext_j[jj1];
1657
1658 if (i2 > -1)
1659 {
1660 /* in the diagonal block */
1661 if (P_marker[i2] >= jj_begin_row
1662 && (sgn*A_ext_data[jj1]) < 0)
1663 {
1664 sum += A_ext_data[jj1];
1665 }
1666 }
1667 else
1668 {
1669 /* in the off_diagonal block */
1670 if (P_marker_offd[-i2-1] >= jj_begin_row_offd
1671 && (sgn*A_ext_data[jj1]) < 0)
1672 {
1673 sum += A_ext_data[jj1];
1674 }
1675
1676 }
1677
1678 }
1679
1680 if (sum != 0)
1681 {
1682 distribute = A_offd_data[jj] / sum;
1683 /*---------------------------------------------------------
1684 * Loop over row of A_ext for point i1 and do
1685 * the distribution.
1686 *--------------------------------------------------------*/
1687
1688 /* Diagonal block part of row i1 */
1689 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
1690 {
1691 i2 = (HYPRE_Int)A_ext_j[jj1];
1692
1693 if (i2 > -1) /* in the diagonal block */
1694 {
1695 if (P_marker[i2] >= jj_begin_row
1696 && (sgn*A_ext_data[jj1]) < 0)
1697 {
1698 P_diag_data[P_marker[i2]]
1699 += distribute * A_ext_data[jj1];
1700 }
1701 }
1702 else
1703 {
1704 /* in the off_diagonal block */
1705 if (P_marker_offd[-i2-1] >= jj_begin_row_offd
1706 && (sgn*A_ext_data[jj1]) < 0)
1707 P_offd_data[P_marker_offd[-i2-1]]
1708 += distribute * A_ext_data[jj1];
1709 }
1710 }
1711 }
1712 else
1713 {
1714 if (num_functions == 1 || dof_func[i] == dof_func_offd[i1])
1715 diagonal += A_offd_data[jj];
1716 }
1717 }
1718 }
1719 }
1720
1721 /*-----------------------------------------------------------------
1722 * Set interpolation weight by dividing by the diagonal.
1723 *-----------------------------------------------------------------*/
1724
1725 for (jj = jj_begin_row; jj < jj_end_row; jj++)
1726 {
1727 P_diag_data[jj] /= -diagonal;
1728 }
1729
1730 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
1731 {
1732 P_offd_data[jj] /= -diagonal;
1733 }
1734 }
1735
1736 P_offd_i[i+1] = jj_counter_offd;
1737 }
1738 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
1739 hypre_TFree(P_marker_offd, HYPRE_MEMORY_HOST);
1740 }
1741
1742 P = hypre_ParCSRMatrixCreate(comm,
1743 hypre_ParCSRMatrixGlobalNumRows(A),
1744 total_global_cpts,
1745 hypre_ParCSRMatrixColStarts(A),
1746 num_cpts_global,
1747 0,
1748 P_diag_i[n_fine],
1749 P_offd_i[n_fine]);
1750
1751
1752 P_diag = hypre_ParCSRMatrixDiag(P);
1753 hypre_CSRMatrixData(P_diag) = P_diag_data;
1754 hypre_CSRMatrixI(P_diag) = P_diag_i;
1755 hypre_CSRMatrixJ(P_diag) = P_diag_j;
1756 P_offd = hypre_ParCSRMatrixOffd(P);
1757 hypre_CSRMatrixData(P_offd) = P_offd_data;
1758 hypre_CSRMatrixI(P_offd) = P_offd_i;
1759 hypre_CSRMatrixJ(P_offd) = P_offd_j;
1760
1761 /* Compress P, removing coefficients smaller than trunc_factor * Max */
1762
1763 if (trunc_factor != 0.0 || max_elmts > 0)
1764 {
1765 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
1766 P_diag_data = hypre_CSRMatrixData(P_diag);
1767 P_diag_i = hypre_CSRMatrixI(P_diag);
1768 P_diag_j = hypre_CSRMatrixJ(P_diag);
1769 P_offd_data = hypre_CSRMatrixData(P_offd);
1770 P_offd_i = hypre_CSRMatrixI(P_offd);
1771 P_offd_j = hypre_CSRMatrixJ(P_offd);
1772 P_diag_size = P_diag_i[n_fine];
1773 P_offd_size = P_offd_i[n_fine];
1774 }
1775
1776 num_cols_P_offd = 0;
1777 if (P_offd_size)
1778 {
1779 P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
1780
1781 #ifdef HYPRE_USING_OPENMP
1782 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
1783 #endif
1784 for (i=0; i < num_cols_A_offd; i++)
1785 P_marker[i] = 0;
1786
1787 num_cols_P_offd = 0;
1788 for (i=0; i < P_offd_size; i++)
1789 {
1790 index = P_offd_j[i];
1791 if (!P_marker[index])
1792 {
1793 num_cols_P_offd++;
1794 P_marker[index] = 1;
1795 }
1796 }
1797
1798 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
1799 tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
1800
1801 index = 0;
1802 for (i=0; i < num_cols_P_offd; i++)
1803 {
1804 while (P_marker[index]==0) index++;
1805 tmp_map_offd[i] = index++;
1806 }
1807
1808 #ifdef HYPRE_USING_OPENMP
1809 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
1810 #endif
1811 for (i=0; i < P_offd_size; i++)
1812 P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
1813 P_offd_j[i],
1814 num_cols_P_offd);
1815 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
1816 }
1817
1818 for (i=0; i < n_fine; i++)
1819 if (CF_marker[i] == -3) CF_marker[i] = -1;
1820
1821 if (num_cols_P_offd)
1822 {
1823 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
1824 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
1825 }
1826
1827 hypre_GetCommPkgRTFromCommPkgA(P,A,fine_to_coarse, tmp_map_offd);
1828
1829 *P_ptr = P;
1830
1831 hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
1832 hypre_TFree(dof_func_offd, HYPRE_MEMORY_HOST);
1833 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
1834 hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
1835 hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
1836 hypre_TFree(coarse_counter, HYPRE_MEMORY_HOST);
1837 hypre_TFree(jj_count, HYPRE_MEMORY_HOST);
1838 hypre_TFree(jj_count_offd, HYPRE_MEMORY_HOST);
1839
1840 if (num_procs > 1)
1841 {
1842 hypre_CSRMatrixDestroy(A_ext);
1843 }
1844
1845 return hypre_error_flag;
1846 }
1847
1848
1849 /*---------------------------------------------------------------------------
1850  * hypre_BoomerAMGBuildDirInterpHost
1851 *--------------------------------------------------------------------------*/
1852
1853 HYPRE_Int
hypre_BoomerAMGBuildDirInterpHost(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRMatrix ** P_ptr)1854 hypre_BoomerAMGBuildDirInterpHost( hypre_ParCSRMatrix *A,
1855 HYPRE_Int *CF_marker,
1856 hypre_ParCSRMatrix *S,
1857 HYPRE_BigInt *num_cpts_global,
1858 HYPRE_Int num_functions,
1859 HYPRE_Int *dof_func,
1860 HYPRE_Int debug_flag,
1861 HYPRE_Real trunc_factor,
1862 HYPRE_Int max_elmts,
1863 hypre_ParCSRMatrix **P_ptr)
1864 {
1865
1866 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
1867 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1868 hypre_ParCSRCommHandle *comm_handle;
1869
1870 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1871 HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
1872 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
1873 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
1874
1875 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1876 HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
1877 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
1878 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
1879 HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
1880
1881 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
1882 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
1883 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
1884
1885 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
1886 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
1887 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
1888
1889 hypre_ParCSRMatrix *P;
1890 HYPRE_BigInt *col_map_offd_P;
1891 HYPRE_Int *tmp_map_offd = NULL;
1892
1893 HYPRE_Int *CF_marker_offd = NULL;
1894 HYPRE_Int *dof_func_offd = NULL;
1895
1896 hypre_CSRMatrix *P_diag;
1897 hypre_CSRMatrix *P_offd;
1898
1899 HYPRE_Real *P_diag_data;
1900 HYPRE_Int *P_diag_i;
1901 HYPRE_Int *P_diag_j;
1902 HYPRE_Real *P_offd_data;
1903 HYPRE_Int *P_offd_i;
1904 HYPRE_Int *P_offd_j;
1905
1906 HYPRE_Int P_diag_size, P_offd_size;
1907
1908 HYPRE_Int jj_counter,jj_counter_offd;
1909 HYPRE_Int *jj_count, *jj_count_offd;
1910 HYPRE_Int jj_begin_row,jj_begin_row_offd;
1911 HYPRE_Int jj_end_row,jj_end_row_offd;
1912
1913 HYPRE_Int start_indexing = 0; /* start indexing for P_data at 0 */
1914
1915 HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
1916
1917 HYPRE_Int *fine_to_coarse;
1918 HYPRE_Int *coarse_counter;
1919 HYPRE_Int coarse_shift;
1920 HYPRE_BigInt total_global_cpts;
1921 HYPRE_Int num_cols_P_offd;
1922 //HYPRE_BigInt my_first_cpt;
1923
1924 HYPRE_Int i,i1;
1925 HYPRE_Int j,jl,jj;
1926 HYPRE_Int start;
1927
1928 HYPRE_Real diagonal;
1929 HYPRE_Real sum_N_pos, sum_P_pos;
1930 HYPRE_Real sum_N_neg, sum_P_neg;
1931 HYPRE_Real alfa = 1.0;
1932 HYPRE_Real beta = 1.0;
1933
1934 HYPRE_Real zero = 0.0;
1935 HYPRE_Real one = 1.0;
1936
1937 HYPRE_Int my_id;
1938 HYPRE_Int num_procs;
1939 HYPRE_Int num_threads;
1940 HYPRE_Int num_sends;
1941 HYPRE_Int index;
1942 HYPRE_Int ns, ne, size, rest;
1943 HYPRE_Int *int_buf_data;
1944
1945 HYPRE_Real wall_time; /* for debugging instrumentation */
1946
1947 hypre_MPI_Comm_size(comm, &num_procs);
1948 hypre_MPI_Comm_rank(comm,&my_id);
1949 num_threads = hypre_NumThreads();
1950
1951 //my_first_cpt = num_cpts_global[0];
1952 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
1953 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
1954
1955 /*-------------------------------------------------------------------
1956 * Get the CF_marker data for the off-processor columns
1957 *-------------------------------------------------------------------*/
1958
1959 if (debug_flag==4) wall_time = time_getWallclockSeconds();
1960
1961 if (num_cols_A_offd) CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
1962 if (num_functions > 1 && num_cols_A_offd)
1963 dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
1964
1965 if (!comm_pkg)
1966 {
1967 hypre_MatvecCommPkgCreate(A);
1968 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1969 }
1970
1971 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1972 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
1973 num_sends), HYPRE_MEMORY_HOST);
1974
1975 index = 0;
1976 for (i = 0; i < num_sends; i++)
1977 {
1978 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1979 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1980 int_buf_data[index++]
1981 = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1982 }
1983
1984 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
1985 CF_marker_offd);
1986
1987 hypre_ParCSRCommHandleDestroy(comm_handle);
1988 if (num_functions > 1)
1989 {
1990 index = 0;
1991 for (i = 0; i < num_sends; i++)
1992 {
1993 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1994 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1995 int_buf_data[index++]
1996 = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1997 }
1998
1999 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
2000 dof_func_offd);
2001
2002 hypre_ParCSRCommHandleDestroy(comm_handle);
2003 }
2004
2005 if (debug_flag==4)
2006 {
2007 wall_time = time_getWallclockSeconds() - wall_time;
2008 hypre_printf("Proc = %d Interp: Comm 1 CF_marker = %f\n",
2009 my_id, wall_time);
2010 fflush(NULL);
2011 }
2012
2013 /*-----------------------------------------------------------------------
2014 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
2015 *-----------------------------------------------------------------------*/
2016
2017 /*-----------------------------------------------------------------------
2018     * Initialize counters and allocate mapping vector.
2019 *-----------------------------------------------------------------------*/
2020
2021 coarse_counter = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
2022 jj_count = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
2023 jj_count_offd = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
2024
2025 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
2026 #ifdef HYPRE_USING_OPENMP
2027 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
2028 #endif
2029 for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1;
2030
2031 jj_counter = start_indexing;
2032 jj_counter_offd = start_indexing;
2033
2034 /*-----------------------------------------------------------------------
2035 * Loop over fine grid.
2036 *-----------------------------------------------------------------------*/
2037
2038 /* RDF: this looks a little tricky, but doable */
2039 #ifdef HYPRE_USING_OPENMP
2040 #pragma omp parallel for private(i,j,i1,jj,ns,ne,size,rest) HYPRE_SMP_SCHEDULE
2041 #endif
2042 for (j = 0; j < num_threads; j++)
2043 {
2044 size = n_fine/num_threads;
2045 rest = n_fine - size*num_threads;
2046 if (j < rest)
2047 {
2048 ns = j*size+j;
2049 ne = (j+1)*size+j+1;
2050 }
2051 else
2052 {
2053 ns = j*size+rest;
2054 ne = (j+1)*size+rest;
2055 }
2056 for (i = ns; i < ne; i++)
2057 {
2058
2059 /*--------------------------------------------------------------------
2060 * If i is a C-point, interpolation is the identity. Also set up
2061 * mapping vector.
2062 *--------------------------------------------------------------------*/
2063
2064 if (CF_marker[i] >= 0)
2065 {
2066 jj_count[j]++;
2067 fine_to_coarse[i] = coarse_counter[j];
2068 coarse_counter[j]++;
2069 }
2070
2071 /*--------------------------------------------------------------------
2072 * If i is an F-point, interpolation is from the C-points that
2073 * strongly influence i.
2074 *--------------------------------------------------------------------*/
2075
2076 else
2077 {
2078 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2079 {
2080 i1 = S_diag_j[jj];
2081 if (CF_marker[i1] > 0)
2082 {
2083 jj_count[j]++;
2084 }
2085 }
2086
2087 if (num_procs > 1)
2088 {
2089 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2090 {
2091 i1 = S_offd_j[jj];
2092 if (CF_marker_offd[i1] > 0)
2093 {
2094 jj_count_offd[j]++;
2095 }
2096 }
2097 }
2098 }
2099 }
2100 }
2101
2102 /*-----------------------------------------------------------------------
2103 * Allocate arrays.
2104 *-----------------------------------------------------------------------*/
2105
2106 for (i=0; i < num_threads-1; i++)
2107 {
2108 coarse_counter[i+1] += coarse_counter[i];
2109 jj_count[i+1] += jj_count[i];
2110 jj_count_offd[i+1] += jj_count_offd[i];
2111 }
2112 i = num_threads-1;
2113 jj_counter = jj_count[i];
2114 jj_counter_offd = jj_count_offd[i];
2115
2116 P_diag_size = jj_counter;
2117
2118 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_DEVICE);
2119 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_DEVICE);
2120 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, HYPRE_MEMORY_DEVICE);
2121
2122 P_diag_i[n_fine] = jj_counter;
2123
2124
2125 P_offd_size = jj_counter_offd;
2126
2127 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_DEVICE);
2128 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_DEVICE);
2129 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, HYPRE_MEMORY_DEVICE);
2130
2131 /*-----------------------------------------------------------------------
2132 * Intialize some stuff.
2133 *-----------------------------------------------------------------------*/
2134
2135 jj_counter = start_indexing;
2136 jj_counter_offd = start_indexing;
2137
2138 if (debug_flag==4)
2139 {
2140 wall_time = time_getWallclockSeconds() - wall_time;
2141 hypre_printf("Proc = %d Interp: Internal work 1 = %f\n",
2142 my_id, wall_time);
2143 fflush(NULL);
2144 }
2145
2146 /*-----------------------------------------------------------------------
2147 * Send and receive fine_to_coarse info.
2148 *-----------------------------------------------------------------------*/
2149
2150 if (debug_flag==4) wall_time = time_getWallclockSeconds();
2151
2152 //fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
2153
2154 #ifdef HYPRE_USING_OPENMP
2155 #pragma omp parallel for private(i,j,ns,ne,size,rest,coarse_shift) HYPRE_SMP_SCHEDULE
2156 #endif
2157 for (j = 0; j < num_threads; j++)
2158 {
2159 coarse_shift = 0;
2160 if (j > 0) coarse_shift = coarse_counter[j-1];
2161 size = n_fine/num_threads;
2162 rest = n_fine - size*num_threads;
2163 if (j < rest)
2164 {
2165 ns = j*size+j;
2166 ne = (j+1)*size+j+1;
2167 }
2168 else
2169 {
2170 ns = j*size+rest;
2171 ne = (j+1)*size+rest;
2172 }
2173 for (i = ns; i < ne; i++)
2174 {
2175 fine_to_coarse[i] += coarse_shift;
2176 }
2177 }
2178 /*index = 0;
2179 for (i = 0; i < num_sends; i++)
2180 {
2181 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
2182 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
2183 int_buf_data[index++]
2184 = fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2185 }
2186
2187 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
2188 fine_to_coarse_offd);
2189
2190 hypre_ParCSRCommHandleDestroy(comm_handle);
2191
2192 if (debug_flag==4)
2193 {
2194 wall_time = time_getWallclockSeconds() - wall_time;
2195 hypre_printf("Proc = %d Interp: Comm 4 FineToCoarse = %f\n",
2196 my_id, wall_time);
2197 fflush(NULL);
2198 }*/
2199
2200 if (debug_flag==4) wall_time = time_getWallclockSeconds();
2201
2202 /*#ifdef HYPRE_USING_OPENMP
2203 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
2204 #endif
2205 for (i = 0; i < n_fine; i++) fine_to_coarse[i] -= my_first_cpt;*/
2206
2207 /*-----------------------------------------------------------------------
2208 * Loop over fine grid points.
2209 *-----------------------------------------------------------------------*/
2210
2211 #ifdef HYPRE_USING_OPENMP
2212 #pragma omp parallel for private(i,j,jl,i1,jj,ns,ne,size,rest,diagonal,jj_counter,jj_counter_offd,jj_begin_row,jj_end_row,jj_begin_row_offd,jj_end_row_offd,sum_P_pos,sum_P_neg,sum_N_pos,sum_N_neg,alfa,beta) HYPRE_SMP_SCHEDULE
2213 #endif
2214 for (jl = 0; jl < num_threads; jl++)
2215 {
2216 HYPRE_Int *P_marker, *P_marker_offd;
2217
2218 size = n_fine/num_threads;
2219 rest = n_fine - size*num_threads;
2220 if (jl < rest)
2221 {
2222 ns = jl*size+jl;
2223 ne = (jl+1)*size+jl+1;
2224 }
2225 else
2226 {
2227 ns = jl*size+rest;
2228 ne = (jl+1)*size+rest;
2229 }
2230 jj_counter = 0;
2231 if (jl > 0) jj_counter = jj_count[jl-1];
2232 jj_counter_offd = 0;
2233 if (jl > 0) jj_counter_offd = jj_count_offd[jl-1];
2234
2235 P_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
2236 if (num_cols_A_offd)
2237 P_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
2238 else
2239 P_marker_offd = NULL;
2240
2241 for (i = 0; i < n_fine; i++)
2242 {
2243 P_marker[i] = -1;
2244 }
2245 for (i = 0; i < num_cols_A_offd; i++)
2246 {
2247 P_marker_offd[i] = -1;
2248 }
2249
2250 for (i = ns; i < ne; i++)
2251 {
2252
2253 /*--------------------------------------------------------------------
2254 * If i is a c-point, interpolation is the identity.
2255 *--------------------------------------------------------------------*/
2256
2257 if (CF_marker[i] >= 0)
2258 {
2259 P_diag_i[i] = jj_counter;
2260 P_diag_j[jj_counter] = fine_to_coarse[i];
2261 P_diag_data[jj_counter] = one;
2262 jj_counter++;
2263 }
2264
2265 /*--------------------------------------------------------------------
2266 * If i is an F-point, build interpolation.
2267 *--------------------------------------------------------------------*/
2268
2269 else
2270 {
2271 /* Diagonal part of P */
2272 P_diag_i[i] = jj_counter;
2273 jj_begin_row = jj_counter;
2274
2275 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2276 {
2277 i1 = S_diag_j[jj];
2278
2279 /*--------------------------------------------------------------
2280 * If neighbor i1 is a C-point, set column number in P_diag_j
2281 * and initialize interpolation weight to zero.
2282 *--------------------------------------------------------------*/
2283
2284 if (CF_marker[i1] >= 0)
2285 {
2286 P_marker[i1] = jj_counter;
2287 P_diag_j[jj_counter] = fine_to_coarse[i1];
2288 P_diag_data[jj_counter] = zero;
2289 jj_counter++;
2290 }
2291
2292 }
2293 jj_end_row = jj_counter;
2294
2295 /* Off-Diagonal part of P */
2296 P_offd_i[i] = jj_counter_offd;
2297 jj_begin_row_offd = jj_counter_offd;
2298
2299
2300 if (num_procs > 1)
2301 {
2302 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2303 {
2304 i1 = S_offd_j[jj];
2305
2306 /*-----------------------------------------------------------
2307 * If neighbor i1 is a C-point, set column number in P_offd_j
2308 * and initialize interpolation weight to zero.
2309 *-----------------------------------------------------------*/
2310
2311 if (CF_marker_offd[i1] >= 0)
2312 {
2313 P_marker_offd[i1] = jj_counter_offd;
2314 P_offd_j[jj_counter_offd] = i1;
2315 P_offd_data[jj_counter_offd] = zero;
2316 jj_counter_offd++;
2317 }
2318 }
2319 }
2320
2321 jj_end_row_offd = jj_counter_offd;
2322
2323 diagonal = A_diag_data[A_diag_i[i]];
2324
2325
2326 /* Loop over ith row of A. First, the diagonal part of A */
2327 sum_N_pos = 0;
2328 sum_N_neg = 0;
2329 sum_P_pos = 0;
2330 sum_P_neg = 0;
2331
2332 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2333 {
2334 i1 = A_diag_j[jj];
2335 if (num_functions == 1 || dof_func[i1] == dof_func[i])
2336 {
2337 if (A_diag_data[jj] > 0)
2338 sum_N_pos += A_diag_data[jj];
2339 else
2340 sum_N_neg += A_diag_data[jj];
2341 }
2342 /*--------------------------------------------------------------
2343 * Case 1: neighbor i1 is a C-point and strongly influences i,
2344 * accumulate a_{i,i1} into the interpolation weight.
2345 *--------------------------------------------------------------*/
2346
2347 if (P_marker[i1] >= jj_begin_row)
2348 {
2349 P_diag_data[P_marker[i1]] += A_diag_data[jj];
2350 if (A_diag_data[jj] > 0)
2351 sum_P_pos += A_diag_data[jj];
2352 else
2353 sum_P_neg += A_diag_data[jj];
2354 }
2355 }
2356
2357 /*----------------------------------------------------------------
2358 * Still looping over ith row of A. Next, loop over the
2359 * off-diagonal part of A
2360 *---------------------------------------------------------------*/
2361
2362 if (num_procs > 1)
2363 {
2364 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2365 {
2366 i1 = A_offd_j[jj];
2367 if (num_functions == 1 || dof_func_offd[i1] == dof_func[i])
2368 {
2369 if (A_offd_data[jj] > 0)
2370 sum_N_pos += A_offd_data[jj];
2371 else
2372 sum_N_neg += A_offd_data[jj];
2373 }
2374
2375 /*--------------------------------------------------------------
2376 * Case 1: neighbor i1 is a C-point and strongly influences i,
2377 * accumulate a_{i,i1} into the interpolation weight.
2378 *--------------------------------------------------------------*/
2379
2380 if (P_marker_offd[i1] >= jj_begin_row_offd)
2381 {
2382 P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
2383 if (A_offd_data[jj] > 0)
2384 sum_P_pos += A_offd_data[jj];
2385 else
2386 sum_P_neg += A_offd_data[jj];
2387 }
2388
2389 }
2390 }
2391 if (sum_P_neg) alfa = sum_N_neg/sum_P_neg/diagonal;
2392 if (sum_P_pos) beta = sum_N_pos/sum_P_pos/diagonal;
2393
2394 /*-----------------------------------------------------------------
2395 * Set interpolation weight by dividing by the diagonal.
2396 *-----------------------------------------------------------------*/
2397
2398 for (jj = jj_begin_row; jj < jj_end_row; jj++)
2399 {
2400 if (P_diag_data[jj]> 0)
2401 P_diag_data[jj] *= -beta;
2402 else
2403 P_diag_data[jj] *= -alfa;
2404 }
2405
2406 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
2407 {
2408 if (P_offd_data[jj]> 0)
2409 P_offd_data[jj] *= -beta;
2410 else
2411 P_offd_data[jj] *= -alfa;
2412 }
2413
2414 }
2415
2416 P_offd_i[i+1] = jj_counter_offd;
2417 }
2418 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
2419 hypre_TFree(P_marker_offd, HYPRE_MEMORY_HOST);
2420 }
2421
2422 P = hypre_ParCSRMatrixCreate(comm,
2423 hypre_ParCSRMatrixGlobalNumRows(A),
2424 total_global_cpts,
2425 hypre_ParCSRMatrixColStarts(A),
2426 num_cpts_global,
2427 0,
2428 P_diag_i[n_fine],
2429 P_offd_i[n_fine]);
2430
2431
2432 P_diag = hypre_ParCSRMatrixDiag(P);
2433 hypre_CSRMatrixData(P_diag) = P_diag_data;
2434 hypre_CSRMatrixI(P_diag) = P_diag_i;
2435 hypre_CSRMatrixJ(P_diag) = P_diag_j;
2436 P_offd = hypre_ParCSRMatrixOffd(P);
2437 hypre_CSRMatrixData(P_offd) = P_offd_data;
2438 hypre_CSRMatrixI(P_offd) = P_offd_i;
2439 hypre_CSRMatrixJ(P_offd) = P_offd_j;
2440
2441 /* Compress P, removing coefficients smaller than trunc_factor * Max */
2442
2443 if (trunc_factor != 0.0 || max_elmts > 0)
2444 {
2445 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
2446 P_diag_data = hypre_CSRMatrixData(P_diag);
2447 P_diag_i = hypre_CSRMatrixI(P_diag);
2448 P_diag_j = hypre_CSRMatrixJ(P_diag);
2449 P_offd_data = hypre_CSRMatrixData(P_offd);
2450 P_offd_i = hypre_CSRMatrixI(P_offd);
2451 P_offd_j = hypre_CSRMatrixJ(P_offd);
2452 P_diag_size = P_diag_i[n_fine];
2453 P_offd_size = P_offd_i[n_fine];
2454 }
2455
2456 num_cols_P_offd = 0;
2457 if (P_offd_size)
2458 {
2459 HYPRE_Int *P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
2460
2461 #ifdef HYPRE_USING_OPENMP
2462 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
2463 #endif
2464 for (i=0; i < num_cols_A_offd; i++)
2465 P_marker[i] = 0;
2466
2467 num_cols_P_offd = 0;
2468 for (i=0; i < P_offd_size; i++)
2469 {
2470 index = P_offd_j[i];
2471 if (!P_marker[index])
2472 {
2473 num_cols_P_offd++;
2474 P_marker[index] = 1;
2475 }
2476 }
2477
2478 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
2479 tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
2480
2481 index = 0;
2482 for (i=0; i < num_cols_P_offd; i++)
2483 {
2484 while (P_marker[index]==0) index++;
2485 tmp_map_offd[i] = index++;
2486 }
2487
2488 #ifdef HYPRE_USING_OPENMP
2489 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
2490 #endif
2491 for (i=0; i < P_offd_size; i++)
2492 {
2493 P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
2494 P_offd_j[i],
2495 num_cols_P_offd);
2496 }
2497 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
2498 }
2499
2500 for (i=0; i < n_fine; i++)
2501 if (CF_marker[i] == -3) CF_marker[i] = -1;
2502
2503 if (num_cols_P_offd)
2504 {
2505 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
2506 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
2507 }
2508
2509 hypre_GetCommPkgRTFromCommPkgA(P, A, fine_to_coarse, tmp_map_offd);
2510
2511 *P_ptr = P;
2512
2513 hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
2514 hypre_TFree(dof_func_offd, HYPRE_MEMORY_HOST);
2515 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
2516 hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
2517 hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
2518 hypre_TFree(coarse_counter, HYPRE_MEMORY_HOST);
2519 hypre_TFree(jj_count, HYPRE_MEMORY_HOST);
2520 hypre_TFree(jj_count_offd, HYPRE_MEMORY_HOST);
2521
2522 return hypre_error_flag;
2523 }
2524
2525 HYPRE_Int
hypre_BoomerAMGBuildDirInterp(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,HYPRE_Int interp_type,hypre_ParCSRMatrix ** P_ptr)2526 hypre_BoomerAMGBuildDirInterp( hypre_ParCSRMatrix *A,
2527 HYPRE_Int *CF_marker,
2528 hypre_ParCSRMatrix *S,
2529 HYPRE_BigInt *num_cpts_global,
2530 HYPRE_Int num_functions,
2531 HYPRE_Int *dof_func,
2532 HYPRE_Int debug_flag,
2533 HYPRE_Real trunc_factor,
2534 HYPRE_Int max_elmts,
2535 HYPRE_Int interp_type,
2536 hypre_ParCSRMatrix **P_ptr)
2537 {
2538 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
2539 hypre_GpuProfilingPushRange("DirInterp");
2540 #endif
2541
2542 HYPRE_Int ierr = 0;
2543
2544 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
2545 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
2546
2547 if (exec == HYPRE_EXEC_DEVICE)
2548 {
2549 ierr = hypre_BoomerAMGBuildDirInterpDevice(A,CF_marker,S,num_cpts_global,num_functions,dof_func,
2550 debug_flag,trunc_factor,max_elmts,
2551 interp_type, P_ptr);
2552 }
2553 else
2554 #endif
2555 {
2556 ierr = hypre_BoomerAMGBuildDirInterpHost(A,CF_marker,S,num_cpts_global,num_functions,dof_func,
2557 debug_flag,trunc_factor,max_elmts, P_ptr);
2558 }
2559
2560 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
2561 hypre_GpuProfilingPopRange();
2562 #endif
2563
2564 return ierr;
2565 }
2566
2567 /*------------------------------------------------
2568 * Drop entries in interpolation matrix P
2569 * max_elmts == 0 means no limit on rownnz
2570 *------------------------------------------------*/
2571 HYPRE_Int
hypre_BoomerAMGInterpTruncation(hypre_ParCSRMatrix * P,HYPRE_Real trunc_factor,HYPRE_Int max_elmts)2572 hypre_BoomerAMGInterpTruncation( hypre_ParCSRMatrix *P,
2573 HYPRE_Real trunc_factor,
2574 HYPRE_Int max_elmts)
2575 {
2576 if (trunc_factor <= 0.0 && max_elmts == 0)
2577 {
2578 return 0;
2579 }
2580
2581 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
2582 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(P) );
2583
2584 if (exec == HYPRE_EXEC_DEVICE)
2585 {
2586 return hypre_BoomerAMGInterpTruncationDevice(P, trunc_factor, max_elmts);
2587 }
2588 else
2589 #endif
2590 {
2591 HYPRE_Int rescale = 1; // rescale P
2592 HYPRE_Int nrm_type = 0; // Use infty-norm of row to perform treshold dropping
2593 return hypre_ParCSRMatrixTruncate(P, trunc_factor, max_elmts, rescale, nrm_type);
2594 }
2595 }
2596
2597 /*---------------------------------------------------------------------------
2598 * hypre_BoomerAMGBuildInterpModUnk - this is a modified interpolation for the unknown approach.
2599 * here we need to pass in a strength matrix built on the entire matrix.
2600 *
2601 *--------------------------------------------------------------------------*/
2602
2603 HYPRE_Int
hypre_BoomerAMGBuildInterpModUnk(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRMatrix ** P_ptr)2604 hypre_BoomerAMGBuildInterpModUnk( hypre_ParCSRMatrix *A,
2605 HYPRE_Int *CF_marker,
2606 hypre_ParCSRMatrix *S,
2607 HYPRE_BigInt *num_cpts_global,
2608 HYPRE_Int num_functions,
2609 HYPRE_Int *dof_func,
2610 HYPRE_Int debug_flag,
2611 HYPRE_Real trunc_factor,
2612 HYPRE_Int max_elmts,
2613 hypre_ParCSRMatrix **P_ptr)
2614 {
2615
2616 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
2617 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2618 hypre_ParCSRCommHandle *comm_handle;
2619
2620 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2621 HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
2622 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
2623 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
2624
2625 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
2626 HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
2627 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
2628 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
2629 HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
2630 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
2631
2632 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
2633 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
2634 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
2635
2636 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
2637 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
2638 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
2639
2640 hypre_ParCSRMatrix *P;
2641 HYPRE_BigInt *col_map_offd_P;
2642 HYPRE_Int *tmp_map_offd;
2643
2644 HYPRE_Int *CF_marker_offd = NULL;
2645 HYPRE_Int *dof_func_offd = NULL;
2646
2647 hypre_CSRMatrix *A_ext;
2648
2649 HYPRE_Real *A_ext_data = NULL;
2650 HYPRE_Int *A_ext_i = NULL;
2651 HYPRE_BigInt *A_ext_j = NULL;
2652
2653 hypre_CSRMatrix *P_diag;
2654 hypre_CSRMatrix *P_offd;
2655
2656 HYPRE_Real *P_diag_data;
2657 HYPRE_Int *P_diag_i;
2658 HYPRE_Int *P_diag_j;
2659 HYPRE_Real *P_offd_data;
2660 HYPRE_Int *P_offd_i;
2661 HYPRE_Int *P_offd_j;
2662
2663 HYPRE_Int P_diag_size, P_offd_size;
2664
2665 HYPRE_Int *P_marker, *P_marker_offd;
2666
2667 HYPRE_Int jj_counter,jj_counter_offd;
2668 HYPRE_Int *jj_count, *jj_count_offd;
2669 HYPRE_Int jj_begin_row,jj_begin_row_offd;
2670 HYPRE_Int jj_end_row,jj_end_row_offd;
2671
2672 HYPRE_Int start_indexing = 0; /* start indexing for P_data at 0 */
2673
2674 HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
2675
2676 HYPRE_Int strong_f_marker;
2677
2678 HYPRE_Int *fine_to_coarse;
2679 //HYPRE_Int *fine_to_coarse_offd;
2680 HYPRE_Int *coarse_counter;
2681 HYPRE_Int coarse_shift;
2682 HYPRE_BigInt total_global_cpts;
2683 HYPRE_Int num_cols_P_offd;
2684 //HYPRE_BigInt my_first_cpt;
2685
2686 HYPRE_Int i,i1,i2;
2687 HYPRE_Int j,jl,jj,jj1;
2688 HYPRE_Int kc;
2689 HYPRE_BigInt big_k;
2690 HYPRE_Int start;
2691 HYPRE_Int sgn;
2692 HYPRE_Int c_num;
2693
2694 HYPRE_Real diagonal;
2695 HYPRE_Real sum;
2696 HYPRE_Real distribute;
2697
2698 HYPRE_Real zero = 0.0;
2699 HYPRE_Real one = 1.0;
2700
2701 HYPRE_Int my_id;
2702 HYPRE_Int num_procs;
2703 HYPRE_Int num_threads;
2704 HYPRE_Int num_sends;
2705 HYPRE_Int index;
2706 HYPRE_Int ns, ne, size, rest;
2707 HYPRE_Int print_level = 0;
2708 HYPRE_Int *int_buf_data;
2709
2710 HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
2711 HYPRE_Int local_numrows = hypre_CSRMatrixNumRows(A_diag);
2712 HYPRE_BigInt col_n = col_1 + local_numrows;
2713
2714 HYPRE_Real wall_time; /* for debugging instrumentation */
2715
2716 hypre_MPI_Comm_size(comm, &num_procs);
2717 hypre_MPI_Comm_rank(comm,&my_id);
2718 num_threads = hypre_NumThreads();
2719
2720
2721 //my_first_cpt = num_cpts_global[0];
2722 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
2723 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
2724
2725 /*-------------------------------------------------------------------
2726 * Get the CF_marker data for the off-processor columns
2727 *-------------------------------------------------------------------*/
2728
2729 if (debug_flag < 0)
2730 {
2731 debug_flag = -debug_flag;
2732 print_level = 1;
2733 }
2734
2735 if (debug_flag==4) wall_time = time_getWallclockSeconds();
2736
2737 if (num_cols_A_offd) CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
2738 if (num_functions > 1 && num_cols_A_offd)
2739 dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
2740
2741 if (!comm_pkg)
2742 {
2743 hypre_MatvecCommPkgCreate(A);
2744 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2745 }
2746
2747 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
2748 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
2749 num_sends), HYPRE_MEMORY_HOST);
2750
2751 index = 0;
2752 for (i = 0; i < num_sends; i++)
2753 {
2754 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
2755 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
2756 int_buf_data[index++]
2757 = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2758 }
2759
2760 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
2761 CF_marker_offd);
2762
2763 hypre_ParCSRCommHandleDestroy(comm_handle);
2764 if (num_functions > 1)
2765 {
2766 index = 0;
2767 for (i = 0; i < num_sends; i++)
2768 {
2769 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
2770 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
2771 int_buf_data[index++]
2772 = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2773 }
2774
2775 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
2776 dof_func_offd);
2777
2778 hypre_ParCSRCommHandleDestroy(comm_handle);
2779 }
2780
2781 if (debug_flag==4)
2782 {
2783 wall_time = time_getWallclockSeconds() - wall_time;
2784 hypre_printf("Proc = %d Interp: Comm 1 CF_marker = %f\n",
2785 my_id, wall_time);
2786 fflush(NULL);
2787 }
2788
2789 /*----------------------------------------------------------------------
2790 * Get the ghost rows of A
2791 *---------------------------------------------------------------------*/
2792
2793 if (debug_flag==4) wall_time = time_getWallclockSeconds();
2794
2795 if (num_procs > 1)
2796 {
2797 A_ext = hypre_ParCSRMatrixExtractBExt(A,A,1);
2798 A_ext_i = hypre_CSRMatrixI(A_ext);
2799 A_ext_j = hypre_CSRMatrixBigJ(A_ext);
2800 A_ext_data = hypre_CSRMatrixData(A_ext);
2801 }
2802
2803 index = 0;
2804 for (i=0; i < num_cols_A_offd; i++)
2805 {
2806 for (j=A_ext_i[i]; j < A_ext_i[i+1]; j++)
2807 {
2808 big_k = A_ext_j[j];
2809 if (big_k >= col_1 && big_k < col_n)
2810 {
2811 A_ext_j[index] = big_k - col_1;
2812 A_ext_data[index++] = A_ext_data[j];
2813 }
2814 else
2815 {
2816 kc = hypre_BigBinarySearch(col_map_offd,big_k,num_cols_A_offd);
2817 if (kc > -1)
2818 {
2819 A_ext_j[index] = (HYPRE_BigInt)(-kc-1);
2820 A_ext_data[index++] = A_ext_data[j];
2821 }
2822 }
2823 }
2824 A_ext_i[i] = index;
2825 }
2826 for (i = num_cols_A_offd; i > 0; i--)
2827 A_ext_i[i] = A_ext_i[i-1];
2828 if (num_procs > 1) A_ext_i[0] = 0;
2829
2830 if (debug_flag==4)
2831 {
2832 wall_time = time_getWallclockSeconds() - wall_time;
2833 hypre_printf("Proc = %d Interp: Comm 2 Get A_ext = %f\n",
2834 my_id, wall_time);
2835 fflush(NULL);
2836 }
2837
2838
2839 /*-----------------------------------------------------------------------
2840 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
2841 *-----------------------------------------------------------------------*/
2842
2843 /*-----------------------------------------------------------------------
2844 * Intialize counters and allocate mapping vector.
2845 *-----------------------------------------------------------------------*/
2846
2847 coarse_counter = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
2848 jj_count = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
2849 jj_count_offd = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
2850
2851 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
2852 #ifdef HYPRE_USING_OPENMP
2853 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
2854 #endif
2855 for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1;
2856
2857 jj_counter = start_indexing;
2858 jj_counter_offd = start_indexing;
2859
2860 /*-----------------------------------------------------------------------
2861 * Loop over fine grid.
2862 *-----------------------------------------------------------------------*/
2863
2864 /* RDF: this looks a little tricky, but doable */
2865 #ifdef HYPRE_USING_OPENMP
2866 #pragma omp parallel for private(i,j,i1,jj,ns,ne,size,rest) HYPRE_SMP_SCHEDULE
2867 #endif
2868 for (j = 0; j < num_threads; j++)
2869 {
2870 size = n_fine/num_threads;
2871 rest = n_fine - size*num_threads;
2872 if (j < rest)
2873 {
2874 ns = j*size+j;
2875 ne = (j+1)*size+j+1;
2876 }
2877 else
2878 {
2879 ns = j*size+rest;
2880 ne = (j+1)*size+rest;
2881 }
2882 for (i = ns; i < ne; i++)
2883 {
2884
2885 /*--------------------------------------------------------------------
2886 * If i is a C-point, interpolation is the identity. Also set up
2887 * mapping vector.
2888 *--------------------------------------------------------------------*/
2889
2890 if (CF_marker[i] >= 0)
2891 {
2892 jj_count[j]++;
2893 fine_to_coarse[i] = coarse_counter[j];
2894 coarse_counter[j]++;
2895 }
2896
2897 /*--------------------------------------------------------------------
2898 * If i is an F-point, interpolation is from the C-points that
2899 * strongly influence i.
2900 *--------------------------------------------------------------------*/
2901
2902 else
2903 {
2904 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2905 {
2906 i1 = S_diag_j[jj];
2907 if (CF_marker[i1] >= 0)
2908 {
2909 jj_count[j]++;
2910 }
2911 }
2912
2913 if (num_procs > 1)
2914 {
2915 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2916 {
2917 i1 = S_offd_j[jj];
2918 if (CF_marker_offd[i1] >= 0)
2919 {
2920 jj_count_offd[j]++;
2921 }
2922 }
2923 }
2924 }
2925 }
2926 }
2927
2928 /*-----------------------------------------------------------------------
2929 * Allocate arrays.
2930 *-----------------------------------------------------------------------*/
2931
2932 for (i=0; i < num_threads-1; i++)
2933 {
2934 coarse_counter[i+1] += coarse_counter[i];
2935 jj_count[i+1] += jj_count[i];
2936 jj_count_offd[i+1] += jj_count_offd[i];
2937 }
2938 i = num_threads-1;
2939 jj_counter = jj_count[i];
2940 jj_counter_offd = jj_count_offd[i];
2941
2942 P_diag_size = jj_counter;
2943
2944 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
2945 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
2946 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, HYPRE_MEMORY_HOST);
2947
2948 P_diag_i[n_fine] = jj_counter;
2949
2950 P_offd_size = jj_counter_offd;
2951
2952 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
2953 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
2954 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, HYPRE_MEMORY_HOST);
2955
2956 /*-----------------------------------------------------------------------
2957 * Intialize some stuff.
2958 *-----------------------------------------------------------------------*/
2959
2960 jj_counter = start_indexing;
2961 jj_counter_offd = start_indexing;
2962
2963 if (debug_flag==4)
2964 {
2965 wall_time = time_getWallclockSeconds() - wall_time;
2966 hypre_printf("Proc = %d Interp: Internal work 1 = %f\n",
2967 my_id, wall_time);
2968 fflush(NULL);
2969 }
2970
2971 /*-----------------------------------------------------------------------
2972 * Send and receive fine_to_coarse info.
2973 *-----------------------------------------------------------------------*/
2974
2975 if (debug_flag==4) wall_time = time_getWallclockSeconds();
2976
2977 //fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
2978
2979 #ifdef HYPRE_USING_OPENMP
2980 #pragma omp parallel for private(i,j,ns,ne,size,rest,coarse_shift) HYPRE_SMP_SCHEDULE
2981 #endif
2982 for (j = 0; j < num_threads; j++)
2983 {
2984 coarse_shift = 0;
2985 if (j > 0) coarse_shift = coarse_counter[j-1];
2986 size = n_fine/num_threads;
2987 rest = n_fine - size*num_threads;
2988 if (j < rest)
2989 {
2990 ns = j*size+j;
2991 ne = (j+1)*size+j+1;
2992 }
2993 else
2994 {
2995 ns = j*size+rest;
2996 ne = (j+1)*size+rest;
2997 }
2998 for (i = ns; i < ne; i++)
2999 fine_to_coarse[i] += coarse_shift;
3000 }
3001 /*index = 0;
3002 for (i = 0; i < num_sends; i++)
3003 {
3004 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
3005 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
3006 int_buf_data[index++]
3007 = fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
3008 }
3009
3010 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
3011 fine_to_coarse_offd);
3012
3013 hypre_ParCSRCommHandleDestroy(comm_handle);
3014
3015 if (debug_flag==4)
3016 {
3017 wall_time = time_getWallclockSeconds() - wall_time;
3018 hypre_printf("Proc = %d Interp: Comm 4 FineToCoarse = %f\n",
3019 my_id, wall_time);
3020 fflush(NULL);
3021 }*/
3022
3023 if (debug_flag==4) wall_time = time_getWallclockSeconds();
3024
3025 /*#ifdef HYPRE_USING_OPENMP
3026 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
3027 #endif
3028 for (i = 0; i < n_fine; i++) fine_to_coarse[i] -= my_first_cpt;*/
3029
3030 /*-----------------------------------------------------------------------
3031 * Loop over fine grid points.
3032 *-----------------------------------------------------------------------*/
3033
3034 #ifdef HYPRE_USING_OPENMP
3035 #pragma omp parallel for private(i,j,jl,i1,i2,jj,jj1,ns,ne,size,rest,sum,diagonal,distribute,P_marker,P_marker_offd,strong_f_marker,jj_counter,jj_counter_offd,sgn,c_num,jj_begin_row,jj_end_row,jj_begin_row_offd,jj_end_row_offd) HYPRE_SMP_SCHEDULE
3036 #endif
3037 for (jl = 0; jl < num_threads; jl++)
3038 {
3039 size = n_fine/num_threads;
3040 rest = n_fine - size*num_threads;
3041 if (jl < rest)
3042 {
3043 ns = jl*size+jl;
3044 ne = (jl+1)*size+jl+1;
3045 }
3046 else
3047 {
3048 ns = jl*size+rest;
3049 ne = (jl+1)*size+rest;
3050 }
3051 jj_counter = 0;
3052 if (jl > 0) jj_counter = jj_count[jl-1];
3053 jj_counter_offd = 0;
3054 if (jl > 0) jj_counter_offd = jj_count_offd[jl-1];
3055
3056 P_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
3057 if (num_cols_A_offd)
3058 P_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
3059 else
3060 P_marker_offd = NULL;
3061
3062 for (i = 0; i < n_fine; i++)
3063 {
3064 P_marker[i] = -1;
3065 }
3066 for (i = 0; i < num_cols_A_offd; i++)
3067 {
3068 P_marker_offd[i] = -1;
3069 }
3070 strong_f_marker = -2;
3071
3072 for (i = ns; i < ne; i++)
3073 {
3074
3075 /*--------------------------------------------------------------------
3076 * If i is a c-point, interpolation is the identity.
3077 *--------------------------------------------------------------------*/
3078
3079 if (CF_marker[i] >= 0)
3080 {
3081 P_diag_i[i] = jj_counter;
3082 P_diag_j[jj_counter] = fine_to_coarse[i];
3083 P_diag_data[jj_counter] = one;
3084 jj_counter++;
3085 }
3086
3087 /*--------------------------------------------------------------------
3088 * If i is an F-point, build interpolation.
3089 *--------------------------------------------------------------------*/
3090
3091 else
3092 {
3093 /* Diagonal part of P */
3094 P_diag_i[i] = jj_counter;
3095 jj_begin_row = jj_counter;
3096
3097 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
3098 {
3099 i1 = S_diag_j[jj];
3100
3101 /*--------------------------------------------------------------
3102 * If neighbor i1 is a C-point, set column number in P_diag_j
3103 * and initialize interpolation weight to zero.
3104 *--------------------------------------------------------------*/
3105
3106 if (CF_marker[i1] >= 0)
3107 {
3108 P_marker[i1] = jj_counter;
3109 P_diag_j[jj_counter] = fine_to_coarse[i1];
3110 P_diag_data[jj_counter] = zero;
3111 jj_counter++;
3112 }
3113
3114 /*--------------------------------------------------------------
3115 * If neighbor i1 is an F-point, mark it as a strong F-point
3116 * whose connection needs to be distributed.
3117 *--------------------------------------------------------------*/
3118
3119 else if (CF_marker[i1] != -3)
3120 {
3121 P_marker[i1] = strong_f_marker;
3122 }
3123 }
3124 jj_end_row = jj_counter;
3125
3126 /* Off-Diagonal part of P */
3127 P_offd_i[i] = jj_counter_offd;
3128 jj_begin_row_offd = jj_counter_offd;
3129
3130
3131 if (num_procs > 1)
3132 {
3133 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
3134 {
3135 i1 = S_offd_j[jj];
3136
3137 /*-----------------------------------------------------------
3138 * If neighbor i1 is a C-point, set column number in P_offd_j
3139 * and initialize interpolation weight to zero.
3140 *-----------------------------------------------------------*/
3141
3142 if (CF_marker_offd[i1] >= 0)
3143 {
3144 P_marker_offd[i1] = jj_counter_offd;
3145 /*P_offd_j[jj_counter_offd] = fine_to_coarse_offd[i1];*/
3146 P_offd_j[jj_counter_offd] = i1;
3147 P_offd_data[jj_counter_offd] = zero;
3148 jj_counter_offd++;
3149 }
3150
3151 /*-----------------------------------------------------------
3152 * If neighbor i1 is an F-point, mark it as a strong F-point
3153 * whose connection needs to be distributed.
3154 *-----------------------------------------------------------*/
3155
3156 else if (CF_marker_offd[i1] != -3)
3157 {
3158 P_marker_offd[i1] = strong_f_marker;
3159 }
3160 }
3161 }
3162
3163 jj_end_row_offd = jj_counter_offd;
3164
3165 diagonal = A_diag_data[A_diag_i[i]];
3166
3167
3168 /* Loop over ith row of A. First, the diagonal part of A */
3169
3170 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
3171 {
3172 i1 = A_diag_j[jj];
3173
3174 /*--------------------------------------------------------------
3175 * Case 1: neighbor i1 is a C-point and strongly influences i,
3176 * accumulate a_{i,i1} into the interpolation weight.
3177 *--------------------------------------------------------------*/
3178
3179 if (P_marker[i1] >= jj_begin_row)
3180 {
3181 P_diag_data[P_marker[i1]] += A_diag_data[jj];
3182 }
3183
3184 /*--------------------------------------------------------------
3185 * Case 2: neighbor i1 is an F-point and strongly influences i,
3186 * distribute a_{i,i1} to C-points that strongly infuence i.
3187 * Note: currently no distribution to the diagonal in this case.
3188
3189 HERE, we only want to distribut to points of the SAME function type
3190
3191 *--------------------------------------------------------------*/
3192
3193 else if (P_marker[i1] == strong_f_marker)
3194 {
3195 sum = zero;
3196
3197 /*-----------------------------------------------------------
3198 * Loop over row of A for point i1 and calculate the sum
3199 * of the connections to c-points that strongly influence i.
3200 *-----------------------------------------------------------*/
3201 sgn = 1;
3202 if (A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
3203 /* Diagonal block part of row i1 */
3204 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
3205 {
3206 i2 = A_diag_j[jj1];
3207 if (num_functions == 1 || dof_func[i1] == dof_func[i2])
3208 {
3209
3210 if (P_marker[i2] >= jj_begin_row &&
3211 (sgn*A_diag_data[jj1]) < 0 )
3212 {
3213 sum += A_diag_data[jj1];
3214 }
3215 }
3216
3217 }
3218
3219 /* Off-Diagonal block part of row i1 */
3220 if (num_procs > 1)
3221 {
3222 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
3223 {
3224 i2 = A_offd_j[jj1];
3225 if (num_functions == 1 || dof_func[i1] == dof_func[i2])
3226 {
3227 if (P_marker_offd[i2] >= jj_begin_row_offd
3228 && (sgn*A_offd_data[jj1]) < 0)
3229 {
3230 sum += A_offd_data[jj1];
3231 }
3232 }
3233 }
3234 }
3235
3236 if (sum != 0)
3237 {
3238 distribute = A_diag_data[jj] / sum;
3239
3240 /*-----------------------------------------------------------
3241 * Loop over row of A for point i1 and do the distribution.
3242 *-----------------------------------------------------------*/
3243
3244 /* Diagonal block part of row i1 */
3245 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
3246 {
3247 i2 = A_diag_j[jj1];
3248 if (num_functions == 1 || dof_func[i1] == dof_func[i2])
3249 {
3250 if (P_marker[i2] >= jj_begin_row
3251 && (sgn*A_diag_data[jj1]) < 0)
3252 {
3253 P_diag_data[P_marker[i2]]
3254 += distribute * A_diag_data[jj1];
3255 }
3256 }
3257
3258 }
3259
3260 /* Off-Diagonal block part of row i1 */
3261 if (num_procs > 1)
3262 {
3263 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
3264 {
3265 i2 = A_offd_j[jj1];
3266 if (num_functions == 1 || dof_func[i1] == dof_func[i2])
3267 {
3268 if (P_marker_offd[i2] >= jj_begin_row_offd
3269 && (sgn*A_offd_data[jj1]) < 0)
3270 {
3271 P_offd_data[P_marker_offd[i2]]
3272 += distribute * A_offd_data[jj1];
3273 }
3274 }
3275 }
3276
3277 }
3278 }
3279 else /* sum = 0 - only add to diag if the same function type */
3280 {
3281 if (num_functions == 1 || dof_func[i] == dof_func[i1])
3282 diagonal += A_diag_data[jj];
3283 }
3284 }
3285
3286 /*--------------------------------------------------------------
3287 * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
3288 * into the diagonal. (only if the same function type)
3289 *--------------------------------------------------------------*/
3290
3291 else if (CF_marker[i1] != -3)
3292 {
3293 if (num_functions == 1 || dof_func[i] == dof_func[i1])
3294 diagonal += A_diag_data[jj];
3295 }
3296
3297 }
3298
3299
3300 /*----------------------------------------------------------------
3301 * Still looping over ith row of A. Next, loop over the
3302 * off-diagonal part of A
3303 *---------------------------------------------------------------*/
3304
3305 if (num_procs > 1)
3306 {
3307 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
3308 {
3309 i1 = A_offd_j[jj];
3310
3311 /*--------------------------------------------------------------
3312 * Case 1: neighbor i1 is a C-point and strongly influences i,
3313 * accumulate a_{i,i1} into the interpolation weight.
3314 *--------------------------------------------------------------*/
3315
3316 if (P_marker_offd[i1] >= jj_begin_row_offd)
3317 {
3318 P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
3319 }
3320
3321 /*------------------------------------------------------------
3322 * Case 2: neighbor i1 is an F-point and strongly influences i,
3323 * distribute a_{i,i1} to C-points that strongly infuence i.
3324 * Note: currently no distribution to the diagonal in this case.
3325
3326 AGAIN, we only want to distribut to points of the SAME function type
3327
3328 *-----------------------------------------------------------*/
3329
3330 else if (P_marker_offd[i1] == strong_f_marker)
3331 {
3332 sum = zero;
3333
3334 /*---------------------------------------------------------
3335 * Loop over row of A_ext for point i1 and calculate the sum
3336 * of the connections to c-points that strongly influence i.
3337 *---------------------------------------------------------*/
3338
3339 /* find row number */
3340 c_num = A_offd_j[jj];
3341
3342 sgn = 1;
3343 if (A_ext_data[A_ext_i[c_num]] < 0) sgn = -1;
3344 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
3345 {
3346 i2 = (HYPRE_Int)A_ext_j[jj1];
3347 if (num_functions == 1 || dof_func[i1] == dof_func[i2])
3348 {
3349 if (i2 > -1)
3350 {
3351 /* in the diagonal block */
3352 if (P_marker[i2] >= jj_begin_row
3353 && (sgn*A_ext_data[jj1]) < 0)
3354 {
3355 sum += A_ext_data[jj1];
3356 }
3357 }
3358 else
3359 {
3360 /* in the off_diagonal block */
3361 if (P_marker_offd[-i2-1] >= jj_begin_row_offd
3362 && (sgn*A_ext_data[jj1]) < 0)
3363 {
3364 sum += A_ext_data[jj1];
3365 }
3366 }
3367
3368 }
3369 }
3370 if (sum != 0)
3371 {
3372 distribute = A_offd_data[jj] / sum;
3373 /*---------------------------------------------------------
3374 * Loop over row of A_ext for point i1 and do
3375 * the distribution.
3376 *--------------------------------------------------------*/
3377
3378 /* Diagonal block part of row i1 */
3379
3380 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
3381 {
3382 i2 = (HYPRE_Int)A_ext_j[jj1];
3383 if (num_functions == 1 || dof_func[i1] == dof_func[i2])
3384 {
3385 if (i2 > -1) /* in the diagonal block */
3386 {
3387 if (P_marker[i2] >= jj_begin_row
3388 && (sgn*A_ext_data[jj1]) < 0)
3389 {
3390 P_diag_data[P_marker[i2]]
3391 += distribute * A_ext_data[jj1];
3392 }
3393 }
3394 else
3395 {
3396 /* in the off_diagonal block */
3397 if (P_marker_offd[-i2-1] >= jj_begin_row_offd
3398 && (sgn*A_ext_data[jj1]) < 0)
3399 P_offd_data[P_marker_offd[-i2-1]]
3400 += distribute * A_ext_data[jj1];
3401 }
3402 }
3403 }
3404 }
3405 else /* sum = 0 */
3406 {
3407 if (num_functions == 1 || dof_func[i] == dof_func_offd[i1])
3408 diagonal += A_offd_data[jj];
3409 }
3410 }
3411
3412 /*-----------------------------------------------------------
3413 * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
3414 * into the diagonal.
3415 *-----------------------------------------------------------*/
3416
3417 else if (CF_marker_offd[i1] != -3)
3418 {
3419 if (num_functions == 1 || dof_func[i] == dof_func_offd[i1])
3420 diagonal += A_offd_data[jj];
3421 }
3422
3423 }
3424 }
3425
3426 /*-----------------------------------------------------------------
3427 * Set interpolation weight by dividing by the diagonal.
3428 *-----------------------------------------------------------------*/
3429
3430 if (diagonal == 0.0)
3431 {
3432 if (print_level)
3433 hypre_printf(" Warning! zero diagonal! Proc id %d row %d\n", my_id,i);
3434 for (jj = jj_begin_row; jj < jj_end_row; jj++)
3435 {
3436 P_diag_data[jj] = 0.0;
3437 }
3438 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
3439 {
3440 P_offd_data[jj] = 0.0;
3441 }
3442 }
3443 else
3444 {
3445 for (jj = jj_begin_row; jj < jj_end_row; jj++)
3446 {
3447 P_diag_data[jj] /= -diagonal;
3448 }
3449 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
3450 {
3451 P_offd_data[jj] /= -diagonal;
3452 }
3453 }
3454 }
3455
3456 strong_f_marker--;
3457
3458 P_offd_i[i+1] = jj_counter_offd;
3459 }
3460 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
3461 hypre_TFree(P_marker_offd, HYPRE_MEMORY_HOST);
3462 }
3463
3464 P = hypre_ParCSRMatrixCreate(comm,
3465 hypre_ParCSRMatrixGlobalNumRows(A),
3466 total_global_cpts,
3467 hypre_ParCSRMatrixColStarts(A),
3468 num_cpts_global,
3469 0,
3470 P_diag_i[n_fine],
3471 P_offd_i[n_fine]);
3472
3473 P_diag = hypre_ParCSRMatrixDiag(P);
3474 hypre_CSRMatrixData(P_diag) = P_diag_data;
3475 hypre_CSRMatrixI(P_diag) = P_diag_i;
3476 hypre_CSRMatrixJ(P_diag) = P_diag_j;
3477 P_offd = hypre_ParCSRMatrixOffd(P);
3478 hypre_CSRMatrixData(P_offd) = P_offd_data;
3479 hypre_CSRMatrixI(P_offd) = P_offd_i;
3480 hypre_CSRMatrixJ(P_offd) = P_offd_j;
3481
3482 /* Compress P, removing coefficients smaller than trunc_factor * Max */
3483
3484 if (trunc_factor != 0.0 || max_elmts > 0)
3485 {
3486 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
3487 P_diag_data = hypre_CSRMatrixData(P_diag);
3488 P_diag_i = hypre_CSRMatrixI(P_diag);
3489 P_diag_j = hypre_CSRMatrixJ(P_diag);
3490 P_offd_data = hypre_CSRMatrixData(P_offd);
3491 P_offd_i = hypre_CSRMatrixI(P_offd);
3492 P_offd_j = hypre_CSRMatrixJ(P_offd);
3493 P_diag_size = P_diag_i[n_fine];
3494 P_offd_size = P_offd_i[n_fine];
3495 }
3496
3497 num_cols_P_offd = 0;
3498 if (P_offd_size)
3499 {
3500 P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
3501
3502 #ifdef HYPRE_USING_OPENMP
3503 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
3504 #endif
3505 for (i=0; i < num_cols_A_offd; i++)
3506 P_marker[i] = 0;
3507
3508 num_cols_P_offd = 0;
3509 for (i=0; i < P_offd_size; i++)
3510 {
3511 index = P_offd_j[i];
3512 if (!P_marker[index])
3513 {
3514 num_cols_P_offd++;
3515 P_marker[index] = 1;
3516 }
3517 }
3518
3519 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
3520 tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
3521
3522 index = 0;
3523 for (i=0; i < num_cols_P_offd; i++)
3524 {
3525 while (P_marker[index]==0) index++;
3526 tmp_map_offd[i] = index++;
3527 }
3528
3529 #ifdef HYPRE_USING_OPENMP
3530 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
3531 #endif
3532 for (i=0; i < P_offd_size; i++)
3533 P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
3534 P_offd_j[i],
3535 num_cols_P_offd);
3536 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
3537 }
3538
3539 for (i=0; i < n_fine; i++)
3540 if (CF_marker[i] == -3) CF_marker[i] = -1;
3541
3542 if (num_cols_P_offd)
3543 {
3544 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
3545 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
3546 }
3547
3548 hypre_GetCommPkgRTFromCommPkgA(P, A, fine_to_coarse, tmp_map_offd);
3549
3550
3551 *P_ptr = P;
3552
3553 hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
3554 hypre_TFree(dof_func_offd, HYPRE_MEMORY_HOST);
3555 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
3556 hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
3557 hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
3558 hypre_TFree(coarse_counter, HYPRE_MEMORY_HOST);
3559 hypre_TFree(jj_count, HYPRE_MEMORY_HOST);
3560 hypre_TFree(jj_count_offd, HYPRE_MEMORY_HOST);
3561
3562 if (num_procs > 1) hypre_CSRMatrixDestroy(A_ext);
3563
3564 return hypre_error_flag;
3565
3566 }
3567
3568 /*---------------------------------------------------------------------------
3569 * hypre_BoomerAMGTruncandBuild
3570 *--------------------------------------------------------------------------*/
3571
3572 HYPRE_Int
hypre_BoomerAMGTruncandBuild(hypre_ParCSRMatrix * P,HYPRE_Real trunc_factor,HYPRE_Int max_elmts)3573 hypre_BoomerAMGTruncandBuild( hypre_ParCSRMatrix *P,
3574 HYPRE_Real trunc_factor,
3575 HYPRE_Int max_elmts)
3576 {
3577 hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
3578 hypre_ParCSRCommPkg *commpkg_P = hypre_ParCSRMatrixCommPkg(P);
3579 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(P);
3580 HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd);
3581 HYPRE_Int *P_offd_j = hypre_CSRMatrixJ(P_offd);
3582 HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(P_offd);
3583 HYPRE_Int n_fine = hypre_CSRMatrixNumRows(P_offd);
3584
3585 HYPRE_BigInt *new_col_map_offd;
3586 HYPRE_Int *tmp_map_offd = NULL;
3587
3588 HYPRE_Int P_offd_size=0, new_num_cols_offd;
3589
3590 HYPRE_Int *P_marker;
3591
3592 HYPRE_Int i;
3593
3594 HYPRE_Int index;
3595
3596 /* Compress P, removing coefficients smaller than trunc_factor * Max */
3597
3598 if (trunc_factor != 0.0 || max_elmts > 0)
3599 {
3600 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
3601 P_offd_j = hypre_CSRMatrixJ(P_offd);
3602 P_offd_i = hypre_CSRMatrixI(P_offd);
3603 P_offd_size = P_offd_i[n_fine];
3604 }
3605
3606 new_num_cols_offd = 0;
3607 if (P_offd_size)
3608 {
3609 P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST);
3610
3611 /*#define HYPRE_SMP_PRIVATE i
3612 #include "../utilities/hypre_smp_forloop.h"*/
3613 for (i=0; i < num_cols_offd; i++)
3614 P_marker[i] = 0;
3615
3616 for (i=0; i < P_offd_size; i++)
3617 {
3618 index = P_offd_j[i];
3619 if (!P_marker[index])
3620 {
3621 new_num_cols_offd++;
3622 P_marker[index] = 1;
3623 }
3624 }
3625
3626 tmp_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd, HYPRE_MEMORY_HOST);
3627 new_col_map_offd = hypre_CTAlloc(HYPRE_BigInt, new_num_cols_offd, HYPRE_MEMORY_HOST);
3628
3629 index = 0;
3630 for (i=0; i < new_num_cols_offd; i++)
3631 {
3632 while (P_marker[index]==0) index++;
3633 tmp_map_offd[i] = index++;
3634 }
3635
3636 /*#define HYPRE_SMP_PRIVATE i
3637 #include "../utilities/hypre_smp_forloop.h"*/
3638 for (i=0; i < P_offd_size; i++)
3639 P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
3640 P_offd_j[i],
3641 new_num_cols_offd);
3642 }
3643
3644 index = 0;
3645 for (i = 0; i < new_num_cols_offd; i++)
3646 {
3647 while (P_marker[index] == 0) index++;
3648
3649 new_col_map_offd[i] = col_map_offd[index];
3650 index++;
3651 }
3652
3653 if (P_offd_size) hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
3654
3655 if (new_num_cols_offd)
3656 {
3657 hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
3658 hypre_TFree(col_map_offd, HYPRE_MEMORY_HOST);
3659 hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
3660 hypre_CSRMatrixNumCols(P_offd) = new_num_cols_offd;
3661 }
3662
3663 if (commpkg_P != NULL) hypre_MatvecCommPkgDestroy(commpkg_P);
3664 hypre_MatvecCommPkgCreate(P);
3665
3666 return hypre_error_flag;
3667
3668 }
3669
/* Build the weighted-Jacobi iteration matrix C = I - w * D^{-1} A.
 *
 * The first entry of each diag row of A is taken as the diagonal
 * coefficient a_ii (hypre CSR convention: the diagonal is stored first).
 * For each row:
 *    C_ii = 1 - w,          C_ij = -(w / a_ii) * a_ij   (j != i)
 * When w == 0, a per-row automatic weight is used instead: the sum of
 * absolute values of the entire row (diag + offd parts), giving
 *    C_ii = 1 - a_ii / row_sum,   C_ij = -a_ij / row_sum.
 * The sparsity pattern and the off-diagonal column map of C are exact
 * copies of those of A.  Returns the newly created matrix; the caller
 * owns and must destroy it.
 */
hypre_ParCSRMatrix *hypre_CreateC( hypre_ParCSRMatrix *A,
                       HYPRE_Real w)
{
   MPI_Comm            comm            = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix    *A_diag          = hypre_ParCSRMatrixDiag(A);
   HYPRE_Real         *A_diag_data     = hypre_CSRMatrixData(A_diag);
   HYPRE_Int          *A_diag_i        = hypre_CSRMatrixI(A_diag);
   HYPRE_Int          *A_diag_j        = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix    *A_offd          = hypre_ParCSRMatrixOffd(A);
   HYPRE_Real         *A_offd_data     = hypre_CSRMatrixData(A_offd);
   HYPRE_Int          *A_offd_i        = hypre_CSRMatrixI(A_offd);
   HYPRE_Int          *A_offd_j        = hypre_CSRMatrixJ(A_offd);

   HYPRE_BigInt       *row_starts      = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_BigInt       *col_map_offd_A  = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int           num_rows        = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int           num_cols_offd   = hypre_CSRMatrixNumCols(A_offd);
   HYPRE_BigInt        global_num_rows = hypre_ParCSRMatrixGlobalNumRows(A);

   hypre_ParCSRMatrix *C;
   hypre_CSRMatrix    *C_diag;
   hypre_CSRMatrix    *C_offd;

   HYPRE_Real         *C_diag_data, *C_offd_data;
   HYPRE_Int          *C_diag_i, *C_diag_j, *C_offd_i, *C_offd_j;
   HYPRE_BigInt       *col_map_offd_C;

   HYPRE_Int           row, jj, diag_pos;
   HYPRE_Real          scale;     /* factor applied to off-diagonal entries */
   HYPRE_Real          weight;    /* per-row weight when w == 0 */

   /* C is square with exactly the same distribution and nnz counts as A */
   C = hypre_ParCSRMatrixCreate(comm, global_num_rows, global_num_rows, row_starts,
                                row_starts, num_cols_offd, A_diag_i[num_rows], A_offd_i[num_rows]);

   hypre_ParCSRMatrixInitialize(C);

   C_diag = hypre_ParCSRMatrixDiag(C);
   C_offd = hypre_ParCSRMatrixOffd(C);

   C_diag_i    = hypre_CSRMatrixI(C_diag);
   C_diag_j    = hypre_CSRMatrixJ(C_diag);
   C_diag_data = hypre_CSRMatrixData(C_diag);

   C_offd_i    = hypre_CSRMatrixI(C_offd);
   C_offd_j    = hypre_CSRMatrixJ(C_offd);
   C_offd_data = hypre_CSRMatrixData(C_offd);

   col_map_offd_C = hypre_ParCSRMatrixColMapOffd(C);

   /* duplicate A's off-diagonal column map */
   for (row = 0; row < num_cols_offd; row++)
   {
      col_map_offd_C[row] = col_map_offd_A[row];
   }

   for (row = 0; row < num_rows; row++)
   {
      diag_pos = A_diag_i[row];   /* a_ii is stored first in the row */

      if (w == 0)
      {
         /* automatic weight: sum of |a_ij| over the whole row of A */
         weight = fabs(A_diag_data[diag_pos]);
         for (jj = diag_pos+1; jj < A_diag_i[row+1]; jj++)
         {
            weight += fabs(A_diag_data[jj]);
         }
         for (jj = A_offd_i[row]; jj < A_offd_i[row+1]; jj++)
         {
            weight += fabs(A_offd_data[jj]);
         }
         scale = -1/weight;
         C_diag_data[diag_pos] = 1.0-A_diag_data[diag_pos]/weight;
      }
      else
      {
         scale = -w/A_diag_data[diag_pos];
         C_diag_data[diag_pos] = 1.0-w;
      }
      C_diag_j[diag_pos] = A_diag_j[diag_pos];

      C_diag_i[row] = diag_pos;
      C_offd_i[row] = A_offd_i[row];

      /* scaled off-diagonal entries; column pattern copied verbatim */
      for (jj = diag_pos+1; jj < A_diag_i[row+1]; jj++)
      {
         C_diag_data[jj] = A_diag_data[jj]*scale;
         C_diag_j[jj]    = A_diag_j[jj];
      }
      for (jj = A_offd_i[row]; jj < A_offd_i[row+1]; jj++)
      {
         C_offd_data[jj] = A_offd_data[jj]*scale;
         C_offd_j[jj]    = A_offd_j[jj];
      }
   }
   C_diag_i[num_rows] = A_diag_i[num_rows];
   C_offd_i[num_rows] = A_offd_i[num_rows];

   return C;
}
3767
3768 /* RL */
3769 HYPRE_Int
hypre_BoomerAMGBuildInterpOnePntHost(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,hypre_ParCSRMatrix ** P_ptr)3770 hypre_BoomerAMGBuildInterpOnePntHost( hypre_ParCSRMatrix *A,
3771 HYPRE_Int *CF_marker,
3772 hypre_ParCSRMatrix *S,
3773 HYPRE_BigInt *num_cpts_global,
3774 HYPRE_Int num_functions,
3775 HYPRE_Int *dof_func,
3776 HYPRE_Int debug_flag,
3777 hypre_ParCSRMatrix **P_ptr)
3778 {
3779 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
3780 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3781 hypre_ParCSRCommHandle *comm_handle;
3782
3783 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
3784 HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
3785 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
3786 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
3787
3788 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
3789 HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
3790 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
3791 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
3792
3793 HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
3794 //HYPRE_Int *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A);
3795
3796 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
3797 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
3798 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
3799
3800 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
3801 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
3802 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
3803
3804 /* Interpolation matrix P */
3805 hypre_ParCSRMatrix *P;
3806 /* csr's */
3807 hypre_CSRMatrix *P_diag;
3808 hypre_CSRMatrix *P_offd;
3809 /* arrays */
3810 HYPRE_Real *P_diag_data;
3811 HYPRE_Int *P_diag_i;
3812 HYPRE_Int *P_diag_j;
3813 HYPRE_Real *P_offd_data;
3814 HYPRE_Int *P_offd_i;
3815 HYPRE_Int *P_offd_j;
3816 HYPRE_Int num_cols_offd_P;
3817 HYPRE_Int *tmp_map_offd = NULL;
3818 HYPRE_BigInt *col_map_offd_P = NULL;
3819 /* CF marker off-diag part */
3820 HYPRE_Int *CF_marker_offd = NULL;
3821 /* func type off-diag part */
3822 HYPRE_Int *dof_func_offd = NULL;
3823 /* nnz */
3824 HYPRE_Int nnz_diag, nnz_offd, cnt_diag, cnt_offd;
3825 HYPRE_Int *marker_diag, *marker_offd = NULL;
3826 /* local size */
3827 HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
3828 /* number of C-pts */
3829 HYPRE_Int n_cpts = 0;
3830 /* fine to coarse mapping: diag part and offd part */
3831 HYPRE_Int *fine_to_coarse;
3832 HYPRE_BigInt *fine_to_coarse_offd = NULL;
3833 HYPRE_BigInt total_global_cpts, my_first_cpt;
3834 HYPRE_Int my_id, num_procs;
3835 HYPRE_Int num_sends;
3836 HYPRE_Int *int_buf_data = NULL;
3837 HYPRE_BigInt *big_int_buf_data = NULL;
3838 //HYPRE_Int col_start = hypre_ParCSRMatrixFirstRowIndex(A);
3839 //HYPRE_Int col_end = col_start + n_fine;
3840
3841 HYPRE_Int i, j, i1, j1, k1, index, start;
3842 HYPRE_Int *max_abs_cij;
3843 char *max_abs_diag_offd;
3844 HYPRE_Real max_abs_aij, vv;
3845
3846 hypre_MPI_Comm_size(comm, &num_procs);
3847 hypre_MPI_Comm_rank(comm,&my_id);
3848
3849 my_first_cpt = num_cpts_global[0];
3850 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
3851 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
3852
3853 /*-------------------------------------------------------------------
3854 * Get the CF_marker data for the off-processor columns
3855 *-------------------------------------------------------------------*/
3856 /* CF marker for the off-diag columns */
3857 if (num_cols_A_offd)
3858 {
3859 CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd,HYPRE_MEMORY_HOST);
3860 }
3861 /* function type indicator for the off-diag columns */
3862 if (num_functions > 1 && num_cols_A_offd)
3863 {
3864 dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd,HYPRE_MEMORY_HOST);
3865 }
3866 /* if CommPkg of A is not present, create it */
3867 if (!comm_pkg)
3868 {
3869 hypre_MatvecCommPkgCreate(A);
3870 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3871 }
3872 /* number of sends to do (number of procs) */
3873 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
3874 /* send buffer, of size send_map_starts[num_sends]),
3875 * i.e., number of entries to send */
3876 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),HYPRE_MEMORY_HOST);
3877
3878 /* copy CF markers of elements to send to buffer
3879 * RL: why copy them with two for loops? Why not just loop through all in one */
3880 index = 0;
3881 for (i = 0; i < num_sends; i++)
3882 {
3883 /* start pos of elements sent to send_proc[i] */
3884 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
3885 /* loop through all elems to send_proc[i] */
3886 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
3887 {
3888 /* CF marker of send_map_elemts[j] */
3889 int_buf_data[index++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
3890 }
3891 }
3892 /* create a handle to start communication. 11: for integer */
3893 comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, CF_marker_offd);
3894 /* destroy the handle to finish communication */
3895 hypre_ParCSRCommHandleDestroy(comm_handle);
3896
3897 /* do a similar communication for dof_func */
3898 if (num_functions > 1)
3899 {
3900 index = 0;
3901 for (i = 0; i < num_sends; i++)
3902 {
3903 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
3904 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
3905 {
3906 int_buf_data[index++] = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
3907 }
3908 }
3909 comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, dof_func_offd);
3910 hypre_ParCSRCommHandleDestroy(comm_handle);
3911 }
3912
3913 hypre_TFree(int_buf_data,HYPRE_MEMORY_HOST);
3914 /*-----------------------------------------------------------------------
3915 * First Pass: Determine size of P and fill in fine_to_coarse mapping,
3916 * and find the most strongly influencing C-pt for each F-pt
3917 *-----------------------------------------------------------------------*/
3918 /* nnz in diag and offd parts */
3919 cnt_diag = 0;
3920 cnt_offd = 0;
3921 max_abs_cij = hypre_CTAlloc(HYPRE_Int, n_fine,HYPRE_MEMORY_HOST);
3922 max_abs_diag_offd = hypre_CTAlloc(char, n_fine,HYPRE_MEMORY_HOST);
3923 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine,HYPRE_MEMORY_HOST);
3924
3925 /* markers initialized as zeros */
3926 marker_diag = hypre_CTAlloc(HYPRE_Int, n_fine,HYPRE_MEMORY_HOST);
3927 marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd,HYPRE_MEMORY_HOST);
3928
3929 for (i = 0; i < n_fine; i++)
3930 {
3931 /*--------------------------------------------------------------------
3932 * If i is a C-point, interpolation is the identity. Also set up
3933 * mapping vector.
3934 *--------------------------------------------------------------------*/
3935 if (CF_marker[i] >= 0)
3936 {
3937 //fine_to_coarse[i] = my_first_cpt + n_cpts;
3938 fine_to_coarse[i] = n_cpts;
3939 n_cpts++;
3940 continue;
3941 }
3942
3943 /* mark all the strong connections: in S */
3944 HYPRE_Int MARK = i + 1;
3945 /* loop through row i of S, diag part */
3946 for (j = S_diag_i[i]; j < S_diag_i[i+1]; j++)
3947 {
3948 marker_diag[S_diag_j[j]] = MARK;
3949 }
3950 /* loop through row i of S, offd part */
3951 if (num_procs > 1)
3952 {
3953 for (j = S_offd_i[i]; j < S_offd_i[i+1]; j++)
3954 {
3955 j1 = S_offd_j[j];
3956 marker_offd[j1] = MARK;
3957 }
3958 }
3959
3960 fine_to_coarse[i] = -1;
3961 /*---------------------------------------------------------------------------
3962 * If i is an F-pt, interpolation is from the most strongly influencing C-pt
3963 * Find this C-pt and save it
3964 *--------------------------------------------------------------------------*/
3965 /* if we failed to find any strong C-pt, mark this point as an 'n' */
3966 char marker = 'n';
3967 /* max abs val */
3968 max_abs_aij = -1.0;
3969 /* loop through row i of A, diag part */
3970 for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
3971 {
3972 i1 = A_diag_j[j];
3973 vv = fabs(A_diag_data[j]);
3974 #if 0
3975 /* !!! this is a hack just for code verification purpose !!!
3976 it basically says:
3977 1. if we see |a_ij| < 1e-14, force it to be 1e-14
3978 2. if we see |a_ij| == the max(|a_ij|) so far exactly,
3979 replace it if the j idx is smaller
3980 Reasons:
3981 1. numerical round-off for eps-level values
3982 2. entries in CSR rows may be listed in different orders
3983 */
3984 vv = vv < 1e-14 ? 1e-14 : vv;
3985 if (CF_marker[i1] >= 0 && marker_diag[i1] == MARK &&
3986 vv == max_abs_aij && i1 < max_abs_cij[i])
3987 {
3988 /* mark it as a 'd' */
3989 marker = 'd';
3990 max_abs_cij[i] = i1;
3991 max_abs_aij = vv;
3992 continue;
3993 }
3994 #endif
3995 /* it is a strong C-pt and has abs val larger than what have seen */
3996 if (CF_marker[i1] >= 0 && marker_diag[i1] == MARK && vv > max_abs_aij)
3997 {
3998 /* mark it as a 'd' */
3999 marker = 'd';
4000 max_abs_cij[i] = i1;
4001 max_abs_aij = vv;
4002 }
4003 }
4004 /* offd part */
4005 if (num_procs > 1)
4006 {
4007 for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
4008 {
4009 i1 = A_offd_j[j];
4010 vv = fabs(A_offd_data[j]);
4011 if (CF_marker_offd[i1] >= 0 && marker_offd[i1] == MARK && vv > max_abs_aij)
4012 {
4013 /* mark it as an 'o' */
4014 marker = 'o';
4015 max_abs_cij[i] = i1;
4016 max_abs_aij = vv;
4017 }
4018 }
4019 }
4020
4021 max_abs_diag_offd[i] = marker;
4022
4023 if (marker == 'd')
4024 {
4025 cnt_diag ++;
4026 }
4027 else if (marker == 'o')
4028 {
4029 cnt_offd ++;
4030 }
4031 }
4032
4033 nnz_diag = cnt_diag + n_cpts;
4034 nnz_offd = cnt_offd;
4035
4036 /*------------- allocate arrays */
4037 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1,HYPRE_MEMORY_DEVICE);
4038 P_diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag,HYPRE_MEMORY_DEVICE);
4039 P_diag_data = hypre_CTAlloc(HYPRE_Real, nnz_diag,HYPRE_MEMORY_DEVICE);
4040
4041 /* not in ``if num_procs > 1'',
4042 * allocation needed even for empty CSR */
4043 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1,HYPRE_MEMORY_DEVICE);
4044 P_offd_j = hypre_CTAlloc(HYPRE_Int, nnz_offd,HYPRE_MEMORY_DEVICE);
4045 P_offd_data = hypre_CTAlloc(HYPRE_Real, nnz_offd,HYPRE_MEMORY_DEVICE);
4046
4047 /* redundant */
4048 P_diag_i[0] = 0;
4049 P_offd_i[0] = 0;
4050
4051 /* reset counters */
4052 cnt_diag = 0;
4053 cnt_offd = 0;
4054
4055 /*-----------------------------------------------------------------------
4056 * Send and receive fine_to_coarse info.
4057 *-----------------------------------------------------------------------*/
4058 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_A_offd,HYPRE_MEMORY_HOST);
4059 big_int_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),HYPRE_MEMORY_HOST);
4060 index = 0;
4061 for (i = 0; i < num_sends; i++)
4062 {
4063 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
4064 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
4065 {
4066 big_int_buf_data[index++] = my_first_cpt
4067 +(HYPRE_BigInt)fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
4068 }
4069 }
4070 comm_handle = hypre_ParCSRCommHandleCreate(21, comm_pkg, big_int_buf_data, fine_to_coarse_offd);
4071 hypre_ParCSRCommHandleDestroy(comm_handle);
4072
4073 /*-----------------------------------------------------------------------
4074 * Second Pass: Populate P
4075 *-----------------------------------------------------------------------*/
4076 for (i = 0; i < n_fine; i++)
4077 {
4078 if (CF_marker[i] >= 0)
4079 {
4080 /*--------------------------------------------------------------------
4081 * If i is a C-point, interpolation is the identity.
4082 *--------------------------------------------------------------------*/
4083 //P_diag_j[cnt_diag] = fine_to_coarse[i] - my_first_cpt;
4084 P_diag_j[cnt_diag] = fine_to_coarse[i];
4085 P_diag_data[cnt_diag++] = 1.0;
4086 }
4087 else
4088 {
4089 /*---------------------------------------------------------------------------
4090 * If i is an F-pt, interpolation is from the most strongly influencing C-pt
4091 *--------------------------------------------------------------------------*/
4092 if (max_abs_diag_offd[i] == 'd')
4093 {
4094 /* on diag part of P */
4095 j = max_abs_cij[i];
4096 //P_diag_j[cnt_diag] = fine_to_coarse[j] - my_first_cpt;
4097 P_diag_j[cnt_diag] = fine_to_coarse[j];
4098 P_diag_data[cnt_diag++] = 1.0;
4099 }
4100 else if (max_abs_diag_offd[i] == 'o')
4101 {
4102 /* on offd part of P */
4103 j = max_abs_cij[i];
4104 P_offd_j[cnt_offd] = j;
4105 P_offd_data[cnt_offd++] = 1.0;
4106 }
4107 }
4108
4109 P_diag_i[i+1] = cnt_diag;
4110 P_offd_i[i+1] = cnt_offd;
4111 }
4112
4113 hypre_assert(cnt_diag == nnz_diag);
4114 hypre_assert(cnt_offd == nnz_offd);
4115
4116 /* num of cols in the offd part of P */
4117 num_cols_offd_P = 0;
4118
4119 /* marker_offd: all -1 */
4120 for (i = 0; i < num_cols_A_offd; i++)
4121 {
4122 marker_offd[i] = -1;
4123 }
4124 for (i = 0; i < nnz_offd; i++)
4125 {
4126 i1 = P_offd_j[i];
4127 if (marker_offd[i1] == -1)
4128 {
4129 num_cols_offd_P++;
4130 marker_offd[i1] = 1;
4131 }
4132 }
4133
4134 /* col_map_offd_P: the col indices of the offd of P
4135 * we first keep them be the offd-idx of A */
4136 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd_P,HYPRE_MEMORY_HOST);
4137 tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_P,HYPRE_MEMORY_HOST);
4138 for (i = 0, i1 = 0; i < num_cols_A_offd; i++)
4139 {
4140 if (marker_offd[i] == 1)
4141 {
4142 tmp_map_offd[i1++] = i;
4143 }
4144 }
4145 hypre_assert(i1 == num_cols_offd_P);
4146
4147 /* now, adjust P_offd_j to local idx w.r.t col_map_offd_R
4148 * by searching */
4149 for (i = 0; i < nnz_offd; i++)
4150 {
4151 i1 = P_offd_j[i];
4152 k1 = hypre_BinarySearch(tmp_map_offd, i1, num_cols_offd_P);
4153 /* search must succeed */
4154 hypre_assert(k1 >= 0 && k1 < num_cols_offd_P);
4155 P_offd_j[i] = k1;
4156 }
4157
4158 /* change col_map_offd_P to global coarse ids */
4159 for (i = 0; i < num_cols_offd_P; i++)
4160 {
4161 col_map_offd_P[i] = fine_to_coarse_offd[tmp_map_offd[i]];
4162 }
4163
4164 /* Now, we should have everything of Parcsr matrix P */
4165 P = hypre_ParCSRMatrixCreate(comm,
4166 hypre_ParCSRMatrixGlobalNumCols(A), /* global num of rows */
4167 total_global_cpts, /* global num of cols */
4168 hypre_ParCSRMatrixColStarts(A), /* row_starts */
4169 num_cpts_global, /* col_starts */
4170 num_cols_offd_P, /* num cols offd */
4171 nnz_diag,
4172 nnz_offd);
4173
4174 P_diag = hypre_ParCSRMatrixDiag(P);
4175 hypre_CSRMatrixData(P_diag) = P_diag_data;
4176 hypre_CSRMatrixI(P_diag) = P_diag_i;
4177 hypre_CSRMatrixJ(P_diag) = P_diag_j;
4178
4179 P_offd = hypre_ParCSRMatrixOffd(P);
4180 hypre_CSRMatrixData(P_offd) = P_offd_data;
4181 hypre_CSRMatrixI(P_offd) = P_offd_i;
4182 hypre_CSRMatrixJ(P_offd) = P_offd_j;
4183
4184 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
4185
4186 /* create CommPkg of P */
4187 hypre_MatvecCommPkgCreate(P);
4188
4189 *P_ptr = P;
4190
4191 /* free workspace */
4192 hypre_TFree(CF_marker_offd,HYPRE_MEMORY_HOST);
4193 hypre_TFree(dof_func_offd,HYPRE_MEMORY_HOST);
4194 hypre_TFree(tmp_map_offd,HYPRE_MEMORY_HOST);
4195 hypre_TFree(big_int_buf_data,HYPRE_MEMORY_HOST);
4196 hypre_TFree(fine_to_coarse,HYPRE_MEMORY_HOST);
4197 hypre_TFree(fine_to_coarse_offd,HYPRE_MEMORY_HOST);
4198 hypre_TFree(marker_diag,HYPRE_MEMORY_HOST);
4199 hypre_TFree(marker_offd,HYPRE_MEMORY_HOST);
4200 hypre_TFree(max_abs_cij,HYPRE_MEMORY_HOST);
4201 hypre_TFree(max_abs_diag_offd,HYPRE_MEMORY_HOST);
4202
4203 return hypre_error_flag;
4204 }
4205
4206 HYPRE_Int
hypre_BoomerAMGBuildInterpOnePnt(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,hypre_ParCSRMatrix ** P_ptr)4207 hypre_BoomerAMGBuildInterpOnePnt( hypre_ParCSRMatrix *A,
4208 HYPRE_Int *CF_marker,
4209 hypre_ParCSRMatrix *S,
4210 HYPRE_BigInt *num_cpts_global,
4211 HYPRE_Int num_functions,
4212 HYPRE_Int *dof_func,
4213 HYPRE_Int debug_flag,
4214 hypre_ParCSRMatrix **P_ptr)
4215 {
4216 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
4217 hypre_GpuProfilingPushRange("OnePntInterp");
4218 #endif
4219
4220 HYPRE_Int ierr = 0;
4221
4222 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
4223 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
4224
4225 if (exec == HYPRE_EXEC_DEVICE)
4226 {
4227 ierr = hypre_BoomerAMGBuildInterpOnePntDevice(A,CF_marker,S,num_cpts_global,num_functions,
4228 dof_func,debug_flag,P_ptr);
4229 }
4230 else
4231 #endif
4232 {
4233 ierr = hypre_BoomerAMGBuildInterpOnePntHost(A,CF_marker,S,num_cpts_global,num_functions,
4234 dof_func,debug_flag,P_ptr);
4235 }
4236
4237 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
4238 hypre_GpuProfilingPopRange();
4239 #endif
4240
4241 return ierr;
4242 }
4243