1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "_hypre_parcsr_block_mv.h"
9
10 /*---------------------------------------------------------------------------
 * hypre_BoomerAMGBuildBlockInterp
12
13 This is the block version of classical R-S interpolation. We use the complete
14 blocks of A (not just the diagonals of these blocks).
15
16 A and P are now Block matrices. The Strength matrix S is not as it gives
17 nodal strengths.
18
19 CF_marker is size number of nodes.
20
21 add_weak_to_diag 0 = don't add weak connections to diag (distribute instead)
22 1 = do add
23
24 *--------------------------------------------------------------------------*/
25
/**
 * Build the block version of classical Ruge-Stueben interpolation P for
 * BoomerAMG.  Full (dense) blocks of A are used, not just their diagonals.
 *
 * A and the returned P are block matrices; the strength matrix S is a
 * scalar (nodal) matrix.  CF_marker has one entry per node.
 *
 * Parameters:
 *   A                - fine-grid block matrix
 *   CF_marker        - per-node C/F splitting (>= 0 means C-point,
 *                      -3 marks special F-points excluded from weights)
 *   S                - nodal strength matrix
 *   num_cpts_global  - first coarse point owned by this proc ([0]) and,
 *                      on the last proc, the global number of C-points ([1])
 *   num_functions, dof_func - unused in the block version (kept for a
 *                      signature consistent with the scalar routine)
 *   debug_flag       - 4 enables timing printouts
 *   trunc_factor, max_elmts - truncation controls passed to
 *                      hypre_BoomerAMGBlockInterpTruncation
 *   add_weak_to_diag - 0: distribute weak connections, 1: add them to the
 *                      diagonal
 *   P_ptr            - output: the interpolation block matrix
 *
 * Returns hypre_error_flag.
 */
HYPRE_Int
hypre_BoomerAMGBuildBlockInterp( hypre_ParCSRBlockMatrix *A,
                                 HYPRE_Int *CF_marker,
                                 hypre_ParCSRMatrix   *S,
                                 HYPRE_BigInt         *num_cpts_global,
                                 HYPRE_Int             num_functions,
                                 HYPRE_Int            *dof_func,
                                 HYPRE_Int             debug_flag,
                                 HYPRE_Real            trunc_factor,
                                 HYPRE_Int             max_elmts,
                                 HYPRE_Int             add_weak_to_diag,
                                 hypre_ParCSRBlockMatrix  **P_ptr )
{

   MPI_Comm           comm = hypre_ParCSRBlockMatrixComm(A);
   hypre_ParCSRCommPkg     *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
   hypre_ParCSRCommHandle  *comm_handle;

   hypre_CSRBlockMatrix *A_diag = hypre_ParCSRBlockMatrixDiag(A);
   HYPRE_Real      *A_diag_data = hypre_CSRBlockMatrixData(A_diag);
   HYPRE_Int             *A_diag_i = hypre_CSRBlockMatrixI(A_diag);
   HYPRE_Int             *A_diag_j = hypre_CSRBlockMatrixJ(A_diag);

   /* number of scalar unknowns per node; bnnz = entries per dense block */
   HYPRE_Int              block_size = hypre_CSRBlockMatrixBlockSize(A_diag);
   HYPRE_Int              bnnz = block_size*block_size;

   hypre_CSRBlockMatrix *A_offd = hypre_ParCSRBlockMatrixOffd(A);
   HYPRE_Real      *A_offd_data = hypre_CSRBlockMatrixData(A_offd);
   HYPRE_Int             *A_offd_i = hypre_CSRBlockMatrixI(A_offd);
   HYPRE_Int             *A_offd_j = hypre_CSRBlockMatrixJ(A_offd);
   HYPRE_Int              num_cols_A_offd = hypre_CSRBlockMatrixNumCols(A_offd);
   HYPRE_BigInt          *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(A);

   /* S is a nodal (scalar) matrix - only its pattern is used */
   hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
   HYPRE_Int             *S_diag_i = hypre_CSRMatrixI(S_diag);
   HYPRE_Int             *S_diag_j = hypre_CSRMatrixJ(S_diag);

   hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
   HYPRE_Int             *S_offd_i = hypre_CSRMatrixI(S_offd);
   HYPRE_Int             *S_offd_j = hypre_CSRMatrixJ(S_offd);

   hypre_ParCSRBlockMatrix *P;
   HYPRE_BigInt          *col_map_offd_P;
   HYPRE_Int             *tmp_map_offd = NULL;

   HYPRE_Int             *CF_marker_offd = NULL;

   /* ghost rows of A needed to distribute strong-F connections that live
      off-processor */
   hypre_CSRBlockMatrix *A_ext;

   HYPRE_Real      *A_ext_data = NULL;
   HYPRE_Int       *A_ext_i = NULL;
   HYPRE_BigInt    *A_ext_j = NULL;

   hypre_CSRBlockMatrix    *P_diag;
   hypre_CSRBlockMatrix    *P_offd;

   HYPRE_Real      *P_diag_data;
   HYPRE_Int             *P_diag_i;
   HYPRE_Int             *P_diag_j;
   HYPRE_Real      *P_offd_data;
   HYPRE_Int             *P_offd_i;
   HYPRE_Int             *P_offd_j;

   HYPRE_Int              P_diag_size, P_offd_size;

   /* per-row markers: map a column index of A to its slot in the current
      row of P (or strong_f_marker for strong F-neighbors) */
   HYPRE_Int             *P_marker, *P_marker_offd;

   HYPRE_Int              jj_counter,jj_counter_offd;
   HYPRE_Int             *jj_count, *jj_count_offd = NULL;
   HYPRE_Int              jj_begin_row,jj_begin_row_offd;
   HYPRE_Int              jj_end_row,jj_end_row_offd;

   HYPRE_Int              start_indexing = 0; /* start indexing for P_data at 0 */

   HYPRE_Int              n_fine = hypre_CSRBlockMatrixNumRows(A_diag);

   HYPRE_Int              strong_f_marker;

   HYPRE_Int             *fine_to_coarse;
   HYPRE_BigInt          *fine_to_coarse_offd = NULL;
   HYPRE_Int             *coarse_counter;
   HYPRE_Int              coarse_shift;
   HYPRE_BigInt           total_global_cpts, my_first_cpt;
   HYPRE_Int              num_cols_P_offd;

   HYPRE_Int              bd;

   HYPRE_Int              i,i1,i2;
   HYPRE_Int              j,jl,jj,jj1;
   HYPRE_Int              kc;
   HYPRE_BigInt           big_k;
   HYPRE_Int              start;

   HYPRE_Int              c_num;

   HYPRE_Int              my_id;
   HYPRE_Int              num_procs;
   HYPRE_Int              num_threads;
   HYPRE_Int              num_sends;
   HYPRE_Int              index;
   HYPRE_Int              ns, ne, size, rest;
   HYPRE_Int             *int_buf_data = NULL;
   HYPRE_BigInt          *big_buf_data = NULL;

   /* [col_1, col_n) is the global row range owned by this proc */
   HYPRE_BigInt           col_1 = hypre_ParCSRBlockMatrixFirstRowIndex(A);
   HYPRE_Int              local_numrows = hypre_CSRBlockMatrixNumRows(A_diag);
   HYPRE_BigInt           col_n = col_1 + (HYPRE_BigInt)local_numrows;

   HYPRE_Real       wall_time;  /* for debugging instrumentation  */

   /* scratch blocks (each bnnz reals) used throughout the weight setup */
   HYPRE_Real       *identity_block;
   HYPRE_Real       *zero_block;
   HYPRE_Real       *diagonal_block;
   HYPRE_Real       *sum_block;
   HYPRE_Real       *distribute_block;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);
   /* num_threads = hypre_NumThreads(); */
   num_threads = 1;

   my_first_cpt = num_cpts_global[0];
   /* only the last proc knows the global C-point count; broadcast it */
   if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
   hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);

   /*-------------------------------------------------------------------
    * Get the CF_marker data for the off-processor columns
    *-------------------------------------------------------------------*/

   if (debug_flag==4) wall_time = time_getWallclockSeconds();

   CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);


   if (!comm_pkg)
   {
      hypre_BlockMatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
                                                                           num_sends), HYPRE_MEMORY_HOST);

   /* pack the CF marker values of the nodes this proc must send */
   index = 0;
   for (i = 0; i < num_sends; i++)
   {
      start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
      for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
      {
         int_buf_data[index++]
            = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }

   }

   /* we do not need the block version of comm handle - because
      CF_marker corresponds to the nodal matrix.  This call populates
      CF_marker_offd */
   comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
                                               CF_marker_offd);

   hypre_ParCSRCommHandleDestroy(comm_handle);


   if (debug_flag==4)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d     Interp: Comm 1 CF_marker =    %f\n",
                   my_id, wall_time);
      fflush(NULL);
   }

   /*----------------------------------------------------------------------
    * Get the ghost rows of A
    *---------------------------------------------------------------------*/

   if (debug_flag==4) wall_time = time_getWallclockSeconds();

   if (num_procs > 1)
   {
      A_ext      = hypre_ParCSRBlockMatrixExtractBExt(A, A, 1);
      A_ext_i    = hypre_CSRBlockMatrixI(A_ext);
      A_ext_j    = hypre_CSRBlockMatrixBigJ(A_ext);
      A_ext_data = hypre_CSRBlockMatrixData(A_ext);
   }

   /* Compress A_ext in place, translating its global column indices:
      local columns become (big_k - col_1) >= 0; off-diag columns found in
      col_map_offd are encoded as the negative value (-kc-1); columns not
      known to this proc are dropped.  (When num_procs == 1 there are no
      offd columns, so these loops do no iterations.) */
   index = 0;
   for (i=0; i < num_cols_A_offd; i++)
   {
      for (j=A_ext_i[i]; j < A_ext_i[i+1]; j++)
      {
         big_k = A_ext_j[j];
         if (big_k >= col_1 && big_k < col_n)
         {
            A_ext_j[index] = big_k - col_1;
            /* for the data field we must get all of the block data */
            for (bd = 0; bd < bnnz; bd++)
            {
               A_ext_data[index*bnnz + bd] = A_ext_data[j*bnnz + bd];
            }
            index++;
         }
         else
         {
            kc = hypre_BigBinarySearch(col_map_offd, big_k, num_cols_A_offd);
            if (kc > -1)
            {
               A_ext_j[index] = (HYPRE_BigInt)(-kc-1);
               for (bd = 0; bd < bnnz; bd++)
               {
                  A_ext_data[index*bnnz + bd] = A_ext_data[j*bnnz + bd];
               }
               index++;
            }
         }
      }
      A_ext_i[i] = index;
   }
   /* shift row pointers so A_ext_i[i]..A_ext_i[i+1] again brackets row i */
   for (i = num_cols_A_offd; i > 0; i--)
      A_ext_i[i] = A_ext_i[i-1];
   if (num_procs > 1) A_ext_i[0] = 0;

   if (debug_flag==4)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d  Interp: Comm 2   Get A_ext =  %f\n",
                   my_id, wall_time);
      fflush(NULL);
   }


   /*-----------------------------------------------------------------------
    *  First Pass: Determine size of P and fill in fine_to_coarse mapping.
    *-----------------------------------------------------------------------*/

   /*-----------------------------------------------------------------------
    *  Initialize counters and allocate mapping vector.
    *-----------------------------------------------------------------------*/

   coarse_counter = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
   jj_count = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
   jj_count_offd = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);

   fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);

   for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1;

   jj_counter = start_indexing;
   jj_counter_offd = start_indexing;

   /*-----------------------------------------------------------------------
    *  Loop over fine grid.
    *-----------------------------------------------------------------------*/

   /* distribute the n_fine rows over num_threads contiguous chunks
      (currently num_threads == 1, but the partitioning is kept) */
   for (j = 0; j < num_threads; j++)
   {
      size = n_fine/num_threads;
      rest = n_fine - size*num_threads;
      if (j < rest)
      {
         ns = j*size+j;
         ne = (j+1)*size+j+1;
      }
      else
      {
         ns = j*size+rest;
         ne = (j+1)*size+rest;
      }


      /* loop over the fine grid points */
      for (i = ns; i < ne; i++)
      {

         /*--------------------------------------------------------------------
          *  If i is a C-point, interpolation is the identity. Also set up
          *  mapping vector (fine_to_coarse is the mapping vector).
          *--------------------------------------------------------------------*/

         if (CF_marker[i] >= 0)
         {
            jj_count[j]++;
            fine_to_coarse[i] = coarse_counter[j];
            coarse_counter[j]++;
         }

         /*--------------------------------------------------------------------
          *  If i is an F-point, interpolation is from the C-points that
          *  strongly influence i.
          *--------------------------------------------------------------------*/

         else
         {
            for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
            {
               i1 = S_diag_j[jj];
               if (CF_marker[i1] >= 0)
               {
                  jj_count[j]++;
               }
            }

            if (num_procs > 1)
            {
               for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
               {
                  i1 = S_offd_j[jj];
                  if (CF_marker_offd[i1] >= 0)
                  {
                     jj_count_offd[j]++;
                  }
               }
            }
         }
      }
   }

   /*-----------------------------------------------------------------------
    *  Allocate  arrays.
    *-----------------------------------------------------------------------*/

   /* prefix-sum the per-thread counts to get global offsets and totals */
   for (i=0; i < num_threads-1; i++)
   {
      coarse_counter[i+1] += coarse_counter[i];
      jj_count[i+1] += jj_count[i];
      jj_count_offd[i+1] += jj_count_offd[i];
   }
   i = num_threads-1;
   jj_counter = jj_count[i];
   jj_counter_offd = jj_count_offd[i];

   P_diag_size = jj_counter;

   P_diag_i    = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
   P_diag_j    = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
   /* we need to include the size of the blocks in the data size */
   P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size*bnnz, HYPRE_MEMORY_HOST);

   P_diag_i[n_fine] = jj_counter;


   P_offd_size = jj_counter_offd;

   P_offd_i    = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
   P_offd_j    = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
   /* we need to include the size of the blocks in the data size */
   P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size*bnnz, HYPRE_MEMORY_HOST);

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   jj_counter = start_indexing;
   jj_counter_offd = start_indexing;

   if (debug_flag==4)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d     Interp: Internal work 1 =     %f\n",
                   my_id, wall_time);
      fflush(NULL);
   }

   /* we need a block identity and a block of zeros*/
   identity_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
   zero_block     = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);

   for(i = 0; i < block_size; i++)
   {
      identity_block[i*block_size + i] = 1.0;
   }


   /* we also need a block to keep track of the diagonal values and a sum */
   diagonal_block   = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
   sum_block        = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
   distribute_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);

   /*-----------------------------------------------------------------------
    *  Send and receive fine_to_coarse info.
    *-----------------------------------------------------------------------*/

   if (debug_flag==4) wall_time = time_getWallclockSeconds();

   fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_A_offd, HYPRE_MEMORY_HOST);
   big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
                                                                              num_sends), HYPRE_MEMORY_HOST);

   /* make fine_to_coarse globally consistent by adding each thread's
      coarse-point offset */
   for (j = 0; j < num_threads; j++)
   {
      coarse_shift = 0;
      if (j > 0) coarse_shift = coarse_counter[j-1];
      size = n_fine/num_threads;
      rest = n_fine - size*num_threads;
      if (j < rest)
      {
         ns = j*size+j;
         ne = (j+1)*size+j+1;
      }
      else
      {
         ns = j*size+rest;
         ne = (j+1)*size+rest;
      }
      for (i = ns; i < ne; i++)
         fine_to_coarse[i] += coarse_shift;
   }
   index = 0;
   for (i = 0; i < num_sends; i++)
   {
      start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
      for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
         big_buf_data[index++]
            = my_first_cpt+fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
   }

   /* again, we do not need to use the block version of comm handle since
      the fine to coarse mapping is size of the nodes */

   comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data,
                                               fine_to_coarse_offd);

   hypre_ParCSRCommHandleDestroy(comm_handle);

   if (debug_flag==4)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d     Interp: Comm 4 FineToCoarse = %f\n",
                   my_id, wall_time);
      fflush(NULL);
   }

   if (debug_flag==4) wall_time = time_getWallclockSeconds();

   /*-----------------------------------------------------------------------
    *  Loop over fine grid points.
    *-----------------------------------------------------------------------*/

   /* Second pass: fill P_diag / P_offd with actual interpolation weights */
   for (jl = 0; jl < num_threads; jl++)
   {
      size = n_fine/num_threads;
      rest = n_fine - size*num_threads;
      if (jl < rest)
      {
         ns = jl*size+jl;
         ne = (jl+1)*size+jl+1;
      }
      else
      {
         ns = jl*size+rest;
         ne = (jl+1)*size+rest;
      }
      jj_counter = 0;
      if (jl > 0) jj_counter = jj_count[jl-1];
      jj_counter_offd = 0;
      if (jl > 0) jj_counter_offd = jj_count_offd[jl-1];

      P_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
      P_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);

      for (i = 0; i < n_fine; i++)
      {
         P_marker[i] = -1;
      }
      for (i = 0; i < num_cols_A_offd; i++)
      {
         P_marker_offd[i] = -1;
      }
      /* a fresh marker value per row (decremented at row end) lets us skip
         re-initializing P_marker between rows */
      strong_f_marker = -2;

      for (i = ns; i < ne; i++)
      {

         /*--------------------------------------------------------------------
          *  If i is a c-point, interpolation is the identity.
          *--------------------------------------------------------------------*/

         if (CF_marker[i] >= 0)
         {
            P_diag_i[i] = jj_counter;
            P_diag_j[jj_counter]    = fine_to_coarse[i];
            /* P_diag_data[jj_counter] = one; */
            hypre_CSRBlockMatrixBlockCopyData(identity_block,
                                              &P_diag_data[jj_counter*bnnz],
                                              1.0, block_size);
            jj_counter++;
         }

         /*--------------------------------------------------------------------
          *  If i is an F-point, build interpolation.
          *--------------------------------------------------------------------*/

         else
         {
            /* Diagonal part of P: record the row's coarse neighbors and
               mark strong F-neighbors for later distribution */
            P_diag_i[i] = jj_counter;
            jj_begin_row = jj_counter;

            for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
            {
               i1 = S_diag_j[jj];

               /*--------------------------------------------------------------
                * If neighbor i1 is a C-point, set column number in P_diag_j
                * and initialize interpolation weight to zero.
                *--------------------------------------------------------------*/

               if (CF_marker[i1] >= 0)
               {
                  P_marker[i1] = jj_counter;
                  P_diag_j[jj_counter]    = fine_to_coarse[i1];
                  /* P_diag_data[jj_counter] = zero; */
                  hypre_CSRBlockMatrixBlockCopyData(zero_block,
                                                    &P_diag_data[jj_counter*bnnz],
                                                    1.0, block_size);
                  jj_counter++;
               }

               /*--------------------------------------------------------------
                * If neighbor i1 is an F-point, mark it as a strong F-point
                * whose connection needs to be distributed.
                *--------------------------------------------------------------*/

               else if (CF_marker[i1] != -3)
               {
                  P_marker[i1] = strong_f_marker;
               }
            }
            jj_end_row = jj_counter;

            /* Off-Diagonal part of P */
            P_offd_i[i] = jj_counter_offd;
            jj_begin_row_offd = jj_counter_offd;


            if (num_procs > 1)
            {
               for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
               {
                  i1 = S_offd_j[jj];

                  /*-----------------------------------------------------------
                   * If neighbor i1 is a C-point, set column number in P_offd_j
                   * and initialize interpolation weight to zero.
                   *-----------------------------------------------------------*/

                  if (CF_marker_offd[i1] >= 0)
                  {
                     P_marker_offd[i1] = jj_counter_offd;
                     P_offd_j[jj_counter_offd]  = i1;
                     /* P_offd_data[jj_counter_offd] = zero; */
                     hypre_CSRBlockMatrixBlockCopyData(zero_block,
                                                       &P_offd_data[jj_counter_offd*bnnz],
                                                       1.0, block_size);

                     jj_counter_offd++;
                  }

                  /*-----------------------------------------------------------
                   * If neighbor i1 is an F-point, mark it as a strong F-point
                   * whose connection needs to be distributed.
                   *-----------------------------------------------------------*/

                  else if (CF_marker_offd[i1] != -3)
                  {
                     P_marker_offd[i1] = strong_f_marker;
                  }
               }
            }

            jj_end_row_offd = jj_counter_offd;


            /* get the diagonal block - the first entry of row i in A_diag
               (CSR convention in hypre stores the diagonal first) */
            /* diagonal = A_diag_data[A_diag_i[i]]; */
            hypre_CSRBlockMatrixBlockCopyData(&A_diag_data[A_diag_i[i]*bnnz], diagonal_block,
                                              1.0, block_size);



            /* Here we go through the neighborhood of this grid point */

            /* Loop over ith row of A.  First, the diagonal part of A
               (starting at +1 to skip the diagonal entry itself) */

            for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
            {
               i1 = A_diag_j[jj];

               /*--------------------------------------------------------------
                * Case 1: neighbor i1 is a C-point and strongly influences i,
                * accumulate a_{i,i1} into the interpolation weight.
                *--------------------------------------------------------------*/

               if (P_marker[i1] >= jj_begin_row)
               {
                  /*   P_diag_data[P_marker[i1]] += A_diag_data[jj]; */
                  hypre_CSRBlockMatrixBlockAddAccumulate(&A_diag_data[jj*bnnz],
                                                         &P_diag_data[P_marker[i1]*bnnz],
                                                         block_size);

               }

               /*--------------------------------------------------------------
                * Case 2: neighbor i1 is an F-point and strongly influences i,
                * distribute a_{i,i1} to C-points that strongly influence i.
                * Note: currently no distribution to the diagonal in this case.
                *--------------------------------------------------------------*/

               else if (P_marker[i1] == strong_f_marker || (!add_weak_to_diag  && CF_marker[i1] != -3))
               {
                  /* initialize sum to zero */
                  /* sum = zero; */
                  hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block, 1.0,
                                                    block_size);


                  /*-----------------------------------------------------------
                   * Loop over row of A for point i1 and calculate the sum
                   * of the connections to c-points that strongly influence i.
                   *-----------------------------------------------------------*/

                  /* Diagonal block part of row i1 */
                  for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
                  {
                     i2 = A_diag_j[jj1];
                     if (P_marker[i2] >= jj_begin_row)
                     {
                        /* add diag data to sum */
                        /* sum += A_diag_data[jj1]; */
                        hypre_CSRBlockMatrixBlockAddAccumulate(&A_diag_data[jj1*bnnz],
                                                               sum_block, block_size);
                     }
                  }

                  /* Off-Diagonal block part of row i1 */
                  if (num_procs > 1)
                  {
                     for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
                     {
                        i2 = A_offd_j[jj1];
                        if (P_marker_offd[i2] >= jj_begin_row_offd )
                        {
                           /* add off diag data to sum */
                           /*sum += A_offd_data[jj1];*/
                           hypre_CSRBlockMatrixBlockAddAccumulate(&A_offd_data[jj1*bnnz],
                                                                  sum_block, block_size);

                        }
                     }
                  }
                  /* check whether sum_block is singular */

                  /* distribute = A_diag_data[jj] / sum;*/
                  /* here we want: A_diag_data * sum^(-1) */
                  /* note that results are unaffected for most problems if
                     we do sum^(-1) * A_diag_data - but it seems to matter
                     a little for very non-sym */

                  if (hypre_CSRBlockMatrixBlockMultInv(sum_block, &A_diag_data[jj*bnnz],
                                                       distribute_block, block_size) == 0)
                  {


                     /*-----------------------------------------------------------
                      * Loop over row of A for point i1 and do the distribution.
                      *-----------------------------------------------------------*/

                     /* Diagonal block part of row i1 */
                     for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
                     {
                        i2 = A_diag_j[jj1];
                        if (P_marker[i2] >= jj_begin_row )
                        {

                           /*  P_diag_data[P_marker[i2]]
                               += distribute * A_diag_data[jj1];*/

                           /* multiply - result in sum_block */
                           hypre_CSRBlockMatrixBlockMultAdd(distribute_block,
                                                            &A_diag_data[jj1*bnnz], 0.0,
                                                            sum_block, block_size);


                           /* add result to p_diag_data */
                           hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
                                                                  &P_diag_data[P_marker[i2]*bnnz],
                                                                  block_size);

                        }
                     }

                     /* Off-Diagonal block part of row i1 */
                     if (num_procs > 1)
                     {
                        for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
                        {
                           i2 = A_offd_j[jj1];
                           if (P_marker_offd[i2] >= jj_begin_row_offd)
                           {
                              /* P_offd_data[P_marker_offd[i2]]
                                 += distribute * A_offd_data[jj1]; */

                              /* multiply - result in sum_block */
                              hypre_CSRBlockMatrixBlockMultAdd(distribute_block,
                                                               &A_offd_data[jj1*bnnz], 0.0,
                                                               sum_block, block_size);


                              /* add result to p_offd_data */
                              hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
                                                                     &P_offd_data[P_marker_offd[i2]*bnnz],
                                                                     block_size);
                           }
                        }
                     }
                  }
                  else /* sum block is all zeros (or almost singular) - just add to diagonal */
                  {
                     /* diagonal += A_diag_data[jj]; */
                     if (add_weak_to_diag) hypre_CSRBlockMatrixBlockAddAccumulate(&A_diag_data[jj*bnnz],
                                                                                  diagonal_block,
                                                                                  block_size);

                  }
               }

               /*--------------------------------------------------------------
                * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
                * into the diagonal.
                *--------------------------------------------------------------*/

               else if (CF_marker[i1] != -3 && add_weak_to_diag)
               {
                  /* diagonal += A_diag_data[jj];*/
                  hypre_CSRBlockMatrixBlockAddAccumulate(&A_diag_data[jj*bnnz],
                                                         diagonal_block,
                                                         block_size);

               }

            }


            /*----------------------------------------------------------------
             * Still looping over ith row of A. Next, loop over the
             * off-diagonal part of A
             *---------------------------------------------------------------*/

            if (num_procs > 1)
            {
               for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
               {
                  i1 = A_offd_j[jj];

                  /*--------------------------------------------------------------
                   * Case 1: neighbor i1 is a C-point and strongly influences i,
                   * accumulate a_{i,i1} into the interpolation weight.
                   *--------------------------------------------------------------*/

                  if (P_marker_offd[i1] >= jj_begin_row_offd)
                  {
                     /* P_offd_data[P_marker_offd[i1]] += A_offd_data[jj]; */
                     hypre_CSRBlockMatrixBlockAddAccumulate( &A_offd_data[jj*bnnz],
                                                             &P_offd_data[P_marker_offd[i1]*bnnz],
                                                             block_size);
                  }

                  /*------------------------------------------------------------
                   * Case 2: neighbor i1 is an F-point and strongly influences i,
                   * distribute a_{i,i1} to C-points that strongly influence i.
                   * Note: currently no distribution to the diagonal in this case.
                   *-----------------------------------------------------------*/

                  else if (P_marker_offd[i1] == strong_f_marker || (!add_weak_to_diag  && CF_marker[i1] != -3))
                  {

                     /* initialize sum to zero */
                     hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block,
                                                       1.0, block_size);

                     /*---------------------------------------------------------
                      * Loop over row of A_ext for point i1 and calculate the sum
                      * of the connections to c-points that strongly influence i.
                      *---------------------------------------------------------*/

                     /* find row number */
                     c_num = A_offd_j[jj];

                     for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
                     {
                        /* A_ext_j >= 0 encodes a local column; a negative
                           value -kc-1 encodes offd column kc (see the
                           compression of A_ext above) */
                        i2 = (HYPRE_Int)A_ext_j[jj1];

                        if (i2 > -1)
                        {
                           /* in the diagonal block */
                           if (P_marker[i2] >= jj_begin_row)
                           {
                              /* sum += A_ext_data[jj1]; */
                              hypre_CSRBlockMatrixBlockAddAccumulate(&A_ext_data[jj1*bnnz],
                                                                     sum_block, block_size);
                           }
                        }
                        else
                        {
                           /* in the off_diagonal block  */
                           if (P_marker_offd[-i2-1] >= jj_begin_row_offd)
                           {
                              /* sum += A_ext_data[jj1]; */
                              hypre_CSRBlockMatrixBlockAddAccumulate(&A_ext_data[jj1*bnnz],
                                                                     sum_block, block_size);

                           }
                        }
                     }

                     /* check whether sum_block is singular */


                     /* distribute = A_offd_data[jj] / sum;  */
                     /* here we want: A_offd_data * sum^(-1) */
                     if (hypre_CSRBlockMatrixBlockMultInv(sum_block, &A_offd_data[jj*bnnz],
                                                          distribute_block, block_size) == 0)
                     {

                        /*---------------------------------------------------------
                         * Loop over row of A_ext for point i1 and do
                         * the distribution.
                         *--------------------------------------------------------*/

                        /* Diagonal block part of row i1 */

                        for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
                        {
                           i2 = (HYPRE_Int)A_ext_j[jj1];

                           if (i2 > -1) /* in the diagonal block */
                           {
                              if (P_marker[i2] >= jj_begin_row)
                              {
                                 /* P_diag_data[P_marker[i2]]
                                    += distribute * A_ext_data[jj1]; */

                                 /* multiply - result in sum_block */
                                 hypre_CSRBlockMatrixBlockMultAdd(distribute_block,
                                                                  &A_ext_data[jj1*bnnz], 0.0,
                                                                  sum_block, block_size);


                                 /* add result to p_diag_data */
                                 hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
                                                                        &P_diag_data[P_marker[i2]*bnnz],
                                                                        block_size);

                              }
                           }
                           else
                           {
                              /* in the off_diagonal block  */
                              if (P_marker_offd[-i2-1] >= jj_begin_row_offd)

                                 /*P_offd_data[P_marker_offd[-i2-1]]
                                   += distribute * A_ext_data[jj1];*/
                              {

                                 /* multiply - result in sum_block */
                                 hypre_CSRBlockMatrixBlockMultAdd(distribute_block,
                                                                  &A_ext_data[jj1*bnnz], 0.0,
                                                                  sum_block, block_size);


                                 /* add result to p_offd_data */
                                 hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
                                                                        &P_offd_data[P_marker_offd[-i2-1]*bnnz],
                                                                        block_size);
                              }


                           }
                        }
                     }
                     else /* sum block is all zeros - just add to diagonal */
                     {
                        /* diagonal += A_offd_data[jj]; */
                        if (add_weak_to_diag) hypre_CSRBlockMatrixBlockAddAccumulate(&A_offd_data[jj*bnnz],
                                                                                     diagonal_block,
                                                                                     block_size);

                     }
                  }

                  /*-----------------------------------------------------------
                   * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
                   * into the diagonal.
                   *-----------------------------------------------------------*/

                  else if (CF_marker_offd[i1] != -3 && add_weak_to_diag)
                  {
                     /* diagonal += A_offd_data[jj]; */
                     hypre_CSRBlockMatrixBlockAddAccumulate(&A_offd_data[jj*bnnz],
                                                            diagonal_block,
                                                            block_size);

                  }
               }
            }

            /*-----------------------------------------------------------------
             * Set interpolation weight by dividing by the diagonal.
             *-----------------------------------------------------------------*/

            for (jj = jj_begin_row; jj < jj_end_row; jj++)
            {

               /* P_diag_data[jj] /= -diagonal; */

               /* want diagonal^(-1)*P_diag_data */
               /* do division - put in sum_block */
               if ( hypre_CSRBlockMatrixBlockInvMult(diagonal_block, &P_diag_data[jj*bnnz],
                                                     sum_block, block_size) == 0)
               {
                  /* now copy to  P_diag_data[jj] and make negative */
                  hypre_CSRBlockMatrixBlockCopyData(sum_block, &P_diag_data[jj*bnnz],
                                                    -1.0, block_size);
               }
               else
               {
                  /* hypre_printf(" Warning! singular diagonal block! Proc id %d row %d\n", my_id,i); */
                  /* just make P_diag_data negative since diagonal is singular) */
                  hypre_CSRBlockMatrixBlockCopyData(&P_diag_data[jj*bnnz], &P_diag_data[jj*bnnz],
                                                    -1.0, block_size);

               }
            }

            for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
            {
               /* P_offd_data[jj] /= -diagonal; */

               /* do division - put in sum_block */
               hypre_CSRBlockMatrixBlockInvMult(diagonal_block, &P_offd_data[jj*bnnz],
                                                sum_block, block_size);

               /* now copy to  P_offd_data[jj] and make negative */
               hypre_CSRBlockMatrixBlockCopyData(sum_block, &P_offd_data[jj*bnnz],
                                                 -1.0, block_size);



            }

         }

         /* retire this row's strong-F marker so stale P_marker entries
            cannot match in later rows */
         strong_f_marker--;

         P_offd_i[i+1] = jj_counter_offd;
      }
      hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
      hypre_TFree(P_marker_offd, HYPRE_MEMORY_HOST);
   }

   /* Now create P - as a block matrix */
   P = hypre_ParCSRBlockMatrixCreate(comm, block_size,
                                     hypre_ParCSRBlockMatrixGlobalNumRows(A),
                                     total_global_cpts,
                                     hypre_ParCSRBlockMatrixColStarts(A),
                                     num_cpts_global,
                                     0,
                                     P_diag_i[n_fine],
                                     P_offd_i[n_fine]);

   P_diag = hypre_ParCSRBlockMatrixDiag(P);
   hypre_CSRBlockMatrixData(P_diag) = P_diag_data;
   hypre_CSRBlockMatrixI(P_diag) = P_diag_i;
   hypre_CSRBlockMatrixJ(P_diag) = P_diag_j;

   P_offd = hypre_ParCSRBlockMatrixOffd(P);
   hypre_CSRBlockMatrixData(P_offd) = P_offd_data;
   hypre_CSRBlockMatrixI(P_offd) = P_offd_i;
   hypre_CSRBlockMatrixJ(P_offd) = P_offd_j;

   /* Compress P, removing coefficients smaller than trunc_factor * Max;
      re-fetch the CSR arrays afterwards since truncation reallocates them */
   if (trunc_factor != 0.0 || max_elmts > 0)
   {
      hypre_BoomerAMGBlockInterpTruncation(P, trunc_factor, max_elmts);
      P_diag_data = hypre_CSRBlockMatrixData(P_diag);
      P_diag_i = hypre_CSRBlockMatrixI(P_diag);
      P_diag_j = hypre_CSRBlockMatrixJ(P_diag);
      P_offd_data = hypre_CSRBlockMatrixData(P_offd);
      P_offd_i = hypre_CSRBlockMatrixI(P_offd);
      P_offd_j = hypre_CSRBlockMatrixJ(P_offd);
      P_diag_size = P_diag_i[n_fine];
      P_offd_size = P_offd_i[n_fine];
   }

   /* Build the (compressed) off-processor column map of P: find which
      columns of A_offd are actually used, then renumber P_offd_j to index
      into that compressed map */
   num_cols_P_offd = 0;
   if (P_offd_size)
   {
      P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);

      for (i=0; i < num_cols_A_offd; i++)
         P_marker[i] = 0;

      num_cols_P_offd = 0;
      for (i=0; i < P_offd_size; i++)
      {
         index = P_offd_j[i];
         if (!P_marker[index])
         {
            num_cols_P_offd++;
            P_marker[index] = 1;
         }
      }

      col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
      tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);

      index = 0;
      for (i=0; i < num_cols_P_offd; i++)
      {
         while (P_marker[index]==0) index++;
         tmp_map_offd[i] = index++;
      }

      for (i=0; i < P_offd_size; i++)
         P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
                                          P_offd_j[i],
                                          num_cols_P_offd);
      hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
   }

   /* restore the special -3 F-point markers to plain F-points */
   for (i=0; i < n_fine; i++)
      if (CF_marker[i] == -3) CF_marker[i] = -1;

   if (num_cols_P_offd)
   {
      hypre_ParCSRBlockMatrixColMapOffd(P) = col_map_offd_P;
      hypre_CSRBlockMatrixNumCols(P_offd) = num_cols_P_offd;
   }

   /* use block version */
   hypre_GetCommPkgBlockRTFromCommPkgBlockA(P,A, tmp_map_offd, fine_to_coarse_offd);


   *P_ptr = P;


   hypre_TFree(zero_block, HYPRE_MEMORY_HOST);
   hypre_TFree(identity_block, HYPRE_MEMORY_HOST);
   hypre_TFree(diagonal_block, HYPRE_MEMORY_HOST);
   hypre_TFree(sum_block, HYPRE_MEMORY_HOST);
   hypre_TFree(distribute_block, HYPRE_MEMORY_HOST);

   hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
   hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
   hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
   hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
   hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
   hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
   hypre_TFree(coarse_counter, HYPRE_MEMORY_HOST);
   hypre_TFree(jj_count, HYPRE_MEMORY_HOST);
   hypre_TFree(jj_count_offd, HYPRE_MEMORY_HOST);

   if (num_procs > 1) hypre_CSRBlockMatrixDestroy(A_ext);

   return hypre_error_flag;
}
1094
/* 8/07: It is unclear whether scaling (row-normalizing) by the blocks is
   appropriate here, so the normalization code below is disabled (see the
   "#if 0" sections in hypre_BoomerAMGBlockInterpTruncation).  Revisit if a
   different normalization scheme is wanted. */
1098
1099 HYPRE_Int
hypre_BoomerAMGBlockInterpTruncation(hypre_ParCSRBlockMatrix * P,HYPRE_Real trunc_factor,HYPRE_Int max_elmts)1100 hypre_BoomerAMGBlockInterpTruncation( hypre_ParCSRBlockMatrix *P,
1101 HYPRE_Real trunc_factor,
1102 HYPRE_Int max_elmts)
1103 {
1104 hypre_CSRBlockMatrix *P_diag = hypre_ParCSRBlockMatrixDiag(P);
1105 HYPRE_Int *P_diag_i = hypre_CSRBlockMatrixI(P_diag);
1106 HYPRE_Int *P_diag_j = hypre_CSRBlockMatrixJ(P_diag);
1107 HYPRE_Real *P_diag_data = hypre_CSRBlockMatrixData(P_diag);
1108 HYPRE_Int *P_diag_j_new;
1109 HYPRE_Real *P_diag_data_new;
1110
1111 hypre_CSRBlockMatrix *P_offd = hypre_ParCSRBlockMatrixOffd(P);
1112 HYPRE_Int *P_offd_i = hypre_CSRBlockMatrixI(P_offd);
1113 HYPRE_Int *P_offd_j = hypre_CSRBlockMatrixJ(P_offd);
1114 HYPRE_Real *P_offd_data = hypre_CSRBlockMatrixData(P_offd);
1115 HYPRE_Int *P_offd_j_new;
1116 HYPRE_Real *P_offd_data_new;
1117
1118 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(P_diag);
1119 HYPRE_Int bnnz = block_size*block_size;
1120
1121 HYPRE_Int n_fine = hypre_CSRBlockMatrixNumRows(P_diag);
1122 HYPRE_Int num_cols = hypre_CSRBlockMatrixNumCols(P_diag);
1123 HYPRE_Int i, j, start_j, k;
1124 HYPRE_Int ierr = 0;
1125 HYPRE_Int next_open = 0;
1126 HYPRE_Int now_checking = 0;
1127 HYPRE_Int num_lost = 0;
1128 HYPRE_Int next_open_offd = 0;
1129 HYPRE_Int now_checking_offd = 0;
1130 HYPRE_Int num_lost_offd = 0;
1131 HYPRE_Int P_diag_size;
1132 HYPRE_Int P_offd_size;
1133 HYPRE_Real max_coef, tmp;
1134 HYPRE_Real *row_sum;
1135 HYPRE_Real *scale;
1136 HYPRE_Real *out_block;
1137 HYPRE_Int cnt, cnt_diag, cnt_offd;
1138 HYPRE_Int num_elmts;
1139
1140 /* for now we will use the frobenius norm to
1141 determine whether to keep a block or not - so norm_type = 1*/
1142 row_sum = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
1143 scale = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
1144 out_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
1145
1146 if (trunc_factor > 0)
1147 {
1148 /* go through each row */
1149 for (i = 0; i < n_fine; i++)
1150 {
1151 max_coef = 0.0;
1152
1153 /* diag */
1154 for (j = P_diag_i[i]; j < P_diag_i[i+1]; j++)
1155 {
1156 hypre_CSRBlockMatrixBlockNorm(1, &P_diag_data[j*bnnz] , &tmp, block_size);
1157 max_coef = (max_coef < tmp) ? tmp : max_coef;
1158 }
1159
1160 /* off_diag */
1161 for (j = P_offd_i[i]; j < P_offd_i[i+1]; j++)
1162 {
1163 hypre_CSRBlockMatrixBlockNorm(1, &P_offd_data[j*bnnz], &tmp, block_size);
1164 max_coef = (max_coef < tmp) ? tmp : max_coef;
1165 }
1166
1167 max_coef *= trunc_factor;
1168
1169 start_j = P_diag_i[i];
1170 P_diag_i[i] -= num_lost;
1171
1172 /* set scale and row sum to zero */
1173 hypre_CSRBlockMatrixBlockSetScalar(scale, 0.0, block_size);
1174 hypre_CSRBlockMatrixBlockSetScalar(row_sum, 0.0, block_size);
1175
1176 for (j = start_j; j < P_diag_i[i+1]; j++)
1177 {
1178 /* row_sum += P_diag_data[now_checking];*/
1179 hypre_CSRBlockMatrixBlockAddAccumulate(&P_diag_data[now_checking*bnnz], row_sum, block_size);
1180
1181 hypre_CSRBlockMatrixBlockNorm(1, &P_diag_data[now_checking*bnnz] , &tmp, block_size);
1182
1183 if ( tmp < max_coef)
1184 {
1185 num_lost++;
1186 now_checking++;
1187 }
1188 else
1189 {
1190 /* scale += P_diag_data[now_checking]; */
1191 hypre_CSRBlockMatrixBlockAddAccumulate(&P_diag_data[now_checking*bnnz], scale, block_size);
1192
1193 /* P_diag_data[next_open] = P_diag_data[now_checking]; */
1194 hypre_CSRBlockMatrixBlockCopyData( &P_diag_data[now_checking*bnnz], &P_diag_data[next_open*bnnz],
1195 1.0, block_size);
1196
1197 P_diag_j[next_open] = P_diag_j[now_checking];
1198 now_checking++;
1199 next_open++;
1200 }
1201 }
1202
1203 start_j = P_offd_i[i];
1204 P_offd_i[i] -= num_lost_offd;
1205
1206 for (j = start_j; j < P_offd_i[i+1]; j++)
1207 {
1208 /* row_sum += P_offd_data[now_checking_offd]; */
1209 hypre_CSRBlockMatrixBlockAddAccumulate(&P_offd_data[now_checking_offd*bnnz], row_sum, block_size);
1210
1211 hypre_CSRBlockMatrixBlockNorm(1, &P_offd_data[now_checking_offd*bnnz] , &tmp, block_size);
1212
1213 if ( tmp < max_coef)
1214 {
1215 num_lost_offd++;
1216 now_checking_offd++;
1217 }
1218 else
1219 {
1220 /* scale += P_offd_data[now_checking_offd]; */
1221 hypre_CSRBlockMatrixBlockAddAccumulate(&P_offd_data[now_checking_offd*bnnz], scale, block_size);
1222
1223 /* P_offd_data[next_open_offd] = P_offd_data[now_checking_offd];*/
1224 hypre_CSRBlockMatrixBlockCopyData( &P_offd_data[now_checking_offd*bnnz], &P_offd_data[next_open_offd*bnnz],
1225 1.0, block_size);
1226
1227
1228 P_offd_j[next_open_offd] = P_offd_j[now_checking_offd];
1229 now_checking_offd++;
1230 next_open_offd++;
1231 }
1232 }
1233 /* normalize row of P */
1234 #if 0
1235 /* out_block = row_sum/scale; */
1236 if (hypre_CSRBlockMatrixBlockInvMult(scale, row_sum, out_block, block_size) == 0)
1237 {
1238
1239 for (j = P_diag_i[i]; j < (P_diag_i[i+1]-num_lost); j++)
1240 {
1241 /* P_diag_data[j] *= out_block; */
1242
1243 /* put mult result in row_sum */
1244 hypre_CSRBlockMatrixBlockMultAdd(out_block, &P_diag_data[j*bnnz], 0.0,
1245 row_sum, block_size);
1246 /* add to P_diag_data */
1247 hypre_CSRBlockMatrixBlockAddAccumulate(row_sum, &P_diag_data[j*bnnz], block_size);
1248 }
1249
1250 for (j = P_offd_i[i]; j < (P_offd_i[i+1]-num_lost_offd); j++)
1251 {
1252
1253 /* P_offd_data[j] *= out_block; */
1254
1255 /* put mult result in row_sum */
1256 hypre_CSRBlockMatrixBlockMultAdd(out_block, &P_offd_data[j*bnnz], 0.0,
1257 row_sum, block_size);
1258 /* add to to P_offd_data */
1259 hypre_CSRBlockMatrixBlockAddAccumulate(row_sum, &P_offd_data[j*bnnz], block_size);
1260
1261 }
1262
1263 }
1264 #endif
1265 }
1266
1267 P_diag_i[n_fine] -= num_lost;
1268 P_offd_i[n_fine] -= num_lost_offd;
1269 }
1270 if (max_elmts > 0)
1271 {
1272 HYPRE_Int P_mxnum, cnt1, rowlength;
1273 HYPRE_Int *P_aux_j;
1274 HYPRE_Real *P_aux_data;
1275 HYPRE_Real *norm_array;
1276
1277 rowlength = 0;
1278 if (n_fine)
1279 rowlength = P_diag_i[1]+P_offd_i[1];
1280 P_mxnum = rowlength;
1281 for (i=1; i<n_fine; i++)
1282 {
1283 rowlength = P_diag_i[i+1]-P_diag_i[i]+P_offd_i[i+1]-P_offd_i[i];
1284 if (rowlength > P_mxnum) P_mxnum = rowlength;
1285 }
1286 if (P_mxnum > max_elmts)
1287 {
1288 P_aux_j = hypre_CTAlloc(HYPRE_Int, P_mxnum, HYPRE_MEMORY_HOST);
1289 P_aux_data = hypre_CTAlloc(HYPRE_Real, P_mxnum*bnnz, HYPRE_MEMORY_HOST);
1290 cnt_diag = 0;
1291 cnt_offd = 0;
1292
1293 for (i = 0; i < n_fine; i++)
1294 {
1295 hypre_CSRBlockMatrixBlockSetScalar(row_sum, 0.0, block_size);
1296 /*row_sum = 0; */
1297
1298 num_elmts = P_diag_i[i+1]-P_diag_i[i]+P_offd_i[i+1]-P_offd_i[i];
1299 if (max_elmts < num_elmts)
1300 {
1301 cnt = 0;
1302 for (j = P_diag_i[i]; j < P_diag_i[i+1]; j++)
1303 {
1304 P_aux_j[cnt] = P_diag_j[j];
1305 /*P_aux_data[cnt++] = P_diag_data[j];*/
1306 hypre_CSRBlockMatrixBlockCopyData(&P_diag_data[j*bnnz],
1307 &P_aux_data[cnt*bnnz],
1308 1.0, block_size);
1309 cnt++;
1310 /*row_sum += P_diag_data[j];*/
1311 hypre_CSRBlockMatrixBlockAddAccumulate(&P_diag_data[j*bnnz], row_sum, block_size);
1312
1313
1314 }
1315 num_lost += cnt;
1316 cnt1 = cnt;
1317 for (j = P_offd_i[i]; j < P_offd_i[i+1]; j++)
1318 {
1319 P_aux_j[cnt] = P_offd_j[j]+num_cols;
1320 /*P_aux_data[cnt++] = P_offd_data[j];*/
1321 hypre_CSRBlockMatrixBlockCopyData(&P_offd_data[j*bnnz],
1322 &P_aux_data[cnt*bnnz],
1323 1.0, block_size);
1324 cnt++;
1325
1326 /*row_sum += P_offd_data[j];*/
1327 hypre_CSRBlockMatrixBlockAddAccumulate(&P_offd_data[j*bnnz], row_sum, block_size);
1328
1329
1330 }
1331 num_lost_offd += cnt-cnt1;
1332 /* sort data */
1333 norm_array = hypre_CTAlloc(HYPRE_Real, cnt, HYPRE_MEMORY_HOST);
1334 for (j=0; j< cnt; j++)
1335 {
1336 hypre_CSRBlockMatrixBlockNorm(1, &P_aux_data[j*bnnz] , &norm_array[j], block_size);
1337 }
1338
1339 hypre_block_qsort(P_aux_j, norm_array, P_aux_data,block_size, 0,cnt-1);
1340
1341 hypre_TFree(norm_array, HYPRE_MEMORY_HOST);
1342
1343 /* scale = 0; */
1344 hypre_CSRBlockMatrixBlockSetScalar(scale, 0.0, block_size);
1345 P_diag_i[i] = cnt_diag;
1346 P_offd_i[i] = cnt_offd;
1347 for (j = 0; j < max_elmts; j++)
1348 {
1349 /* scale += P_aux_data[j];*/
1350 hypre_CSRBlockMatrixBlockAddAccumulate(&P_aux_data[j*bnnz],
1351 scale, block_size);
1352
1353
1354 if (P_aux_j[j] < num_cols)
1355 {
1356 P_diag_j[cnt_diag] = P_aux_j[j];
1357 /*P_diag_data[cnt_diag++] = P_aux_data[j];*/
1358 hypre_CSRBlockMatrixBlockCopyData(&P_aux_data[j*bnnz],
1359 &P_diag_data[cnt_diag*bnnz],
1360 1.0, block_size);
1361
1362 cnt_diag++;
1363
1364
1365 }
1366 else
1367 {
1368 P_offd_j[cnt_offd] = P_aux_j[j]-num_cols;
1369 /*P_offd_data[cnt_offd++] = P_aux_data[j];*/
1370 hypre_CSRBlockMatrixBlockCopyData(&P_aux_data[j*bnnz],
1371 &P_offd_data[cnt_offd*bnnz],
1372 1.0, block_size);
1373 cnt_offd++;
1374
1375 }
1376 }
1377 num_lost -= cnt_diag-P_diag_i[i];
1378 num_lost_offd -= cnt_offd-P_offd_i[i];
1379 /* normalize row of P */
1380 /* out_block = row_sum/scale; */
1381 /*if (scale != 0.)*/
1382 #if 0
1383 if (hypre_CSRBlockMatrixBlockInvMult(scale, row_sum, out_block, block_size) == 0)
1384 {
1385
1386 for (j = P_diag_i[i]; j < cnt_diag; j++)
1387 {
1388
1389 /* P_diag_data[j] *= out_block; */
1390
1391 /* put mult result in row_sum */
1392 hypre_CSRBlockMatrixBlockMultAdd(out_block, &P_diag_data[j*bnnz], 0.0,
1393 row_sum, block_size);
1394 /* add to P_diag_data */
1395 hypre_CSRBlockMatrixBlockAddAccumulate(row_sum, &P_diag_data[j*bnnz], block_size);
1396 }
1397
1398 for (j = P_offd_i[i]; j < cnt_offd; j++)
1399 {
1400
1401 /* P_offd_data[j] *= out_block; */
1402
1403 /* put mult result in row_sum */
1404 hypre_CSRBlockMatrixBlockMultAdd(out_block, &P_offd_data[j*bnnz], 0.0,
1405 row_sum, block_size);
1406 /* add to to P_offd_data */
1407 hypre_CSRBlockMatrixBlockAddAccumulate(row_sum, &P_offd_data[j*bnnz], block_size);
1408 }
1409
1410
1411 }
1412 #endif
1413 }
1414 else
1415 {
1416 if (P_diag_i[i] != cnt_diag)
1417 {
1418 start_j = P_diag_i[i];
1419 P_diag_i[i] = cnt_diag;
1420 for (j = start_j; j < P_diag_i[i+1]; j++)
1421 {
1422 P_diag_j[cnt_diag] = P_diag_j[j];
1423 /*P_diag_data[cnt_diag++] = P_diag_data[j];*/
1424 hypre_CSRBlockMatrixBlockCopyData(&P_diag_data[j*bnnz],
1425 &P_diag_data[cnt_diag*bnnz],
1426 1.0, block_size);
1427 cnt_diag++;
1428
1429
1430 }
1431 }
1432 else
1433 cnt_diag += P_diag_i[i+1]-P_diag_i[i];
1434 if (P_offd_i[i] != cnt_offd)
1435 {
1436 start_j = P_offd_i[i];
1437 P_offd_i[i] = cnt_offd;
1438 for (j = start_j; j < P_offd_i[i+1]; j++)
1439 {
1440 P_offd_j[cnt_offd] = P_offd_j[j];
1441 /*P_offd_data[cnt_offd++] = P_offd_data[j];*/
1442
1443 hypre_CSRBlockMatrixBlockCopyData(&P_offd_data[j*bnnz],
1444 &P_offd_data[cnt_offd*bnnz],
1445 1.0, block_size);
1446 cnt_offd++;
1447 }
1448 }
1449 else
1450 cnt_offd += P_offd_i[i+1]-P_offd_i[i];
1451 }
1452 }
1453 P_diag_i[n_fine] = cnt_diag;
1454 P_offd_i[n_fine] = cnt_offd;
1455 hypre_TFree(P_aux_j, HYPRE_MEMORY_HOST);
1456 hypre_TFree(P_aux_data, HYPRE_MEMORY_HOST);
1457 }
1458 }
1459
1460
1461
1462
1463 if (num_lost)
1464 {
1465 P_diag_size = P_diag_i[n_fine];
1466 P_diag_j_new = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
1467 P_diag_data_new = hypre_CTAlloc(HYPRE_Real, P_diag_size*bnnz, HYPRE_MEMORY_HOST);
1468 for (i=0; i < P_diag_size; i++)
1469 {
1470 P_diag_j_new[i] = P_diag_j[i];
1471 for (k=0; k < bnnz; k++)
1472 {
1473 P_diag_data_new[i*bnnz+k] = P_diag_data[i*bnnz+k];
1474 }
1475
1476 }
1477 hypre_TFree(P_diag_j, HYPRE_MEMORY_HOST);
1478 hypre_TFree(P_diag_data, HYPRE_MEMORY_HOST);
1479 hypre_CSRMatrixJ(P_diag) = P_diag_j_new;
1480 hypre_CSRMatrixData(P_diag) = P_diag_data_new;
1481 hypre_CSRMatrixNumNonzeros(P_diag) = P_diag_size;
1482 }
1483 if (num_lost_offd)
1484 {
1485 P_offd_size = P_offd_i[n_fine];
1486 P_offd_j_new = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
1487 P_offd_data_new = hypre_CTAlloc(HYPRE_Real, P_offd_size*bnnz, HYPRE_MEMORY_HOST);
1488 for (i=0; i < P_offd_size; i++)
1489 {
1490 P_offd_j_new[i] = P_offd_j[i];
1491 for (k=0; k < bnnz; k++)
1492 {
1493 P_offd_data_new[i*bnnz + k] = P_offd_data[i*bnnz + k];
1494 }
1495
1496 }
1497 hypre_TFree(P_offd_j, HYPRE_MEMORY_HOST);
1498 hypre_TFree(P_offd_data, HYPRE_MEMORY_HOST);
1499 hypre_CSRMatrixJ(P_offd) = P_offd_j_new;
1500 hypre_CSRMatrixData(P_offd) = P_offd_data_new;
1501 hypre_CSRMatrixNumNonzeros(P_offd) = P_offd_size;
1502 }
1503
1504 hypre_TFree(row_sum, HYPRE_MEMORY_HOST);
1505 hypre_TFree(scale, HYPRE_MEMORY_HOST);
1506 hypre_TFree(out_block, HYPRE_MEMORY_HOST);
1507
1508 return ierr;
1509 }
1510
1511 /*-----------------------------------------------*/
1512 /* compare on w, move v and blk_array */
1513
/* Quicksort the range [left, right] of v and w into order of decreasing
   |w|, moving the corresponding dense blocks in blk_array along with
   each swap.  (Comparison is on w; v and blk_array are passengers.) */
void hypre_block_qsort( HYPRE_Int *v,
                        HYPRE_Complex *w,
                        HYPRE_Complex *blk_array,
                        HYPRE_Int block_size,
                        HYPRE_Int left,
                        HYPRE_Int right )
{
   HYPRE_Int k, mid, store;

   /* ranges of length 0 or 1 are already sorted */
   if (left >= right)
   {
      return;
   }

   /* move the middle element into the pivot slot at 'left' */
   mid = (left + right)/2;
   hypre_swap2(v, w, left, mid);
   hypre_swap_blk(blk_array, block_size, left, mid);

   /* partition: everything with |w| greater than the pivot's moves to
      the front; 'store' is the last position of that group */
   store = left;
   for (k = left + 1; k <= right; k++)
   {
      if (hypre_cabs(w[k]) > hypre_cabs(w[left]))
      {
         store++;
         hypre_swap2(v, w, store, k);
         hypre_swap_blk(blk_array, block_size, store, k);
      }
   }

   /* drop the pivot into its final position */
   hypre_swap2(v, w, left, store);
   hypre_swap_blk(blk_array, block_size, left, store);

   /* sort the two sub-ranges on either side of the pivot */
   hypre_block_qsort(v, w, blk_array, block_size, left, store - 1);
   hypre_block_qsort(v, w, blk_array, block_size, store + 1, right);
}
1540
/* Swap block entries i and j of v, where v stores dense
   block_size x block_size blocks contiguously (bnnz scalars each).

   Improvements over the previous version: the old code allocated and
   freed a temporary block on every call - costly, since this routine is
   invoked from the hypre_block_qsort partition loop - and declared that
   temporary as HYPRE_Real even though v is HYPRE_Complex, which would
   drop imaginary parts in complex builds.  The element-wise swap below
   needs no allocation and keeps everything in HYPRE_Complex. */
void hypre_swap_blk( HYPRE_Complex *v,
                     HYPRE_Int block_size,
                     HYPRE_Int i,
                     HYPRE_Int j )
{
   HYPRE_Int bnnz = block_size*block_size;
   HYPRE_Int k;
   HYPRE_Complex temp;

   for (k = 0; k < bnnz; k++)
   {
      temp = v[i*bnnz + k];
      v[i*bnnz + k] = v[j*bnnz + k];
      v[j*bnnz + k] = temp;
   }
}
1560
1561 /*---------------------------------------------------------------------------
1562 * hypre_BoomerAMGBlockBuildInterpDiag
1563
1564 This is the block version of classical R-S interpolation. We use just the
1565 diagonals of these blocks.
1566
1567 A and P are now Block matrices. The Strength matrix S is not as it gives
1568 nodal strengths.
1569
1570 CF_marker is size number of nodes.
1571
1572 add_weak_to_diag 0 = don't add weak connections to diag (distribute instead)
1573 1 = do add
1574 *--------------------------------------------------------------------------*/
1575
1576 HYPRE_Int
hypre_BoomerAMGBuildBlockInterpDiag(hypre_ParCSRBlockMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,HYPRE_Int add_weak_to_diag,hypre_ParCSRBlockMatrix ** P_ptr)1577 hypre_BoomerAMGBuildBlockInterpDiag( hypre_ParCSRBlockMatrix *A,
1578 HYPRE_Int *CF_marker,
1579 hypre_ParCSRMatrix *S,
1580 HYPRE_BigInt *num_cpts_global,
1581 HYPRE_Int num_functions,
1582 HYPRE_Int *dof_func,
1583 HYPRE_Int debug_flag,
1584 HYPRE_Real trunc_factor,
1585 HYPRE_Int max_elmts,
1586 HYPRE_Int add_weak_to_diag,
1587 hypre_ParCSRBlockMatrix **P_ptr)
1588 {
1589
1590 MPI_Comm comm = hypre_ParCSRBlockMatrixComm(A);
1591 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
1592 hypre_ParCSRCommHandle *comm_handle;
1593
1594 hypre_CSRBlockMatrix *A_diag = hypre_ParCSRBlockMatrixDiag(A);
1595 HYPRE_Real *A_diag_data = hypre_CSRBlockMatrixData(A_diag);
1596 HYPRE_Int *A_diag_i = hypre_CSRBlockMatrixI(A_diag);
1597 HYPRE_Int *A_diag_j = hypre_CSRBlockMatrixJ(A_diag);
1598
1599 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(A_diag);
1600 HYPRE_Int bnnz = block_size*block_size;
1601
1602 hypre_CSRBlockMatrix *A_offd = hypre_ParCSRBlockMatrixOffd(A);
1603 HYPRE_Real *A_offd_data = hypre_CSRBlockMatrixData(A_offd);
1604 HYPRE_Int *A_offd_i = hypre_CSRBlockMatrixI(A_offd);
1605 HYPRE_Int *A_offd_j = hypre_CSRBlockMatrixJ(A_offd);
1606 HYPRE_Int num_cols_A_offd = hypre_CSRBlockMatrixNumCols(A_offd);
1607 HYPRE_BigInt *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(A);
1608
1609 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
1610 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
1611 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
1612
1613 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
1614 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
1615 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
1616
1617 hypre_ParCSRBlockMatrix *P;
1618 HYPRE_BigInt *col_map_offd_P;
1619 HYPRE_Int *tmp_map_offd = NULL;
1620
1621 HYPRE_Int *CF_marker_offd = NULL;
1622
1623 hypre_CSRBlockMatrix *A_ext;
1624
1625 HYPRE_Real *A_ext_data = NULL;
1626 HYPRE_Int *A_ext_i = NULL;
1627 HYPRE_BigInt *A_ext_j = NULL;
1628
1629 hypre_CSRBlockMatrix *P_diag;
1630 hypre_CSRBlockMatrix *P_offd;
1631
1632 HYPRE_Real *P_diag_data;
1633 HYPRE_Int *P_diag_i;
1634 HYPRE_Int *P_diag_j;
1635 HYPRE_Real *P_offd_data;
1636 HYPRE_Int *P_offd_i;
1637 HYPRE_Int *P_offd_j;
1638
1639 HYPRE_Int P_diag_size, P_offd_size;
1640
1641 HYPRE_Int *P_marker, *P_marker_offd = NULL;
1642
1643 HYPRE_Int jj_counter,jj_counter_offd;
1644 HYPRE_Int *jj_count, *jj_count_offd = NULL;
1645 HYPRE_Int jj_begin_row,jj_begin_row_offd;
1646 HYPRE_Int jj_end_row,jj_end_row_offd;
1647
1648 HYPRE_Int start_indexing = 0; /* start indexing for P_data at 0 */
1649
1650 HYPRE_Int n_fine = hypre_CSRBlockMatrixNumRows(A_diag);
1651
1652 HYPRE_Int strong_f_marker;
1653
1654 HYPRE_Int *fine_to_coarse;
1655 HYPRE_BigInt *fine_to_coarse_offd = NULL;
1656 HYPRE_Int *coarse_counter;
1657 HYPRE_Int coarse_shift;
1658 HYPRE_BigInt total_global_cpts;
1659 HYPRE_Int num_cols_P_offd, my_first_cpt;
1660
1661 HYPRE_Int bd;
1662
1663 HYPRE_Int i,i1,i2;
1664 HYPRE_Int j,jl,jj,jj1;
1665 HYPRE_Int kc;
1666 HYPRE_BigInt big_k;
1667 HYPRE_Int start;
1668
1669 HYPRE_Int c_num;
1670
1671 HYPRE_Int my_id;
1672 HYPRE_Int num_procs;
1673 HYPRE_Int num_threads;
1674 HYPRE_Int num_sends;
1675 HYPRE_Int index;
1676 HYPRE_Int ns, ne, size, rest;
1677 HYPRE_Int *int_buf_data = NULL;
1678 HYPRE_BigInt *big_buf_data = NULL;
1679
1680 HYPRE_BigInt col_1 = hypre_ParCSRBlockMatrixFirstRowIndex(A);
1681 HYPRE_Int local_numrows = hypre_CSRBlockMatrixNumRows(A_diag);
1682 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt)local_numrows;
1683
1684 HYPRE_Real wall_time; /* for debugging instrumentation */
1685
1686
1687 HYPRE_Real *identity_block;
1688 HYPRE_Real *zero_block;
1689 HYPRE_Real *diagonal_block;
1690 HYPRE_Real *sum_block;
1691 HYPRE_Real *distribute_block;
1692
1693 HYPRE_Real *sign;
1694
1695 hypre_MPI_Comm_size(comm, &num_procs);
1696 hypre_MPI_Comm_rank(comm,&my_id);
1697 num_threads = hypre_NumThreads();
1698
1699
1700 my_first_cpt = num_cpts_global[0];
1701 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
1702 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
1703
1704 /*-------------------------------------------------------------------
1705 * Get the CF_marker data for the off-processor columns
1706 *-------------------------------------------------------------------*/
1707
1708 if (debug_flag==4) wall_time = time_getWallclockSeconds();
1709
1710 CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
1711
1712
1713 if (!comm_pkg)
1714 {
1715 hypre_BlockMatvecCommPkgCreate(A);
1716 comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
1717 }
1718
1719 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1720 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
1721 num_sends), HYPRE_MEMORY_HOST);
1722
1723 index = 0;
1724 for (i = 0; i < num_sends; i++)
1725 {
1726 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1727 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1728 {
1729 int_buf_data[index++]
1730 = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1731 }
1732
1733 }
1734
1735 /* we do not need the block version of comm handle - because
1736 CF_marker corresponds to the nodal matrix. This call populates
1737 CF_marker_offd */
1738 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
1739 CF_marker_offd);
1740
1741 hypre_ParCSRCommHandleDestroy(comm_handle);
1742
1743
1744 if (debug_flag==4)
1745 {
1746 wall_time = time_getWallclockSeconds() - wall_time;
1747 hypre_printf("Proc = %d Interp: Comm 1 CF_marker = %f\n",
1748 my_id, wall_time);
1749 fflush(NULL);
1750 }
1751
1752 /*----------------------------------------------------------------------
1753 * Get the ghost rows of A
1754 *---------------------------------------------------------------------*/
1755
1756 if (debug_flag==4) wall_time = time_getWallclockSeconds();
1757
1758 if (num_procs > 1)
1759 {
1760 A_ext = hypre_ParCSRBlockMatrixExtractBExt(A, A, 1);
1761 A_ext_i = hypre_CSRBlockMatrixI(A_ext);
1762 A_ext_j = hypre_CSRBlockMatrixBigJ(A_ext);
1763 A_ext_data = hypre_CSRBlockMatrixData(A_ext);
1764 }
1765
1766 index = 0;
1767 for (i=0; i < num_cols_A_offd; i++)
1768 {
1769 for (j=A_ext_i[i]; j < A_ext_i[i+1]; j++)
1770 {
1771 big_k = A_ext_j[j];
1772 if (big_k >= col_1 && big_k < col_n)
1773 {
1774 A_ext_j[index] = big_k - col_1;
1775 /* for the data field we must get all of the block data */
1776 for (bd = 0; bd < bnnz; bd++)
1777 {
1778 A_ext_data[index*bnnz + bd] = A_ext_data[j*bnnz + bd];
1779 }
1780 index++;
1781 }
1782 else
1783 {
1784 kc = hypre_BigBinarySearch(col_map_offd, big_k, num_cols_A_offd);
1785 if (kc > -1)
1786 {
1787 A_ext_j[index] = (HYPRE_BigInt)(-kc-1);
1788 for (bd = 0; bd < bnnz; bd++)
1789 {
1790 A_ext_data[index*bnnz + bd] = A_ext_data[j*bnnz + bd];
1791 }
1792 index++;
1793 }
1794 }
1795 }
1796 A_ext_i[i] = index;
1797 }
1798 for (i = num_cols_A_offd; i > 0; i--)
1799 A_ext_i[i] = A_ext_i[i-1];
1800 if (num_procs > 1) A_ext_i[0] = 0;
1801
1802 if (debug_flag==4)
1803 {
1804 wall_time = time_getWallclockSeconds() - wall_time;
1805 hypre_printf("Proc = %d Interp: Comm 2 Get A_ext = %f\n",
1806 my_id, wall_time);
1807 fflush(NULL);
1808 }
1809
1810
1811 /*-----------------------------------------------------------------------
1812 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
1813 *-----------------------------------------------------------------------*/
1814
1815 /*-----------------------------------------------------------------------
1816 * Intialize counters and allocate mapping vector.
1817 *-----------------------------------------------------------------------*/
1818
1819 coarse_counter = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
1820 jj_count = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
1821 jj_count_offd = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
1822
1823 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
1824
1825 for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1;
1826
1827 jj_counter = start_indexing;
1828 jj_counter_offd = start_indexing;
1829
1830 /*-----------------------------------------------------------------------
1831 * Loop over fine grid.
1832 *-----------------------------------------------------------------------*/
1833
1834
1835 for (j = 0; j < num_threads; j++)
1836 {
1837 size = n_fine/num_threads;
1838 rest = n_fine - size*num_threads;
1839 if (j < rest)
1840 {
1841 ns = j*size+j;
1842 ne = (j+1)*size+j+1;
1843 }
1844 else
1845 {
1846 ns = j*size+rest;
1847 ne = (j+1)*size+rest;
1848 }
1849
1850
1851 /* loop over the fine grid points */
1852 for (i = ns; i < ne; i++)
1853 {
1854
1855 /*--------------------------------------------------------------------
1856 * If i is a C-point, interpolation is the identity. Also set up
1857 * mapping vector (fine_to_coarse is the mapping vector).
1858 *--------------------------------------------------------------------*/
1859
1860 if (CF_marker[i] >= 0)
1861 {
1862 jj_count[j]++;
1863 fine_to_coarse[i] = coarse_counter[j];
1864 coarse_counter[j]++;
1865 }
1866
1867 /*--------------------------------------------------------------------
1868 * If i is an F-point, interpolation is from the C-points that
1869 * strongly influence i.
1870 *--------------------------------------------------------------------*/
1871
1872 else
1873 {
1874 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
1875 {
1876 i1 = S_diag_j[jj];
1877 if (CF_marker[i1] >= 0)
1878 {
1879 jj_count[j]++;
1880 }
1881 }
1882
1883 if (num_procs > 1)
1884 {
1885 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
1886 {
1887 i1 = S_offd_j[jj];
1888 if (CF_marker_offd[i1] >= 0)
1889 {
1890 jj_count_offd[j]++;
1891 }
1892 }
1893 }
1894 }
1895 }
1896 }
1897
1898 /*-----------------------------------------------------------------------
1899 * Allocate arrays.
1900 *-----------------------------------------------------------------------*/
1901
1902 for (i=0; i < num_threads-1; i++)
1903 {
1904 coarse_counter[i+1] += coarse_counter[i];
1905 jj_count[i+1] += jj_count[i];
1906 jj_count_offd[i+1] += jj_count_offd[i];
1907 }
1908 i = num_threads-1;
1909 jj_counter = jj_count[i];
1910 jj_counter_offd = jj_count_offd[i];
1911
1912 P_diag_size = jj_counter;
1913
1914 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
1915 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
1916 /* we need to include the size of the blocks in the data size */
1917 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size*bnnz, HYPRE_MEMORY_HOST);
1918
1919 P_diag_i[n_fine] = jj_counter;
1920
1921
1922 P_offd_size = jj_counter_offd;
1923
1924 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
1925 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
1926 /* we need to include the size of the blocks in the data size */
1927 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size*bnnz, HYPRE_MEMORY_HOST);
1928
1929 /*-----------------------------------------------------------------------
1930 * Intialize some stuff.
1931 *-----------------------------------------------------------------------*/
1932
1933 jj_counter = start_indexing;
1934 jj_counter_offd = start_indexing;
1935
1936 if (debug_flag==4)
1937 {
1938 wall_time = time_getWallclockSeconds() - wall_time;
1939 hypre_printf("Proc = %d Interp: Internal work 1 = %f\n",
1940 my_id, wall_time);
1941 fflush(NULL);
1942 }
1943
1944 /* we need a block identity and a block of zeros*/
1945 identity_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
1946 zero_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
1947
1948
1949
1950 for(i = 0; i < block_size; i++)
1951 {
1952 identity_block[i*block_size + i] = 1.0;
1953 }
1954
1955
1956 /* we also need a block to keep track of the diagonal values and a sum */
1957 diagonal_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
1958 sum_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
1959 distribute_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
1960
1961
1962 sign = hypre_CTAlloc(HYPRE_Real, block_size, HYPRE_MEMORY_HOST);
1963
1964 /*-----------------------------------------------------------------------
1965 * Send and receive fine_to_coarse info.
1966 *-----------------------------------------------------------------------*/
1967
1968 if (debug_flag==4) wall_time = time_getWallclockSeconds();
1969
1970 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_A_offd, HYPRE_MEMORY_HOST);
1971 big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
1972 num_sends), HYPRE_MEMORY_HOST);
1973
1974 for (j = 0; j < num_threads; j++)
1975 {
1976 coarse_shift = 0;
1977 if (j > 0) coarse_shift = coarse_counter[j-1];
1978 size = n_fine/num_threads;
1979 rest = n_fine - size*num_threads;
1980 if (j < rest)
1981 {
1982 ns = j*size+j;
1983 ne = (j+1)*size+j+1;
1984 }
1985 else
1986 {
1987 ns = j*size+rest;
1988 ne = (j+1)*size+rest;
1989 }
1990 for (i = ns; i < ne; i++)
1991 fine_to_coarse[i] += coarse_shift;
1992 }
1993 index = 0;
1994 for (i = 0; i < num_sends; i++)
1995 {
1996 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1997 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1998 big_buf_data[index++] = my_first_cpt
1999 + fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2000 }
2001
2002 /* again, we do not need to use the block version of comm handle since
2003 the fine to coarse mapping is size of the nodes */
2004
2005 comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data,
2006 fine_to_coarse_offd);
2007
2008 hypre_ParCSRCommHandleDestroy(comm_handle);
2009
2010 if (debug_flag==4)
2011 {
2012 wall_time = time_getWallclockSeconds() - wall_time;
2013 hypre_printf("Proc = %d Interp: Comm 4 FineToCoarse = %f\n",
2014 my_id, wall_time);
2015 fflush(NULL);
2016 }
2017
2018 if (debug_flag==4) wall_time = time_getWallclockSeconds();
2019
2020 /*-----------------------------------------------------------------------
2021 * Loop over fine grid points.
2022 *-----------------------------------------------------------------------*/
2023
2024 for (jl = 0; jl < num_threads; jl++)
2025 {
2026 size = n_fine/num_threads;
2027 rest = n_fine - size*num_threads;
2028 if (jl < rest)
2029 {
2030 ns = jl*size+jl;
2031 ne = (jl+1)*size+jl+1;
2032 }
2033 else
2034 {
2035 ns = jl*size+rest;
2036 ne = (jl+1)*size+rest;
2037 }
2038 jj_counter = 0;
2039 if (jl > 0) jj_counter = jj_count[jl-1];
2040 jj_counter_offd = 0;
2041 if (jl > 0) jj_counter_offd = jj_count_offd[jl-1];
2042
2043 P_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
2044 P_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
2045
2046 for (i = 0; i < n_fine; i++)
2047 {
2048 P_marker[i] = -1;
2049 }
2050 for (i = 0; i < num_cols_A_offd; i++)
2051 {
2052 P_marker_offd[i] = -1;
2053 }
2054 strong_f_marker = -2;
2055
2056 for (i = ns; i < ne; i++)
2057 {
2058
2059 /*--------------------------------------------------------------------
2060 * If i is a c-point, interpolation is the identity.
2061 *--------------------------------------------------------------------*/
2062
2063 if (CF_marker[i] >= 0)
2064 {
2065 P_diag_i[i] = jj_counter;
2066 P_diag_j[jj_counter] = fine_to_coarse[i];
2067 /* P_diag_data[jj_counter] = one; */
2068 hypre_CSRBlockMatrixBlockCopyData(identity_block,
2069 &P_diag_data[jj_counter*bnnz],
2070 1.0, block_size);
2071 jj_counter++;
2072 }
2073
2074 /*--------------------------------------------------------------------
2075 * If i is an F-point, build interpolation.
2076 *--------------------------------------------------------------------*/
2077
2078 else
2079 {
2080 /* Diagonal part of P */
2081 P_diag_i[i] = jj_counter;
2082 jj_begin_row = jj_counter;
2083
2084 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2085 {
2086 i1 = S_diag_j[jj];
2087
2088 /*--------------------------------------------------------------
2089 * If neighbor i1 is a C-point, set column number in P_diag_j
2090 * and initialize interpolation weight to zero.
2091 *--------------------------------------------------------------*/
2092
2093 if (CF_marker[i1] >= 0)
2094 {
2095 P_marker[i1] = jj_counter;
2096 P_diag_j[jj_counter] = fine_to_coarse[i1];
2097 /* P_diag_data[jj_counter] = zero; */
2098 hypre_CSRBlockMatrixBlockCopyData(zero_block,
2099 &P_diag_data[jj_counter*bnnz],
2100 1.0, block_size);
2101 jj_counter++;
2102 }
2103
2104 /*--------------------------------------------------------------
2105 * If neighbor i1 is an F-point, mark it as a strong F-point
2106 * whose connection needs to be distributed.
2107 *--------------------------------------------------------------*/
2108
2109 else if (CF_marker[i1] != -3)
2110 {
2111 P_marker[i1] = strong_f_marker;
2112 }
2113 }
2114 jj_end_row = jj_counter;
2115
2116 /* Off-Diagonal part of P */
2117 P_offd_i[i] = jj_counter_offd;
2118 jj_begin_row_offd = jj_counter_offd;
2119
2120
2121 if (num_procs > 1)
2122 {
2123 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2124 {
2125 i1 = S_offd_j[jj];
2126
2127 /*-----------------------------------------------------------
2128 * If neighbor i1 is a C-point, set column number in P_offd_j
2129 * and initialize interpolation weight to zero.
2130 *-----------------------------------------------------------*/
2131
2132 if (CF_marker_offd[i1] >= 0)
2133 {
2134 P_marker_offd[i1] = jj_counter_offd;
2135 P_offd_j[jj_counter_offd] = i1;
2136 /* P_offd_data[jj_counter_offd] = zero; */
2137 hypre_CSRBlockMatrixBlockCopyData(zero_block,
2138 &P_offd_data[jj_counter_offd*bnnz],
2139 1.0, block_size);
2140
2141 jj_counter_offd++;
2142 }
2143
2144 /*-----------------------------------------------------------
2145 * If neighbor i1 is an F-point, mark it as a strong F-point
2146 * whose connection needs to be distributed.
2147 *-----------------------------------------------------------*/
2148
2149 else if (CF_marker_offd[i1] != -3)
2150 {
2151 P_marker_offd[i1] = strong_f_marker;
2152 }
2153 }
2154 }
2155
2156 jj_end_row_offd = jj_counter_offd;
2157
2158
2159 /* get the diagonal block */
2160 /* diagonal = A_diag_data[A_diag_i[i]]; */
2161 hypre_CSRBlockMatrixBlockCopyDataDiag(&A_diag_data[A_diag_i[i]*bnnz], diagonal_block,
2162 1.0, block_size);
2163
2164
2165
2166 /* Here we go through the neighborhood of this grid point */
2167
2168 /* Loop over ith row of A. First, the diagonal part of A */
2169
2170 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2171 {
2172 i1 = A_diag_j[jj];
2173
2174 /*--------------------------------------------------------------
2175 * Case 1: neighbor i1 is a C-point and strongly influences i,
2176 * accumulate a_{i,i1} into the interpolation weight.
2177 *--------------------------------------------------------------*/
2178
2179 if (P_marker[i1] >= jj_begin_row)
2180 {
2181 /* P_diag_data[P_marker[i1]] += A_diag_data[jj]; */
2182 hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_diag_data[jj*bnnz],
2183 &P_diag_data[P_marker[i1]*bnnz],
2184 block_size);
2185
2186 }
2187
2188 /*--------------------------------------------------------------
2189 * Case 2: neighbor i1 is an F-point and strongly influences i,
2190                   * distribute a_{i,i1} to C-points that strongly influence i.
2191 * Note: currently no distribution to the diagonal in this case.
2192 *--------------------------------------------------------------*/
2193
2194 else if (P_marker[i1] == strong_f_marker || (!add_weak_to_diag && CF_marker[i1] != -3))
2195 {
2196 /* initialize sum to zero */
2197 /* sum = zero; */
2198 hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block, 1.0,
2199 block_size);
2200
2201
2202 /*-----------------------------------------------------------
2203 * Loop over row of A for point i1 and calculate the sum
2204 * of the connections to c-points that strongly influence i.
2205 *-----------------------------------------------------------*/
2206
2207 hypre_CSRBlockMatrixComputeSign(&A_diag_data[A_diag_i[i1]*bnnz], sign, block_size);
2208
2209
2210 /* Diagonal block part of row i1 */
2211 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
2212 {
2213 i2 = A_diag_j[jj1];
2214 if (P_marker[i2] >= jj_begin_row)
2215 {
2216 /* add diag data to sum */
2217 /* sum += A_diag_data[jj1]; */
2218 /* hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_diag_data[jj1*bnnz],
2219 sum_block, block_size);*/
2220
2221 hypre_CSRBlockMatrixBlockAddAccumulateDiagCheckSign(&A_diag_data[jj1*bnnz],
2222 sum_block, block_size, sign);
2223 }
2224 }
2225
2226 /* Off-Diagonal block part of row i1 */
2227 if (num_procs > 1)
2228 {
2229 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
2230 {
2231 i2 = A_offd_j[jj1];
2232 if (P_marker_offd[i2] >= jj_begin_row_offd )
2233 {
2234 /* add off diag data to sum */
2235 /*sum += A_offd_data[jj1];*/
2236 /* hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_offd_data[jj1*bnnz],
2237 sum_block, block_size);*/
2238 hypre_CSRBlockMatrixBlockAddAccumulateDiagCheckSign(&A_offd_data[jj1*bnnz],
2239 sum_block, block_size, sign);
2240 }
2241 }
2242 }
2243 /* check whether sum_block is singular */
2244
2245 /* distribute = A_diag_data[jj] / sum;*/
2246 /* here we want: A_diag_data * sum^(-1) */
2247
2248 if (hypre_CSRBlockMatrixBlockInvMultDiag(sum_block, &A_diag_data[jj*bnnz],
2249 distribute_block, block_size) == 0)
2250 {
2251
2252
2253 /*-----------------------------------------------------------
2254 * Loop over row of A for point i1 and do the distribution.
2255 *-----------------------------------------------------------*/
2256
2257 /* Diagonal block part of row i1 */
2258 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
2259 {
2260 i2 = A_diag_j[jj1];
2261 if (P_marker[i2] >= jj_begin_row )
2262 {
2263
2264 /* P_diag_data[P_marker[i2]]
2265 += distribute * A_diag_data[jj1];*/
2266
2267 /* multiply - result in sum_block */
2268 hypre_CSRBlockMatrixBlockCopyData(zero_block,
2269 sum_block, 1.0, block_size);
2270
2271
2272 /* hypre_CSRBlockMatrixBlockMultAddDiag(distribute_block,
2273 &A_diag_data[jj1*bnnz], 0.0,
2274 sum_block, block_size);*/
2275 hypre_CSRBlockMatrixBlockMultAddDiagCheckSign(distribute_block,
2276 &A_diag_data[jj1*bnnz], 0.0,
2277 sum_block, block_size, sign);
2278
2279
2280 /* add result to p_diag_data */
2281 hypre_CSRBlockMatrixBlockAddAccumulateDiag(sum_block,
2282 &P_diag_data[P_marker[i2]*bnnz],
2283 block_size);
2284
2285 }
2286 }
2287
2288 /* Off-Diagonal block part of row i1 */
2289 if (num_procs > 1)
2290 {
2291 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
2292 {
2293 i2 = A_offd_j[jj1];
2294 if (P_marker_offd[i2] >= jj_begin_row_offd)
2295 {
2296 /* P_offd_data[P_marker_offd[i2]]
2297 += distribute * A_offd_data[jj1]; */
2298
2299 /* multiply - result in sum_block */
2300
2301 hypre_CSRBlockMatrixBlockCopyData(zero_block,
2302 sum_block, 1.0, block_size);
2303 /* hypre_CSRBlockMatrixBlockMultAddDiag(distribute_block,
2304 &A_offd_data[jj1*bnnz], 0.0,
2305 sum_block, block_size); */
2306
2307 hypre_CSRBlockMatrixBlockMultAddDiagCheckSign(distribute_block,
2308 &A_offd_data[jj1*bnnz], 0.0,
2309 sum_block, block_size, sign);
2310
2311
2312
2313 /* add result to p_offd_data */
2314 hypre_CSRBlockMatrixBlockAddAccumulateDiag(sum_block,
2315 &P_offd_data[P_marker_offd[i2]*bnnz],
2316 block_size);
2317
2318
2319 }
2320 }
2321 }
2322 }
2323 else /* sum block is all zeros (or almost singular) - just add to diagonal */
2324 {
2325 /* diagonal += A_diag_data[jj]; */
2326 if (add_weak_to_diag) hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_diag_data[jj*bnnz],
2327 diagonal_block,
2328 block_size);
2329 }
2330 }
2331
2332 /*--------------------------------------------------------------
2333 * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
2334 * into the diagonal.
2335 *--------------------------------------------------------------*/
2336
2337 else if (CF_marker[i1] != -3 && add_weak_to_diag)
2338 {
2339 /* diagonal += A_diag_data[jj];*/
2340 hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_diag_data[jj*bnnz],
2341 diagonal_block,
2342 block_size);
2343 }
2344 }
2345
2346
2347 /*----------------------------------------------------------------
2348 * Still looping over ith row of A. Next, loop over the
2349 * off-diagonal part of A
2350 *---------------------------------------------------------------*/
2351
2352 if (num_procs > 1)
2353 {
2354 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2355 {
2356 i1 = A_offd_j[jj];
2357
2358 /*--------------------------------------------------------------
2359 * Case 1: neighbor i1 is a C-point and strongly influences i,
2360 * accumulate a_{i,i1} into the interpolation weight.
2361 *--------------------------------------------------------------*/
2362
2363 if (P_marker_offd[i1] >= jj_begin_row_offd)
2364 {
2365 /* P_offd_data[P_marker_offd[i1]] += A_offd_data[jj]; */
2366 hypre_CSRBlockMatrixBlockAddAccumulateDiag( &A_offd_data[jj*bnnz],
2367 &P_offd_data[P_marker_offd[i1]*bnnz],
2368 block_size);
2369 }
2370
2371 /*------------------------------------------------------------
2372 * Case 2: neighbor i1 is an F-point and strongly influences i,
2373                         * distribute a_{i,i1} to C-points that strongly influence i.
2374 * Note: currently no distribution to the diagonal in this case.
2375 *-----------------------------------------------------------*/
2376
2377 else if (P_marker_offd[i1] == strong_f_marker || (!add_weak_to_diag && CF_marker[i1] != -3))
2378 {
2379
2380 /* initialize sum to zero */
2381 hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block,
2382 1.0, block_size);
2383
2384 /*---------------------------------------------------------
2385 * Loop over row of A_ext for point i1 and calculate the sum
2386 * of the connections to c-points that strongly influence i.
2387 *---------------------------------------------------------*/
2388
2389 /* find row number */
2390 c_num = A_offd_j[jj];
2391
2392 hypre_CSRBlockMatrixComputeSign(&A_ext_data[A_ext_i[c_num]*bnnz], sign, block_size);
2393
2394
2395 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
2396 {
2397 i2 = (HYPRE_Int)A_ext_j[jj1];
2398
2399 if (i2 > -1)
2400 {
2401 /* in the diagonal block */
2402 if (P_marker[i2] >= jj_begin_row)
2403 {
2404 /* sum += A_ext_data[jj1]; */
2405 /* hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_ext_data[jj1*bnnz],
2406 sum_block, block_size);*/
2407 hypre_CSRBlockMatrixBlockAddAccumulateDiagCheckSign(&A_ext_data[jj1*bnnz],
2408 sum_block, block_size, sign);
2409 }
2410 }
2411 else
2412 {
2413 /* in the off_diagonal block */
2414 if (P_marker_offd[-i2-1] >= jj_begin_row_offd)
2415 {
2416 /* sum += A_ext_data[jj1]; */
2417 /* hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_ext_data[jj1*bnnz],
2418 sum_block, block_size);*/
2419 hypre_CSRBlockMatrixBlockAddAccumulateDiagCheckSign(&A_ext_data[jj1*bnnz],
2420 sum_block, block_size, sign);
2421 }
2422 }
2423 }
2424
2425 /* check whether sum_block is singular */
2426
2427
2428 /* distribute = A_offd_data[jj] / sum; */
2429 /* here we want: A_offd_data * sum^(-1) */
2430 if (hypre_CSRBlockMatrixBlockInvMultDiag(sum_block, &A_offd_data[jj*bnnz],
2431 distribute_block, block_size) == 0)
2432 {
2433
2434 /*---------------------------------------------------------
2435 * Loop over row of A_ext for point i1 and do
2436 * the distribution.
2437 *--------------------------------------------------------*/
2438
2439 /* Diagonal block part of row i1 */
2440
2441 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
2442 {
2443 i2 = (HYPRE_Int)A_ext_j[jj1];
2444
2445 if (i2 > -1) /* in the diagonal block */
2446 {
2447 if (P_marker[i2] >= jj_begin_row)
2448 {
2449 /* P_diag_data[P_marker[i2]]
2450 += distribute * A_ext_data[jj1]; */
2451
2452 /* multiply - result in sum_block */
2453 hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block, 1.0, block_size);
2454
2455 /* hypre_CSRBlockMatrixBlockMultAddDiag(distribute_block,
2456 &A_ext_data[jj1*bnnz], 0.0,
2457 sum_block, block_size); */
2458
2459 hypre_CSRBlockMatrixBlockMultAddDiagCheckSign(distribute_block,
2460 &A_ext_data[jj1*bnnz], 0.0,
2461 sum_block, block_size, sign);
2462 /* add result to p_diag_data */
2463 hypre_CSRBlockMatrixBlockAddAccumulateDiag(sum_block,
2464 &P_diag_data[P_marker[i2]*bnnz],
2465 block_size);
2466 }
2467 }
2468 else
2469 {
2470 /* in the off_diagonal block */
2471 if (P_marker_offd[-i2-1] >= jj_begin_row_offd)
2472
2473 /*P_offd_data[P_marker_offd[-i2-1]]
2474 += distribute * A_ext_data[jj1];*/
2475 {
2476
2477 /* multiply - result in sum_block */
2478 hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block, 1.0, block_size);
2479
2480 /* hypre_CSRBlockMatrixBlockMultAddDiag(distribute_block,
2481 &A_ext_data[jj1*bnnz], 0.0,
2482 sum_block, block_size);*/
2483
2484 hypre_CSRBlockMatrixBlockMultAddDiagCheckSign(distribute_block,
2485 &A_ext_data[jj1*bnnz], 0.0,
2486 sum_block, block_size, sign);
2487 /* add result to p_offd_data */
2488 hypre_CSRBlockMatrixBlockAddAccumulateDiag(sum_block,
2489 &P_offd_data[P_marker_offd[-i2-1]*bnnz],
2490 block_size);
2491 }
2492 }
2493 }
2494 }
2495 else /* sum block is all zeros - just add to diagonal */
2496 {
2497 /* diagonal += A_offd_data[jj]; */
2498 if (add_weak_to_diag) hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_offd_data[jj*bnnz],
2499 diagonal_block,
2500 block_size);
2501 }
2502 }
2503
2504 /*-----------------------------------------------------------
2505 * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
2506 * into the diagonal.
2507 *-----------------------------------------------------------*/
2508
2509 else if (CF_marker_offd[i1] != -3 && add_weak_to_diag)
2510 {
2511 /* diagonal += A_offd_data[jj]; */
2512 hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_offd_data[jj*bnnz],
2513 diagonal_block, block_size);
2514 }
2515 }
2516 }
2517
2518 /*-----------------------------------------------------------------
2519 * Set interpolation weight by dividing by the diagonal.
2520 *-----------------------------------------------------------------*/
2521
2522 for (jj = jj_begin_row; jj < jj_end_row; jj++)
2523 {
2524
2525 /* P_diag_data[jj] /= -diagonal; */
2526
2527 /* want diagonal^(-1)*P_diag_data */
2528 /* do division - put in sum_block */
2529 if ( hypre_CSRBlockMatrixBlockInvMultDiag(diagonal_block, &P_diag_data[jj*bnnz],
2530 sum_block, block_size) == 0)
2531 {
2532 /* now copy to P_diag_data[jj] and make negative */
2533 hypre_CSRBlockMatrixBlockCopyData(sum_block, &P_diag_data[jj*bnnz],
2534 -1.0, block_size);
2535 }
2536 else
2537 {
2538 /* hypre_printf(" Warning! singular diagonal block! Proc id %d row %d\n", my_id,i); */
2539 /* just make P_diag_data negative since diagonal is zero */
2540 hypre_CSRBlockMatrixBlockCopyData(&P_diag_data[jj*bnnz], &P_diag_data[jj*bnnz],
2541 -1.0, block_size);
2542 }
2543 }
2544
2545 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
2546 {
2547 /* P_offd_data[jj] /= -diagonal; */
2548
2549 /* do division - put in sum_block */
2550 hypre_CSRBlockMatrixBlockInvMultDiag(diagonal_block, &P_offd_data[jj*bnnz],
2551 sum_block, block_size);
2552
2553 /* now copy to P_offd_data[jj] and make negative */
2554 hypre_CSRBlockMatrixBlockCopyData(sum_block, &P_offd_data[jj*bnnz],
2555 -1.0, block_size);
2556 }
2557 }
2558
2559 strong_f_marker--;
2560
2561 P_offd_i[i+1] = jj_counter_offd;
2562 }
2563 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
2564 hypre_TFree(P_marker_offd, HYPRE_MEMORY_HOST);
2565 }
2566
2567 /* Now create P - as a block matrix */
2568 P = hypre_ParCSRBlockMatrixCreate(comm, block_size,
2569 hypre_ParCSRBlockMatrixGlobalNumRows(A),
2570 total_global_cpts,
2571 hypre_ParCSRBlockMatrixColStarts(A),
2572 num_cpts_global,
2573 0,
2574 P_diag_i[n_fine],
2575 P_offd_i[n_fine]);
2576
2577
2578 P_diag = hypre_ParCSRBlockMatrixDiag(P);
2579 hypre_CSRBlockMatrixData(P_diag) = P_diag_data;
2580 hypre_CSRBlockMatrixI(P_diag) = P_diag_i;
2581 hypre_CSRBlockMatrixJ(P_diag) = P_diag_j;
2582
2583 P_offd = hypre_ParCSRBlockMatrixOffd(P);
2584 hypre_CSRBlockMatrixData(P_offd) = P_offd_data;
2585 hypre_CSRBlockMatrixI(P_offd) = P_offd_i;
2586 hypre_CSRBlockMatrixJ(P_offd) = P_offd_j;
2587
2588 /* Compress P, removing coefficients smaller than trunc_factor * Max */
2589 if (trunc_factor != 0.0 || max_elmts > 0)
2590 {
2591 hypre_BoomerAMGBlockInterpTruncation(P, trunc_factor, max_elmts);
2592 P_diag_data = hypre_CSRBlockMatrixData(P_diag);
2593 P_diag_i = hypre_CSRBlockMatrixI(P_diag);
2594 P_diag_j = hypre_CSRBlockMatrixJ(P_diag);
2595 P_offd_data = hypre_CSRBlockMatrixData(P_offd);
2596 P_offd_i = hypre_CSRBlockMatrixI(P_offd);
2597 P_offd_j = hypre_CSRBlockMatrixJ(P_offd);
2598 P_diag_size = P_diag_i[n_fine];
2599 P_offd_size = P_offd_i[n_fine];
2600 }
2601
2602
2603 num_cols_P_offd = 0;
2604 if (P_offd_size)
2605 {
2606 P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
2607
2608 for (i=0; i < num_cols_A_offd; i++)
2609 P_marker[i] = 0;
2610
2611 num_cols_P_offd = 0;
2612 for (i=0; i < P_offd_size; i++)
2613 {
2614 index = P_offd_j[i];
2615 if (!P_marker[index])
2616 {
2617 num_cols_P_offd++;
2618 P_marker[index] = 1;
2619 }
2620 }
2621
2622 tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
2623 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
2624
2625 index = 0;
2626 for (i=0; i < num_cols_P_offd; i++)
2627 {
2628 while (P_marker[index]==0) index++;
2629 tmp_map_offd[i] = index++;
2630 }
2631
2632 for (i=0; i < P_offd_size; i++)
2633 P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
2634 P_offd_j[i],
2635 num_cols_P_offd);
2636 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
2637 }
2638
2639 for (i=0; i < n_fine; i++)
2640 if (CF_marker[i] == -3) CF_marker[i] = -1;
2641
2642 if (num_cols_P_offd)
2643 {
2644 hypre_ParCSRBlockMatrixColMapOffd(P) = col_map_offd_P;
2645 hypre_CSRBlockMatrixNumCols(P_offd) = num_cols_P_offd;
2646 }
2647
2648 /* use block version */
2649 hypre_GetCommPkgBlockRTFromCommPkgBlockA(P,A, tmp_map_offd, fine_to_coarse_offd);
2650
2651
2652 *P_ptr = P;
2653
2654 hypre_TFree(sign, HYPRE_MEMORY_HOST);
2655
2656
2657 hypre_TFree(zero_block, HYPRE_MEMORY_HOST);
2658 hypre_TFree(identity_block, HYPRE_MEMORY_HOST);
2659 hypre_TFree(diagonal_block, HYPRE_MEMORY_HOST);
2660 hypre_TFree(sum_block, HYPRE_MEMORY_HOST);
2661 hypre_TFree(distribute_block, HYPRE_MEMORY_HOST);
2662
2663 hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
2664 hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
2665 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
2666 hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
2667 hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
2668 hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
2669 hypre_TFree(coarse_counter, HYPRE_MEMORY_HOST);
2670 hypre_TFree(jj_count, HYPRE_MEMORY_HOST);
2671 hypre_TFree(jj_count_offd, HYPRE_MEMORY_HOST);
2672
2673 if (num_procs > 1) hypre_CSRBlockMatrixDestroy(A_ext);
2674
2675 return(0);
2676
2677 }
2678
2679
2680 /*---------------------------------------------------------------------------
2681  * hypre_BoomerAMGBuildBlockInterpRV
2682 
2683  Here we modify the block interpolation as in Ruge's elasticity paper
2684  (Applied Math. and Computation, 1986) - except that we do not include
2685  the diagonal when distributing the F-connections.
2686 
2687 
2688  - When we distribute an F-connection, we distribute the error only to
2689  like unknowns - this has the effect of using only the diagonal of the
2690  matrix for the F-distributions.  In addition, we do not differentiate
2691  between the strengths of the F-connections (so nothing is added to the diag).
2692 
2693  *--------------------------------------------------------------------------*/
2694
2695 HYPRE_Int
hypre_BoomerAMGBuildBlockInterpRV(hypre_ParCSRBlockMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRBlockMatrix ** P_ptr)2696 hypre_BoomerAMGBuildBlockInterpRV( hypre_ParCSRBlockMatrix *A,
2697 HYPRE_Int *CF_marker,
2698 hypre_ParCSRMatrix *S,
2699 HYPRE_BigInt *num_cpts_global,
2700 HYPRE_Int num_functions,
2701 HYPRE_Int *dof_func,
2702 HYPRE_Int debug_flag,
2703 HYPRE_Real trunc_factor,
2704 HYPRE_Int max_elmts,
2705 hypre_ParCSRBlockMatrix **P_ptr)
2706 {
2707
2708 MPI_Comm comm = hypre_ParCSRBlockMatrixComm(A);
2709 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
2710 hypre_ParCSRCommHandle *comm_handle;
2711
2712 hypre_CSRBlockMatrix *A_diag = hypre_ParCSRBlockMatrixDiag(A);
2713 HYPRE_Real *A_diag_data = hypre_CSRBlockMatrixData(A_diag);
2714 HYPRE_Int *A_diag_i = hypre_CSRBlockMatrixI(A_diag);
2715 HYPRE_Int *A_diag_j = hypre_CSRBlockMatrixJ(A_diag);
2716
2717 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(A_diag);
2718 HYPRE_Int bnnz = block_size*block_size;
2719
2720 hypre_CSRBlockMatrix *A_offd = hypre_ParCSRBlockMatrixOffd(A);
2721 HYPRE_Real *A_offd_data = hypre_CSRBlockMatrixData(A_offd);
2722 HYPRE_Int *A_offd_i = hypre_CSRBlockMatrixI(A_offd);
2723 HYPRE_Int *A_offd_j = hypre_CSRBlockMatrixJ(A_offd);
2724 HYPRE_Int num_cols_A_offd = hypre_CSRBlockMatrixNumCols(A_offd);
2725 HYPRE_BigInt *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(A);
2726
2727 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
2728 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
2729 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
2730
2731 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
2732 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
2733 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
2734
2735 hypre_ParCSRBlockMatrix *P;
2736 HYPRE_BigInt *col_map_offd_P;
2737 HYPRE_Int *tmp_map_offd = NULL;
2738
2739 HYPRE_Int *CF_marker_offd = NULL;
2740
2741 hypre_CSRBlockMatrix *A_ext;
2742
2743 HYPRE_Real *A_ext_data = NULL;
2744 HYPRE_Int *A_ext_i = NULL;
2745 HYPRE_BigInt *A_ext_j = NULL;
2746
2747 hypre_CSRBlockMatrix *P_diag;
2748 hypre_CSRBlockMatrix *P_offd;
2749
2750 HYPRE_Real *P_diag_data;
2751 HYPRE_Int *P_diag_i;
2752 HYPRE_Int *P_diag_j;
2753 HYPRE_Real *P_offd_data;
2754 HYPRE_Int *P_offd_i;
2755 HYPRE_Int *P_offd_j;
2756
2757 HYPRE_Int P_diag_size, P_offd_size;
2758
2759 HYPRE_Int *P_marker, *P_marker_offd = NULL;
2760
2761 HYPRE_Int jj_counter,jj_counter_offd;
2762 HYPRE_Int *jj_count, *jj_count_offd = NULL;
2763 HYPRE_Int jj_begin_row,jj_begin_row_offd;
2764 HYPRE_Int jj_end_row,jj_end_row_offd;
2765
2766 HYPRE_Int start_indexing = 0; /* start indexing for P_data at 0 */
2767
2768 HYPRE_Int n_fine = hypre_CSRBlockMatrixNumRows(A_diag);
2769
2770 HYPRE_Int strong_f_marker;
2771
2772 HYPRE_Int *fine_to_coarse;
2773 HYPRE_BigInt *fine_to_coarse_offd = NULL;
2774 HYPRE_Int *coarse_counter;
2775 HYPRE_Int coarse_shift;
2776 HYPRE_BigInt total_global_cpts;
2777 HYPRE_Int num_cols_P_offd;
2778 HYPRE_BigInt my_first_cpt;
2779
2780 HYPRE_Int bd;
2781
2782 HYPRE_Int i,i1,i2;
2783 HYPRE_Int j,jl,jj,jj1;
2784 HYPRE_Int kc;
2785 HYPRE_BigInt big_k;
2786 HYPRE_Int start;
2787
2788 HYPRE_Int c_num;
2789
2790 HYPRE_Int my_id;
2791 HYPRE_Int num_procs;
2792 HYPRE_Int num_threads;
2793 HYPRE_Int num_sends;
2794 HYPRE_Int index;
2795 HYPRE_Int ns, ne, size, rest;
2796 HYPRE_Int *int_buf_data = NULL;
2797 HYPRE_BigInt *big_buf_data = NULL;
2798
2799 HYPRE_BigInt col_1 = hypre_ParCSRBlockMatrixFirstRowIndex(A);
2800 HYPRE_Int local_numrows = hypre_CSRBlockMatrixNumRows(A_diag);
2801 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt)local_numrows;
2802
2803 HYPRE_Real wall_time; /* for debugging instrumentation */
2804
2805
2806 HYPRE_Real *identity_block;
2807 HYPRE_Real *zero_block;
2808 HYPRE_Real *diagonal_block;
2809 HYPRE_Real *sum_block;
2810 HYPRE_Real *distribute_block;
2811
2812 hypre_MPI_Comm_size(comm, &num_procs);
2813 hypre_MPI_Comm_rank(comm,&my_id);
2814 num_threads = hypre_NumThreads();
2815
2816 my_first_cpt = num_cpts_global[0];
2817 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
2818 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
2819
2820 /*-------------------------------------------------------------------
2821 * Get the CF_marker data for the off-processor columns
2822 *-------------------------------------------------------------------*/
2823
2824 if (debug_flag==4) wall_time = time_getWallclockSeconds();
2825
2826 CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
2827
2828
2829 if (!comm_pkg)
2830 {
2831 hypre_BlockMatvecCommPkgCreate(A);
2832 comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
2833 }
2834
2835 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
2836 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
2837 num_sends), HYPRE_MEMORY_HOST);
2838
2839 index = 0;
2840 for (i = 0; i < num_sends; i++)
2841 {
2842 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
2843 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
2844 {
2845 int_buf_data[index++]
2846 = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2847 }
2848
2849 }
2850
2851 /* we do not need the block version of comm handle - because
2852 CF_marker corresponds to the nodal matrix. This call populates
2853 CF_marker_offd */
2854 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
2855 CF_marker_offd);
2856
2857 hypre_ParCSRCommHandleDestroy(comm_handle);
2858
2859
2860 if (debug_flag==4)
2861 {
2862 wall_time = time_getWallclockSeconds() - wall_time;
2863 hypre_printf("Proc = %d Interp: Comm 1 CF_marker = %f\n",
2864 my_id, wall_time);
2865 fflush(NULL);
2866 }
2867
2868 /*----------------------------------------------------------------------
2869 * Get the ghost rows of A
2870 *---------------------------------------------------------------------*/
2871
2872 if (debug_flag==4) wall_time = time_getWallclockSeconds();
2873
2874 if (num_procs > 1)
2875 {
2876 A_ext = hypre_ParCSRBlockMatrixExtractBExt(A, A, 1);
2877 A_ext_i = hypre_CSRBlockMatrixI(A_ext);
2878 A_ext_j = hypre_CSRBlockMatrixBigJ(A_ext);
2879 A_ext_data = hypre_CSRBlockMatrixData(A_ext);
2880 }
2881
2882 index = 0;
2883 for (i=0; i < num_cols_A_offd; i++)
2884 {
2885 for (j=A_ext_i[i]; j < A_ext_i[i+1]; j++)
2886 {
2887 big_k = A_ext_j[j];
2888 if (big_k >= col_1 && big_k < col_n)
2889 {
2890 A_ext_j[index] = big_k - col_1;
2891          /* for the data field we must get all of the block data */
2892 for (bd = 0; bd < bnnz; bd++)
2893 {
2894 A_ext_data[index*bnnz + bd] = A_ext_data[j*bnnz + bd];
2895 }
2896 index++;
2897 }
2898 else
2899 {
2900 kc = hypre_BigBinarySearch(col_map_offd, big_k, num_cols_A_offd);
2901 if (kc > -1)
2902 {
2903 A_ext_j[index] = (HYPRE_BigInt)(-kc-1);
2904 for (bd = 0; bd < bnnz; bd++)
2905 {
2906 A_ext_data[index*bnnz + bd] = A_ext_data[j*bnnz + bd];
2907 }
2908 index++;
2909 }
2910 }
2911 }
2912 A_ext_i[i] = index;
2913 }
2914 for (i = num_cols_A_offd; i > 0; i--)
2915 A_ext_i[i] = A_ext_i[i-1];
2916 if (num_procs > 1) A_ext_i[0] = 0;
2917
2918 if (debug_flag==4)
2919 {
2920 wall_time = time_getWallclockSeconds() - wall_time;
2921 hypre_printf("Proc = %d Interp: Comm 2 Get A_ext = %f\n",
2922 my_id, wall_time);
2923 fflush(NULL);
2924 }
2925
2926
2927 /*-----------------------------------------------------------------------
2928 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
2929 *-----------------------------------------------------------------------*/
2930
2931 /*-----------------------------------------------------------------------
2932    * Initialize counters and allocate mapping vector.
2933 *-----------------------------------------------------------------------*/
2934
2935 coarse_counter = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
2936 jj_count = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
2937 jj_count_offd = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
2938
2939 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
2940
2941 for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1;
2942
2943 jj_counter = start_indexing;
2944 jj_counter_offd = start_indexing;
2945
2946 /*-----------------------------------------------------------------------
2947 * Loop over fine grid.
2948 *-----------------------------------------------------------------------*/
2949
2950 for (j = 0; j < num_threads; j++)
2951 {
2952 size = n_fine/num_threads;
2953 rest = n_fine - size*num_threads;
2954 if (j < rest)
2955 {
2956 ns = j*size+j;
2957 ne = (j+1)*size+j+1;
2958 }
2959 else
2960 {
2961 ns = j*size+rest;
2962 ne = (j+1)*size+rest;
2963 }
2964
2965
2966 /* loop over the fine grid points */
2967 for (i = ns; i < ne; i++)
2968 {
2969
2970 /*--------------------------------------------------------------------
2971 * If i is a C-point, interpolation is the identity. Also set up
2972 * mapping vector (fine_to_coarse is the mapping vector).
2973 *--------------------------------------------------------------------*/
2974
2975 if (CF_marker[i] >= 0)
2976 {
2977 jj_count[j]++;
2978 fine_to_coarse[i] = coarse_counter[j];
2979 coarse_counter[j]++;
2980 }
2981
2982 /*--------------------------------------------------------------------
2983 * If i is an F-point, interpolation is from the C-points that
2984 * strongly influence i.
2985 *--------------------------------------------------------------------*/
2986
2987 else
2988 {
2989 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2990 {
2991 i1 = S_diag_j[jj];
2992 if (CF_marker[i1] >= 0)
2993 {
2994 jj_count[j]++;
2995 }
2996 }
2997
2998 if (num_procs > 1)
2999 {
3000 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
3001 {
3002 i1 = S_offd_j[jj];
3003 if (CF_marker_offd[i1] >= 0)
3004 {
3005 jj_count_offd[j]++;
3006 }
3007 }
3008 }
3009 }
3010 }
3011 }
3012
3013 /*-----------------------------------------------------------------------
3014 * Allocate arrays.
3015 *-----------------------------------------------------------------------*/
3016
3017 for (i=0; i < num_threads-1; i++)
3018 {
3019 coarse_counter[i+1] += coarse_counter[i];
3020 jj_count[i+1] += jj_count[i];
3021 jj_count_offd[i+1] += jj_count_offd[i];
3022 }
3023 i = num_threads-1;
3024 jj_counter = jj_count[i];
3025 jj_counter_offd = jj_count_offd[i];
3026
3027 P_diag_size = jj_counter;
3028
3029 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
3030 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
3031 /* we need to include the size of the blocks in the data size */
3032 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size*bnnz, HYPRE_MEMORY_HOST);
3033
3034 P_diag_i[n_fine] = jj_counter;
3035
3036
3037 P_offd_size = jj_counter_offd;
3038
3039 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
3040 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
3041 /* we need to include the size of the blocks in the data size */
3042 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size*bnnz, HYPRE_MEMORY_HOST);
3043
3044 /*-----------------------------------------------------------------------
3045    * Initialize some stuff.
3046 *-----------------------------------------------------------------------*/
3047
3048 jj_counter = start_indexing;
3049 jj_counter_offd = start_indexing;
3050
3051 if (debug_flag==4)
3052 {
3053 wall_time = time_getWallclockSeconds() - wall_time;
3054 hypre_printf("Proc = %d Interp: Internal work 1 = %f\n",
3055 my_id, wall_time);
3056 fflush(NULL);
3057 }
3058
3059 /* we need a block identity and a block of zeros*/
3060 identity_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
3061 zero_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
3062
3063 for(i = 0; i < block_size; i++)
3064 {
3065 identity_block[i*block_size + i] = 1.0;
3066 }
3067
3068
3069 /* we also need a block to keep track of the diagonal values and a sum */
3070 diagonal_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
3071 sum_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
3072 distribute_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
3073
3074 /*-----------------------------------------------------------------------
3075 * Send and receive fine_to_coarse info.
3076 *-----------------------------------------------------------------------*/
3077
3078 if (debug_flag==4) wall_time = time_getWallclockSeconds();
3079
3080 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_A_offd, HYPRE_MEMORY_HOST);
3081 big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
3082 num_sends), HYPRE_MEMORY_HOST);
3083
3084 for (j = 0; j < num_threads; j++)
3085 {
3086 coarse_shift = 0;
3087 if (j > 0) coarse_shift = coarse_counter[j-1];
3088 size = n_fine/num_threads;
3089 rest = n_fine - size*num_threads;
3090 if (j < rest)
3091 {
3092 ns = j*size+j;
3093 ne = (j+1)*size+j+1;
3094 }
3095 else
3096 {
3097 ns = j*size+rest;
3098 ne = (j+1)*size+rest;
3099 }
3100 for (i = ns; i < ne; i++)
3101 fine_to_coarse[i] += coarse_shift;
3102 }
3103 index = 0;
3104 for (i = 0; i < num_sends; i++)
3105 {
3106 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
3107 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
3108 big_buf_data[index++] = my_first_cpt
3109 + fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
3110 }
3111
3112 /* again, we do not need to use the block version of comm handle since
3113 the fine to coarse mapping is size of the nodes */
3114
3115 comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data,
3116 fine_to_coarse_offd);
3117
3118 hypre_ParCSRCommHandleDestroy(comm_handle);
3119
3120 if (debug_flag==4)
3121 {
3122 wall_time = time_getWallclockSeconds() - wall_time;
3123 hypre_printf("Proc = %d Interp: Comm 4 FineToCoarse = %f\n",
3124 my_id, wall_time);
3125 fflush(NULL);
3126 }
3127
3128 if (debug_flag==4) wall_time = time_getWallclockSeconds();
3129
3130 for (i = 0; i < n_fine; i++) fine_to_coarse[i] -= my_first_cpt;
3131
3132 /*-----------------------------------------------------------------------
3133 * Loop over fine grid points.
3134 *-----------------------------------------------------------------------*/
3135
3136 for (jl = 0; jl < num_threads; jl++)
3137 {
3138 size = n_fine/num_threads;
3139 rest = n_fine - size*num_threads;
3140 if (jl < rest)
3141 {
3142 ns = jl*size+jl;
3143 ne = (jl+1)*size+jl+1;
3144 }
3145 else
3146 {
3147 ns = jl*size+rest;
3148 ne = (jl+1)*size+rest;
3149 }
3150 jj_counter = 0;
3151 if (jl > 0) jj_counter = jj_count[jl-1];
3152 jj_counter_offd = 0;
3153 if (jl > 0) jj_counter_offd = jj_count_offd[jl-1];
3154
3155 P_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
3156 P_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
3157
3158 for (i = 0; i < n_fine; i++)
3159 {
3160 P_marker[i] = -1;
3161 }
3162 for (i = 0; i < num_cols_A_offd; i++)
3163 {
3164 P_marker_offd[i] = -1;
3165 }
3166 strong_f_marker = -2;
3167
3168 for (i = ns; i < ne; i++)
3169 {
3170
3171 /*--------------------------------------------------------------------
3172 * If i is a c-point, interpolation is the identity.
3173 *--------------------------------------------------------------------*/
3174
3175 if (CF_marker[i] >= 0)
3176 {
3177 P_diag_i[i] = jj_counter;
3178 P_diag_j[jj_counter] = fine_to_coarse[i];
3179 /* P_diag_data[jj_counter] = one; */
3180 hypre_CSRBlockMatrixBlockCopyData(identity_block,
3181 &P_diag_data[jj_counter*bnnz],
3182 1.0, block_size);
3183 jj_counter++;
3184 }
3185
3186 /*--------------------------------------------------------------------
3187 * If i is an F-point, build interpolation.
3188 *--------------------------------------------------------------------*/
3189
3190 else
3191 {
3192 /* Diagonal part of P */
3193 P_diag_i[i] = jj_counter;
3194 jj_begin_row = jj_counter;
3195
3196 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
3197 {
3198 i1 = S_diag_j[jj];
3199
3200 /*--------------------------------------------------------------
3201 * If neighbor i1 is a C-point, set column number in P_diag_j
3202 * and initialize interpolation weight to zero.
3203 *--------------------------------------------------------------*/
3204
3205 if (CF_marker[i1] >= 0)
3206 {
3207 P_marker[i1] = jj_counter;
3208 P_diag_j[jj_counter] = fine_to_coarse[i1];
3209 /* P_diag_data[jj_counter] = zero; */
3210 hypre_CSRBlockMatrixBlockCopyData(zero_block,
3211 &P_diag_data[jj_counter*bnnz],
3212 1.0, block_size);
3213 jj_counter++;
3214 }
3215
3216 /*--------------------------------------------------------------
3217 * If neighbor i1 is an F-point, mark it as a strong F-point
3218 * whose connection needs to be distributed.
3219 *--------------------------------------------------------------*/
3220
3221 else if (CF_marker[i1] != -3)
3222 {
3223 P_marker[i1] = strong_f_marker;
3224 }
3225 }
3226 jj_end_row = jj_counter;
3227
3228 /* Off-Diagonal part of P */
3229 P_offd_i[i] = jj_counter_offd;
3230 jj_begin_row_offd = jj_counter_offd;
3231
3232
3233 if (num_procs > 1)
3234 {
3235 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
3236 {
3237 i1 = S_offd_j[jj];
3238
3239 /*-----------------------------------------------------------
3240 * If neighbor i1 is a C-point, set column number in P_offd_j
3241 * and initialize interpolation weight to zero.
3242 *-----------------------------------------------------------*/
3243
3244 if (CF_marker_offd[i1] >= 0)
3245 {
3246 P_marker_offd[i1] = jj_counter_offd;
3247 P_offd_j[jj_counter_offd] = i1;
3248 /* P_offd_data[jj_counter_offd] = zero; */
3249 hypre_CSRBlockMatrixBlockCopyData(zero_block,
3250 &P_offd_data[jj_counter_offd*bnnz],
3251 1.0, block_size);
3252
3253 jj_counter_offd++;
3254 }
3255
3256 /*-----------------------------------------------------------
3257 * If neighbor i1 is an F-point, mark it as a strong F-point
3258 * whose connection needs to be distributed.
3259 *-----------------------------------------------------------*/
3260
3261 else if (CF_marker_offd[i1] != -3)
3262 {
3263 P_marker_offd[i1] = strong_f_marker;
3264 }
3265 }
3266 }
3267
3268 jj_end_row_offd = jj_counter_offd;
3269
3270
3271 /* get the diagonal block */
3272 /* diagonal = A_diag_data[A_diag_i[i]]; */
3273 hypre_CSRBlockMatrixBlockCopyData(&A_diag_data[A_diag_i[i]*bnnz], diagonal_block,
3274 1.0, block_size);
3275
3276
3277
3278 /* Here we go through the neighborhood of this grid point */
3279
3280 /* Loop over ith row of A. First, the diagonal part of A */
3281
3282 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
3283 {
3284 i1 = A_diag_j[jj];
3285
3286 /*--------------------------------------------------------------
3287 * Case 1: neighbor i1 is a C-point and strongly influences i,
3288 * accumulate a_{i,i1} into the interpolation weight.
3289 *--------------------------------------------------------------*/
3290
3291 if (P_marker[i1] >= jj_begin_row)
3292 {
3293 /* P_diag_data[P_marker[i1]] += A_diag_data[jj]; */
3294 hypre_CSRBlockMatrixBlockAddAccumulate(&A_diag_data[jj*bnnz],
3295 &P_diag_data[P_marker[i1]*bnnz],
3296 block_size);
3297
3298 }
3299
3300 /*--------------------------------------------------------------
3301 * Case 2: neighbor i1 is an F-point (MAY or MAY NOT strongly influences i),
3302 * distribute a_{i,i1} to C-points that strongly infuence i.
3303 * Note: currently no distribution to the diagonal in this case.
3304 *--------------------------------------------------------------*/
3305
3306 else if (P_marker[i1] == strong_f_marker || CF_marker[i1] != -3)
3307 {
3308 /* initialize sum to zero */
3309 /* sum = zero; */
3310 hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block, 1.0,
3311 block_size);
3312
3313
3314 /*-----------------------------------------------------------
3315 * Loop over row of A for point i1 and calculate the sum
3316 * of the connections to c-points that strongly influence i.-
3317
3318 HERE WE ONLY WANT THE DIAG CONTIRBUTIONS (intra-unknown)
3319
3320 *-----------------------------------------------------------*/
3321
3322 /* Diagonal block part of row i1 */
3323 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
3324 {
3325 i2 = A_diag_j[jj1];
3326 if (P_marker[i2] >= jj_begin_row)
3327 {
3328 /* add diag data to sum */
3329 /* sum += A_diag_data[jj1]; */
3330 hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_diag_data[jj1*bnnz],
3331 sum_block, block_size);
3332 }
3333 }
3334
3335 /* Off-Diagonal block part of row i1 */
3336 if (num_procs > 1)
3337 {
3338 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
3339 {
3340 i2 = A_offd_j[jj1];
3341 if (P_marker_offd[i2] >= jj_begin_row_offd )
3342 {
3343 /* add off diag data to sum */
3344 /*sum += A_offd_data[jj1];*/
3345 hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_offd_data[jj1*bnnz],
3346 sum_block, block_size);
3347
3348 }
3349 }
3350 }
3351 /* check whether sum_block is singular (NOW SUM IS A DIAG MATRIX)*/
3352 /* distribute = A_diag_data[jj] / sum; (if a diag element is 0 then
3353 that col is scaled by 1 instead of 1/diag) - doesn'treturn 0*/
3354 if (hypre_CSRBlockMatrixBlockInvMultDiag2(&A_diag_data[jj*bnnz], sum_block,
3355 distribute_block, block_size) == 0)
3356 {
3357
3358 /*-----------------------------------------------------------
3359 * Loop over row of A for point i1 and do the distribution.-
3360 HERE AGAIN WE ONLY WANT TO DIST W/IN A LIKE UNKNOWN
3361
3362 *-----------------------------------------------------------*/
3363
3364 /* Diagonal block part of row i1 */
3365 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
3366 {
3367 i2 = A_diag_j[jj1];
3368 if (P_marker[i2] >= jj_begin_row )
3369 {
3370
3371 /* P_diag_data[P_marker[i2]]
3372 += distribute * A_diag_data[jj1];*/
3373
3374 /* multiply - result in sum_block */
3375 hypre_CSRBlockMatrixBlockMultAddDiag2(distribute_block,
3376 &A_diag_data[jj1*bnnz], 0.0,
3377 sum_block, block_size);
3378
3379
3380 /* add result to p_diag_data */
3381 hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
3382 &P_diag_data[P_marker[i2]*bnnz],
3383 block_size);
3384
3385 }
3386 }
3387
3388 /* Off-Diagonal block part of row i1 */
3389 if (num_procs > 1)
3390 {
3391 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
3392 {
3393 i2 = A_offd_j[jj1];
3394 if (P_marker_offd[i2] >= jj_begin_row_offd)
3395 {
3396 /* P_offd_data[P_marker_offd[i2]]
3397 += distribute * A_offd_data[jj1]; */
3398
3399 /* multiply - result in sum_block */
3400 hypre_CSRBlockMatrixBlockMultAddDiag2(distribute_block,
3401 &A_offd_data[jj1*bnnz], 0.0,
3402 sum_block, block_size);
3403
3404
3405 /* add result to p_offd_data */
3406 hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
3407 &P_offd_data[P_marker_offd[i2]*bnnz],
3408 block_size);
3409 }
3410 }
3411 }
3412 } /* end of if sum */
3413 }/* end of case 1 or case 2*/
3414
3415 }/* end of loop of diag part */
3416
3417
3418 /*----------------------------------------------------------------
3419 * Still looping over ith row of A. Next, loop over the
3420 * off-diagonal part of A
3421 *---------------------------------------------------------------*/
3422
3423 if (num_procs > 1)
3424 {
3425 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
3426 {
3427 i1 = A_offd_j[jj];
3428
3429 /*--------------------------------------------------------------
3430 * Case 1: neighbor i1 is a C-point and strongly influences i,
3431 * accumulate a_{i,i1} into the interpolation weight.
3432 *--------------------------------------------------------------*/
3433
3434 if (P_marker_offd[i1] >= jj_begin_row_offd)
3435 {
3436 /* P_offd_data[P_marker_offd[i1]] += A_offd_data[jj]; */
3437 hypre_CSRBlockMatrixBlockAddAccumulate( &A_offd_data[jj*bnnz],
3438 &P_offd_data[P_marker_offd[i1]*bnnz],
3439 block_size);
3440 }
3441
3442 /*------------------------------------------------------------
3443 * Case 2: neighbor i1 is an F-point and (MAY or MAY NOT strongly influences i),
3444 * distribute a_{i,i1} to C-points that strongly infuence i.
3445 * Note: currently no distribution to the diagonal in this case.
3446 *-----------------------------------------------------------*/
3447
3448 else if (P_marker_offd[i1] == strong_f_marker || CF_marker[i1] != -3 )
3449 {
3450
3451 /* initialize sum to zero */
3452 hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block,
3453 1.0, block_size);
3454
3455 /*---------------------------------------------------------
3456 * Loop over row of A_ext for point i1 and calculate the sum
3457 * of the connections to c-points that strongly influence i.
3458
3459
3460 HERE WE ONLY WANT THE DIAG CONTIRBUTIONS (intra-unknown)
3461
3462 *---------------------------------------------------------*/
3463
3464 /* find row number */
3465 c_num = A_offd_j[jj];
3466
3467 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
3468 {
3469 i2 = (HYPRE_Int)A_ext_j[jj1];
3470
3471 if (i2 > -1)
3472 {
3473 /* in the diagonal block */
3474 if (P_marker[i2] >= jj_begin_row)
3475 {
3476 /* sum += A_ext_data[jj1]; */
3477 hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_ext_data[jj1*bnnz],
3478 sum_block, block_size);
3479 }
3480 }
3481 else
3482 {
3483 /* in the off_diagonal block */
3484 if (P_marker_offd[-i2-1] >= jj_begin_row_offd)
3485 {
3486 /* sum += A_ext_data[jj1]; */
3487 hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_ext_data[jj1*bnnz],
3488 sum_block, block_size);
3489
3490 }
3491 }
3492 }
3493
3494 /* check whether sum_block is singular */
3495
3496
3497 /* distribute = A_offd_data[jj] / sum; */
3498 /* here we want: A_offd_data * sum^(-1) */
3499 if (hypre_CSRBlockMatrixBlockInvMultDiag2(&A_offd_data[jj*bnnz], sum_block,
3500 distribute_block, block_size) == 0)
3501 {
3502
3503 /*---------------------------------------------------------
3504 * Loop over row of A_ext for point i1 and do
3505 * the distribution.
3506
3507 HERE AGAIN WE ONLY WANT TO DIST W/IN A LIKE UNKNOWN
3508
3509 *--------------------------------------------------------*/
3510
3511 /* Diagonal block part of row i1 */
3512
3513 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
3514 {
3515 i2 = (HYPRE_Int)A_ext_j[jj1];
3516
3517 if (i2 > -1) /* in the diagonal block */
3518 {
3519 if (P_marker[i2] >= jj_begin_row)
3520 {
3521 /* P_diag_data[P_marker[i2]]
3522 += distribute * A_ext_data[jj1]; */
3523
3524 /* multiply - result in sum_block */
3525 hypre_CSRBlockMatrixBlockMultAddDiag2(distribute_block,
3526 &A_ext_data[jj1*bnnz], 0.0,
3527 sum_block, block_size);
3528
3529
3530 /* add result to p_diag_data */
3531 hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
3532 &P_diag_data[P_marker[i2]*bnnz],
3533 block_size);
3534
3535 }
3536 }
3537 else
3538 {
3539 /* in the off_diagonal block */
3540 if (P_marker_offd[-i2-1] >= jj_begin_row_offd)
3541
3542 /*P_offd_data[P_marker_offd[-i2-1]]
3543 += distribute * A_ext_data[jj1];*/
3544 {
3545
3546 /* multiply - result in sum_block */
3547 hypre_CSRBlockMatrixBlockMultAddDiag2(distribute_block,
3548 &A_ext_data[jj1*bnnz], 0.0,
3549 sum_block, block_size);
3550
3551
3552 /* add result to p_offd_data */
3553 hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
3554 &P_offd_data[P_marker_offd[-i2-1]*bnnz],
3555 block_size);
3556 }
3557 }
3558 }
3559 }
3560 }
3561 }
3562 }
3563
3564 /*-----------------------------------------------------------------
3565 * Set interpolation weight by dividing by the diagonal.
3566 *-----------------------------------------------------------------*/
3567
3568 for (jj = jj_begin_row; jj < jj_end_row; jj++)
3569 {
3570
3571 /* P_diag_data[jj] /= -diagonal; */
3572
3573 /* want diagonal^(-1)*P_diag_data */
3574 /* do division - put in sum_block */
3575 if ( hypre_CSRBlockMatrixBlockInvMult(diagonal_block, &P_diag_data[jj*bnnz],
3576 sum_block, block_size) == 0)
3577 {
3578 /* now copy to P_diag_data[jj] and make negative */
3579 hypre_CSRBlockMatrixBlockCopyData(sum_block, &P_diag_data[jj*bnnz],
3580 -1.0, block_size);
3581 }
3582 else
3583 {
3584 /* hypre_printf(" Warning! singular diagonal block! Proc id %d row %d\n", my_id,i); */
3585 /* just make P_diag_data negative since diagonal is singular) */
3586 hypre_CSRBlockMatrixBlockCopyData(&P_diag_data[jj*bnnz], &P_diag_data[jj*bnnz],
3587 -1.0, block_size);
3588
3589 }
3590 }
3591
3592 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
3593 {
3594 /* P_offd_data[jj] /= -diagonal; */
3595
3596 /* do division - put in sum_block */
3597 hypre_CSRBlockMatrixBlockInvMult(diagonal_block, &P_offd_data[jj*bnnz],
3598 sum_block, block_size);
3599
3600 /* now copy to P_offd_data[jj] and make negative */
3601 hypre_CSRBlockMatrixBlockCopyData(sum_block, &P_offd_data[jj*bnnz],
3602 -1.0, block_size);
3603
3604
3605
3606 }
3607
3608 }
3609
3610 strong_f_marker--;
3611
3612 P_offd_i[i+1] = jj_counter_offd;
3613 }
3614 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
3615 hypre_TFree(P_marker_offd, HYPRE_MEMORY_HOST);
3616 }
3617
3618 /* Now create P - as a block matrix */
3619 P = hypre_ParCSRBlockMatrixCreate(comm, block_size,
3620 hypre_ParCSRBlockMatrixGlobalNumRows(A),
3621 total_global_cpts,
3622 hypre_ParCSRBlockMatrixColStarts(A),
3623 num_cpts_global,
3624 0,
3625 P_diag_i[n_fine],
3626 P_offd_i[n_fine]);
3627
3628 P_diag = hypre_ParCSRBlockMatrixDiag(P);
3629 hypre_CSRBlockMatrixData(P_diag) = P_diag_data;
3630 hypre_CSRBlockMatrixI(P_diag) = P_diag_i;
3631 hypre_CSRBlockMatrixJ(P_diag) = P_diag_j;
3632
3633 P_offd = hypre_ParCSRBlockMatrixOffd(P);
3634 hypre_CSRBlockMatrixData(P_offd) = P_offd_data;
3635 hypre_CSRBlockMatrixI(P_offd) = P_offd_i;
3636 hypre_CSRBlockMatrixJ(P_offd) = P_offd_j;
3637
3638 /* Compress P, removing coefficients smaller than trunc_factor * Max */
3639 if (trunc_factor != 0.0 || max_elmts > 0)
3640 {
3641 hypre_BoomerAMGBlockInterpTruncation(P, trunc_factor, max_elmts);
3642 P_diag_data = hypre_CSRBlockMatrixData(P_diag);
3643 P_diag_i = hypre_CSRBlockMatrixI(P_diag);
3644 P_diag_j = hypre_CSRBlockMatrixJ(P_diag);
3645 P_offd_data = hypre_CSRBlockMatrixData(P_offd);
3646 P_offd_i = hypre_CSRBlockMatrixI(P_offd);
3647 P_offd_j = hypre_CSRBlockMatrixJ(P_offd);
3648 P_diag_size = P_diag_i[n_fine];
3649 P_offd_size = P_offd_i[n_fine];
3650 }
3651
3652
3653 num_cols_P_offd = 0;
3654 if (P_offd_size)
3655 {
3656 P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
3657
3658
3659 for (i=0; i < num_cols_A_offd; i++)
3660 P_marker[i] = 0;
3661
3662 num_cols_P_offd = 0;
3663 for (i=0; i < P_offd_size; i++)
3664 {
3665 index = P_offd_j[i];
3666 if (!P_marker[index])
3667 {
3668 num_cols_P_offd++;
3669 P_marker[index] = 1;
3670 }
3671 }
3672
3673 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
3674 tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
3675
3676 index = 0;
3677 for (i=0; i < num_cols_P_offd; i++)
3678 {
3679 while (P_marker[index]==0) index++;
3680 tmp_map_offd[i] = index++;
3681 }
3682
3683 for (i=0; i < P_offd_size; i++)
3684 P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
3685 P_offd_j[i],
3686 num_cols_P_offd);
3687 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
3688 }
3689
3690 for (i=0; i < n_fine; i++)
3691 if (CF_marker[i] == -3) CF_marker[i] = -1;
3692
3693 if (num_cols_P_offd)
3694 {
3695 hypre_ParCSRBlockMatrixColMapOffd(P) = col_map_offd_P;
3696 hypre_CSRBlockMatrixNumCols(P_offd) = num_cols_P_offd;
3697 }
3698
3699 /* use block version */
3700 hypre_GetCommPkgBlockRTFromCommPkgBlockA(P,A, tmp_map_offd, fine_to_coarse_offd);
3701
3702
3703 *P_ptr = P;
3704
3705
3706 hypre_TFree(zero_block, HYPRE_MEMORY_HOST);
3707 hypre_TFree(identity_block, HYPRE_MEMORY_HOST);
3708 hypre_TFree(diagonal_block, HYPRE_MEMORY_HOST);
3709 hypre_TFree(sum_block, HYPRE_MEMORY_HOST);
3710 hypre_TFree(distribute_block, HYPRE_MEMORY_HOST);
3711
3712 hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
3713 hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
3714 hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
3715 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
3716 hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
3717 hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
3718 hypre_TFree(coarse_counter, HYPRE_MEMORY_HOST);
3719 hypre_TFree(jj_count, HYPRE_MEMORY_HOST);
3720 hypre_TFree(jj_count_offd, HYPRE_MEMORY_HOST);
3721
3722 if (num_procs > 1) hypre_CSRBlockMatrixDestroy(A_ext);
3723
3724 return(0);
3725
3726 }
3727
3728 /*---------------------------------------------------------------------------
3729 * hypre_BoomerAMGBuildBlockInterpRV2
3730
3731 Here we modify the block interpolation as in Ruge's elasticity paper above
3732 (Applied Math. Comp. '86), except that instead of using just the diagonals
3733 of the scaling matrices (for the fine connections), we use a diagonal matrix
3734 whose diagonal entries are the row sums (as suggested in Tanya Clees'
3735 thesis for direct interpolation).
3736
3737 -again there is no differentiation between weak and strong f-connections
3738
3739 *--------------------------------------------------------------------------*/
3740
3741 HYPRE_Int
hypre_BoomerAMGBuildBlockInterpRV2(hypre_ParCSRBlockMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRBlockMatrix ** P_ptr)3742 hypre_BoomerAMGBuildBlockInterpRV2( hypre_ParCSRBlockMatrix *A,
3743 HYPRE_Int *CF_marker,
3744 hypre_ParCSRMatrix *S,
3745 HYPRE_BigInt *num_cpts_global,
3746 HYPRE_Int num_functions,
3747 HYPRE_Int *dof_func,
3748 HYPRE_Int debug_flag,
3749 HYPRE_Real trunc_factor,
3750 HYPRE_Int max_elmts,
3751 hypre_ParCSRBlockMatrix **P_ptr)
3752 {
3753
3754 MPI_Comm comm = hypre_ParCSRBlockMatrixComm(A);
3755 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
3756 hypre_ParCSRCommHandle *comm_handle;
3757
3758 hypre_CSRBlockMatrix *A_diag = hypre_ParCSRBlockMatrixDiag(A);
3759 HYPRE_Real *A_diag_data = hypre_CSRBlockMatrixData(A_diag);
3760 HYPRE_Int *A_diag_i = hypre_CSRBlockMatrixI(A_diag);
3761 HYPRE_Int *A_diag_j = hypre_CSRBlockMatrixJ(A_diag);
3762
3763 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(A_diag);
3764 HYPRE_Int bnnz = block_size*block_size;
3765
3766 hypre_CSRBlockMatrix *A_offd = hypre_ParCSRBlockMatrixOffd(A);
3767 HYPRE_Real *A_offd_data = hypre_CSRBlockMatrixData(A_offd);
3768 HYPRE_Int *A_offd_i = hypre_CSRBlockMatrixI(A_offd);
3769 HYPRE_Int *A_offd_j = hypre_CSRBlockMatrixJ(A_offd);
3770 HYPRE_Int num_cols_A_offd = hypre_CSRBlockMatrixNumCols(A_offd);
3771 HYPRE_BigInt *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(A);
3772
3773 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
3774 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
3775 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
3776
3777 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
3778 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
3779 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
3780
3781 hypre_ParCSRBlockMatrix *P;
3782 HYPRE_BigInt *col_map_offd_P;
3783 HYPRE_Int *tmp_map_offd = NULL;
3784
3785 HYPRE_Int *CF_marker_offd = NULL;
3786
3787 hypre_CSRBlockMatrix *A_ext;
3788
3789 HYPRE_Real *A_ext_data = NULL;
3790 HYPRE_Int *A_ext_i = NULL;
3791 HYPRE_BigInt *A_ext_j = NULL;
3792
3793 hypre_CSRBlockMatrix *P_diag;
3794 hypre_CSRBlockMatrix *P_offd;
3795
3796 HYPRE_Real *P_diag_data;
3797 HYPRE_Int *P_diag_i;
3798 HYPRE_Int *P_diag_j;
3799 HYPRE_Real *P_offd_data;
3800 HYPRE_Int *P_offd_i;
3801 HYPRE_Int *P_offd_j;
3802
3803 HYPRE_Int P_diag_size, P_offd_size;
3804
3805 HYPRE_Int *P_marker, *P_marker_offd = NULL;
3806
3807 HYPRE_Int jj_counter,jj_counter_offd;
3808 HYPRE_Int *jj_count, *jj_count_offd = NULL;
3809 HYPRE_Int jj_begin_row,jj_begin_row_offd;
3810 HYPRE_Int jj_end_row,jj_end_row_offd;
3811
3812 HYPRE_Int start_indexing = 0; /* start indexing for P_data at 0 */
3813
3814 HYPRE_Int n_fine = hypre_CSRBlockMatrixNumRows(A_diag);
3815
3816 HYPRE_Int strong_f_marker;
3817
3818 HYPRE_Int *fine_to_coarse;
3819 HYPRE_BigInt *fine_to_coarse_offd = NULL;
3820 HYPRE_Int *coarse_counter;
3821 HYPRE_Int coarse_shift;
3822 HYPRE_BigInt total_global_cpts;
3823 HYPRE_Int num_cols_P_offd;
3824 HYPRE_BigInt my_first_cpt;
3825
3826 HYPRE_Int bd;
3827
3828 HYPRE_Int i,i1,i2;
3829 HYPRE_Int j,jl,jj,jj1;
3830 HYPRE_Int kc;
3831 HYPRE_BigInt big_k;
3832 HYPRE_Int start;
3833
3834 HYPRE_Int c_num;
3835
3836 HYPRE_Int my_id;
3837 HYPRE_Int num_procs;
3838 HYPRE_Int num_threads;
3839 HYPRE_Int num_sends;
3840 HYPRE_Int index;
3841 HYPRE_Int ns, ne, size, rest;
3842 HYPRE_Int *int_buf_data = NULL;
3843 HYPRE_BigInt *big_buf_data = NULL;
3844
3845 HYPRE_BigInt col_1 = hypre_ParCSRBlockMatrixFirstRowIndex(A);
3846 HYPRE_Int local_numrows = hypre_CSRBlockMatrixNumRows(A_diag);
3847 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt)local_numrows;
3848
3849 HYPRE_Real wall_time; /* for debugging instrumentation */
3850
3851
3852 HYPRE_Real *identity_block;
3853 HYPRE_Real *zero_block;
3854 HYPRE_Real *diagonal_block;
3855 HYPRE_Real *sum_block;
3856 HYPRE_Real *distribute_block;
3857
3858 hypre_MPI_Comm_size(comm, &num_procs);
3859 hypre_MPI_Comm_rank(comm,&my_id);
3860 num_threads = hypre_NumThreads();
3861
3862 my_first_cpt = num_cpts_global[0];
3863 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
3864 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
3865
3866 /*-------------------------------------------------------------------
3867 * Get the CF_marker data for the off-processor columns
3868 *-------------------------------------------------------------------*/
3869
3870 if (debug_flag==4) wall_time = time_getWallclockSeconds();
3871
3872 CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
3873
3874
3875 if (!comm_pkg)
3876 {
3877 hypre_BlockMatvecCommPkgCreate(A);
3878 comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
3879 }
3880
3881 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
3882 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
3883 num_sends), HYPRE_MEMORY_HOST);
3884
3885 index = 0;
3886 for (i = 0; i < num_sends; i++)
3887 {
3888 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
3889 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
3890 {
3891 int_buf_data[index++]
3892 = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
3893 }
3894
3895 }
3896
3897 /* we do not need the block version of comm handle - because
3898 CF_marker corresponds to the nodal matrix. This call populates
3899 CF_marker_offd */
3900 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
3901 CF_marker_offd);
3902
3903 hypre_ParCSRCommHandleDestroy(comm_handle);
3904
3905
3906 if (debug_flag==4)
3907 {
3908 wall_time = time_getWallclockSeconds() - wall_time;
3909 hypre_printf("Proc = %d Interp: Comm 1 CF_marker = %f\n",
3910 my_id, wall_time);
3911 fflush(NULL);
3912 }
3913
3914 /*----------------------------------------------------------------------
3915 * Get the ghost rows of A
3916 *---------------------------------------------------------------------*/
3917
3918 if (debug_flag==4) wall_time = time_getWallclockSeconds();
3919
3920 if (num_procs > 1)
3921 {
3922 A_ext = hypre_ParCSRBlockMatrixExtractBExt(A, A, 1);
3923 A_ext_i = hypre_CSRBlockMatrixI(A_ext);
3924 A_ext_j = hypre_CSRBlockMatrixBigJ(A_ext);
3925 A_ext_data = hypre_CSRBlockMatrixData(A_ext);
3926 }
3927
3928 index = 0;
3929 for (i=0; i < num_cols_A_offd; i++)
3930 {
3931 for (j=A_ext_i[i]; j < A_ext_i[i+1]; j++)
3932 {
3933 big_k = A_ext_j[j];
3934 if (big_k >= col_1 && big_k < col_n)
3935 {
3936 A_ext_j[index] = big_k - col_1;
3937 /* for the data field we must get all of the blocbig_k data */
3938 for (bd = 0; bd < bnnz; bd++)
3939 {
3940 A_ext_data[index*bnnz + bd] = A_ext_data[j*bnnz + bd];
3941 }
3942 index++;
3943 }
3944 else
3945 {
3946 kc = hypre_BigBinarySearch(col_map_offd, big_k, num_cols_A_offd);
3947 if (kc > -1)
3948 {
3949 A_ext_j[index] = (HYPRE_BigInt)(-kc-1);
3950 for (bd = 0; bd < bnnz; bd++)
3951 {
3952 A_ext_data[index*bnnz + bd] = A_ext_data[j*bnnz + bd];
3953 }
3954 index++;
3955 }
3956 }
3957 }
3958 A_ext_i[i] = index;
3959 }
3960 for (i = num_cols_A_offd; i > 0; i--)
3961 A_ext_i[i] = A_ext_i[i-1];
3962 if (num_procs > 1) A_ext_i[0] = 0;
3963
3964 if (debug_flag==4)
3965 {
3966 wall_time = time_getWallclockSeconds() - wall_time;
3967 hypre_printf("Proc = %d Interp: Comm 2 Get A_ext = %f\n",
3968 my_id, wall_time);
3969 fflush(NULL);
3970 }
3971
3972
3973 /*-----------------------------------------------------------------------
3974 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
3975 *-----------------------------------------------------------------------*/
3976
3977 /*-----------------------------------------------------------------------
3978 * Intialize counters and allocate mapping vector.
3979 *-----------------------------------------------------------------------*/
3980
3981 coarse_counter = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
3982 jj_count = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
3983 jj_count_offd = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
3984
3985 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
3986
3987 for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1;
3988
3989 jj_counter = start_indexing;
3990 jj_counter_offd = start_indexing;
3991
3992 /*-----------------------------------------------------------------------
3993 * Loop over fine grid.
3994 *-----------------------------------------------------------------------*/
3995
3996
3997 for (j = 0; j < num_threads; j++)
3998 {
3999 size = n_fine/num_threads;
4000 rest = n_fine - size*num_threads;
4001 if (j < rest)
4002 {
4003 ns = j*size+j;
4004 ne = (j+1)*size+j+1;
4005 }
4006 else
4007 {
4008 ns = j*size+rest;
4009 ne = (j+1)*size+rest;
4010 }
4011
4012
4013 /* loop over the fine grid points */
4014 for (i = ns; i < ne; i++)
4015 {
4016
4017 /*--------------------------------------------------------------------
4018 * If i is a C-point, interpolation is the identity. Also set up
4019 * mapping vector (fine_to_coarse is the mapping vector).
4020 *--------------------------------------------------------------------*/
4021
4022 if (CF_marker[i] >= 0)
4023 {
4024 jj_count[j]++;
4025 fine_to_coarse[i] = coarse_counter[j];
4026 coarse_counter[j]++;
4027 }
4028
4029 /*--------------------------------------------------------------------
4030 * If i is an F-point, interpolation is from the C-points that
4031 * strongly influence i.
4032 *--------------------------------------------------------------------*/
4033
4034 else
4035 {
4036 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
4037 {
4038 i1 = S_diag_j[jj];
4039 if (CF_marker[i1] >= 0)
4040 {
4041 jj_count[j]++;
4042 }
4043 }
4044
4045 if (num_procs > 1)
4046 {
4047 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
4048 {
4049 i1 = S_offd_j[jj];
4050 if (CF_marker_offd[i1] >= 0)
4051 {
4052 jj_count_offd[j]++;
4053 }
4054 }
4055 }
4056 }
4057 }
4058 }
4059
4060 /*-----------------------------------------------------------------------
4061 * Allocate arrays.
4062 *-----------------------------------------------------------------------*/
4063
4064 for (i=0; i < num_threads-1; i++)
4065 {
4066 coarse_counter[i+1] += coarse_counter[i];
4067 jj_count[i+1] += jj_count[i];
4068 jj_count_offd[i+1] += jj_count_offd[i];
4069 }
4070 i = num_threads-1;
4071 jj_counter = jj_count[i];
4072 jj_counter_offd = jj_count_offd[i];
4073
4074 P_diag_size = jj_counter;
4075
4076 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
4077 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
4078 /* we need to include the size of the blocks in the data size */
4079 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size*bnnz, HYPRE_MEMORY_HOST);
4080
4081 P_diag_i[n_fine] = jj_counter;
4082
4083
4084 P_offd_size = jj_counter_offd;
4085
4086 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
4087 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
4088 /* we need to include the size of the blocks in the data size */
4089 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size*bnnz, HYPRE_MEMORY_HOST);
4090
4091 /*-----------------------------------------------------------------------
4092 * Intialize some stuff.
4093 *-----------------------------------------------------------------------*/
4094
4095 jj_counter = start_indexing;
4096 jj_counter_offd = start_indexing;
4097
4098 if (debug_flag==4)
4099 {
4100 wall_time = time_getWallclockSeconds() - wall_time;
4101 hypre_printf("Proc = %d Interp: Internal work 1 = %f\n",
4102 my_id, wall_time);
4103 fflush(NULL);
4104 }
4105
4106 /* we need a block identity and a block of zeros*/
4107 identity_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
4108 zero_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
4109
4110 for(i = 0; i < block_size; i++)
4111 {
4112 identity_block[i*block_size + i] = 1.0;
4113 }
4114
4115
4116 /* we also need a block to keep track of the diagonal values and a sum */
4117 diagonal_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
4118 sum_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
4119 distribute_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
4120
4121 /*-----------------------------------------------------------------------
4122 * Send and receive fine_to_coarse info.
4123 *-----------------------------------------------------------------------*/
4124
4125 if (debug_flag==4) wall_time = time_getWallclockSeconds();
4126
4127 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_A_offd, HYPRE_MEMORY_HOST);
4128 big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
4129 num_sends), HYPRE_MEMORY_HOST);
4130
4131 for (j = 0; j < num_threads; j++)
4132 {
4133 coarse_shift = 0;
4134 if (j > 0) coarse_shift = coarse_counter[j-1];
4135 size = n_fine/num_threads;
4136 rest = n_fine - size*num_threads;
4137 if (j < rest)
4138 {
4139 ns = j*size+j;
4140 ne = (j+1)*size+j+1;
4141 }
4142 else
4143 {
4144 ns = j*size+rest;
4145 ne = (j+1)*size+rest;
4146 }
4147 for (i = ns; i < ne; i++)
4148 fine_to_coarse[i] += coarse_shift;
4149 }
4150 index = 0;
4151 for (i = 0; i < num_sends; i++)
4152 {
4153 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
4154 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
4155 big_buf_data[index++] = my_first_cpt
4156 + fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
4157 }
4158
4159 /* again, we do not need to use the block version of comm handle since
4160 the fine to coarse mapping is size of the nodes */
4161
4162 comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data,
4163 fine_to_coarse_offd);
4164
4165 hypre_ParCSRCommHandleDestroy(comm_handle);
4166
4167 if (debug_flag==4)
4168 {
4169 wall_time = time_getWallclockSeconds() - wall_time;
4170 hypre_printf("Proc = %d Interp: Comm 4 FineToCoarse = %f\n",
4171 my_id, wall_time);
4172 fflush(NULL);
4173 }
4174
4175 if (debug_flag==4) wall_time = time_getWallclockSeconds();
4176
4177
4178 /*-----------------------------------------------------------------------
4179 * Loop over fine grid points.
4180 *-----------------------------------------------------------------------*/
4181
4182
4183 for (jl = 0; jl < num_threads; jl++)
4184 {
4185 size = n_fine/num_threads;
4186 rest = n_fine - size*num_threads;
4187 if (jl < rest)
4188 {
4189 ns = jl*size+jl;
4190 ne = (jl+1)*size+jl+1;
4191 }
4192 else
4193 {
4194 ns = jl*size+rest;
4195 ne = (jl+1)*size+rest;
4196 }
4197 jj_counter = 0;
4198 if (jl > 0) jj_counter = jj_count[jl-1];
4199 jj_counter_offd = 0;
4200 if (jl > 0) jj_counter_offd = jj_count_offd[jl-1];
4201
4202 P_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
4203 P_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
4204
4205 for (i = 0; i < n_fine; i++)
4206 {
4207 P_marker[i] = -1;
4208 }
4209 for (i = 0; i < num_cols_A_offd; i++)
4210 {
4211 P_marker_offd[i] = -1;
4212 }
4213 strong_f_marker = -2;
4214
4215 for (i = ns; i < ne; i++)
4216 {
4217
4218 /*--------------------------------------------------------------------
4219 * If i is a c-point, interpolation is the identity.
4220 *--------------------------------------------------------------------*/
4221
4222 if (CF_marker[i] >= 0)
4223 {
4224 P_diag_i[i] = jj_counter;
4225 P_diag_j[jj_counter] = fine_to_coarse[i];
4226 /* P_diag_data[jj_counter] = one; */
4227 hypre_CSRBlockMatrixBlockCopyData(identity_block,
4228 &P_diag_data[jj_counter*bnnz],
4229 1.0, block_size);
4230 jj_counter++;
4231 }
4232
4233 /*--------------------------------------------------------------------
4234 * If i is an F-point, build interpolation.
4235 *--------------------------------------------------------------------*/
4236
4237 else
4238 {
4239 /* Diagonal part of P */
4240 P_diag_i[i] = jj_counter;
4241 jj_begin_row = jj_counter;
4242
4243 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
4244 {
4245 i1 = S_diag_j[jj];
4246
4247 /*--------------------------------------------------------------
4248 * If neighbor i1 is a C-point, set column number in P_diag_j
4249 * and initialize interpolation weight to zero.
4250 *--------------------------------------------------------------*/
4251
4252 if (CF_marker[i1] >= 0)
4253 {
4254 P_marker[i1] = jj_counter;
4255 P_diag_j[jj_counter] = fine_to_coarse[i1];
4256 /* P_diag_data[jj_counter] = zero; */
4257 hypre_CSRBlockMatrixBlockCopyData(zero_block,
4258 &P_diag_data[jj_counter*bnnz],
4259 1.0, block_size);
4260 jj_counter++;
4261 }
4262
4263 /*--------------------------------------------------------------
4264 * If neighbor i1 is an F-point, mark it as a strong F-point
4265 * whose connection needs to be distributed.
4266 *--------------------------------------------------------------*/
4267
4268 else if (CF_marker[i1] != -3)
4269 {
4270 P_marker[i1] = strong_f_marker;
4271 }
4272 }
4273 jj_end_row = jj_counter;
4274
4275 /* Off-Diagonal part of P */
4276 P_offd_i[i] = jj_counter_offd;
4277 jj_begin_row_offd = jj_counter_offd;
4278
4279
4280 if (num_procs > 1)
4281 {
4282 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
4283 {
4284 i1 = S_offd_j[jj];
4285
4286 /*-----------------------------------------------------------
4287 * If neighbor i1 is a C-point, set column number in P_offd_j
4288 * and initialize interpolation weight to zero.
4289 *-----------------------------------------------------------*/
4290
4291 if (CF_marker_offd[i1] >= 0)
4292 {
4293 P_marker_offd[i1] = jj_counter_offd;
4294 P_offd_j[jj_counter_offd] = i1;
4295 /* P_offd_data[jj_counter_offd] = zero; */
4296 hypre_CSRBlockMatrixBlockCopyData(zero_block,
4297 &P_offd_data[jj_counter_offd*bnnz],
4298 1.0, block_size);
4299
4300 jj_counter_offd++;
4301 }
4302
4303 /*-----------------------------------------------------------
4304 * If neighbor i1 is an F-point, mark it as a strong F-point
4305 * whose connection needs to be distributed.
4306 *-----------------------------------------------------------*/
4307
4308 else if (CF_marker_offd[i1] != -3)
4309 {
4310 P_marker_offd[i1] = strong_f_marker;
4311 }
4312 }
4313 }
4314
4315 jj_end_row_offd = jj_counter_offd;
4316
4317
4318 /* get the diagonal block */
4319 /* diagonal = A_diag_data[A_diag_i[i]]; */
4320 hypre_CSRBlockMatrixBlockCopyData(&A_diag_data[A_diag_i[i]*bnnz], diagonal_block,
4321 1.0, block_size);
4322
4323
4324
4325 /* Here we go through the neighborhood of this grid point */
4326
4327 /* Loop over ith row of A. First, the diagonal part of A */
4328
4329 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
4330 {
4331 i1 = A_diag_j[jj];
4332
4333 /*--------------------------------------------------------------
4334 * Case 1: neighbor i1 is a C-point and strongly influences i,
4335 * accumulate a_{i,i1} into the interpolation weight.
4336 *--------------------------------------------------------------*/
4337
4338 if (P_marker[i1] >= jj_begin_row)
4339 {
4340 /* P_diag_data[P_marker[i1]] += A_diag_data[jj]; */
4341 hypre_CSRBlockMatrixBlockAddAccumulate(&A_diag_data[jj*bnnz],
4342 &P_diag_data[P_marker[i1]*bnnz],
4343 block_size);
4344
4345 }
4346
4347 /*--------------------------------------------------------------
4348 * Case 2: neighbor i1 is an F-point (MAY or MAY NOT strongly influences i),
4349 * distribute a_{i,i1} to C-points that strongly infuence i.
4350 * Note: currently no distribution to the diagonal in this case.
4351 *--------------------------------------------------------------*/
4352
4353 else if (P_marker[i1] == strong_f_marker || CF_marker[i1] != -3)
4354 {
4355 /* initialize sum to zero */
4356 /* sum = zero; */
4357 hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block, 1.0,
4358 block_size);
4359
4360 /*-----------------------------------------------------------
4361 * Loop over row of A for point i1 and calculate the sum
4362 * of the connections to c-points that strongly influence i.-
4363
4364 HERE WE ONLY WANT THE DIAG CONTIRBUTIONS (intra-unknown)
4365
4366 *-----------------------------------------------------------*/
4367
4368 /* Diagonal block part of row i1 */
4369 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
4370 {
4371 i2 = A_diag_j[jj1];
4372 if (P_marker[i2] >= jj_begin_row)
4373 {
4374 /* add diag data to sum */
4375 /* sum += A_diag_data[jj1]; */
4376 hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_diag_data[jj1*bnnz],
4377 sum_block, block_size);
4378 }
4379 }
4380
4381 /* Off-Diagonal block part of row i1 */
4382 if (num_procs > 1)
4383 {
4384 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
4385 {
4386 i2 = A_offd_j[jj1];
4387 if (P_marker_offd[i2] >= jj_begin_row_offd )
4388 {
4389 /* add off diag data to sum */
4390 /*sum += A_offd_data[jj1];*/
4391 hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_offd_data[jj1*bnnz],
4392 sum_block, block_size);
4393 }
4394 }
4395 }
                  /* check whether sum_block is singular (NOW SUM IS A DIAG MATRIX WHOSE
                     ENTRIES ARE THE ROW SUMS) */
                  /* distribute = A_diag_data[jj] / sum; (if a diag element is 0 then
                     that col is scaled by 1 instead of 1/diag) - doesn't return 0 */
4400 if (hypre_CSRBlockMatrixBlockInvMultDiag3(&A_diag_data[jj*bnnz], sum_block,
4401 distribute_block, block_size) == 0)
4402 {
4403
4404 /*-----------------------------------------------------------
4405 * Loop over row of A for point i1 and do the distribution.-
4406 (here we we use row-sums for the nodes recv. the distribution)
4407 *-----------------------------------------------------------*/
4408
4409 /* Diagonal block part of row i1 */
4410 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
4411 {
4412 i2 = A_diag_j[jj1];
4413 if (P_marker[i2] >= jj_begin_row )
4414 {
4415
4416 /* P_diag_data[P_marker[i2]]
4417 += distribute * A_diag_data[jj1];*/
4418
4419 /* multiply - result in sum_block */
4420 hypre_CSRBlockMatrixBlockMultAddDiag3(distribute_block,
4421 &A_diag_data[jj1*bnnz], 0.0,
4422 sum_block, block_size);
4423
4424 /* add result to p_diag_data */
4425 hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
4426 &P_diag_data[P_marker[i2]*bnnz],
4427 block_size);
4428 }
4429 }
4430
4431 /* Off-Diagonal block part of row i1 */
4432 if (num_procs > 1)
4433 {
4434 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
4435 {
4436 i2 = A_offd_j[jj1];
4437 if (P_marker_offd[i2] >= jj_begin_row_offd)
4438 {
4439 /* P_offd_data[P_marker_offd[i2]]
4440 += distribute * A_offd_data[jj1]; */
4441
4442 /* multiply - result in sum_block */
4443 hypre_CSRBlockMatrixBlockMultAddDiag3(distribute_block,
4444 &A_offd_data[jj1*bnnz], 0.0,
4445 sum_block, block_size);
4446
4447
4448 /* add result to p_offd_data */
4449 hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
4450 &P_offd_data[P_marker_offd[i2]*bnnz],
4451 block_size);
4452 }
4453 }
4454 }
4455 }
4456 }
4457 }
4458
4459
4460 /*----------------------------------------------------------------
4461 * Still looping over ith row of A. Next, loop over the
4462 * off-diagonal part of A
4463 *---------------------------------------------------------------*/
4464
4465 if (num_procs > 1)
4466 {
4467 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
4468 {
4469 i1 = A_offd_j[jj];
4470
4471 /*--------------------------------------------------------------
4472 * Case 1: neighbor i1 is a C-point and strongly influences i,
4473 * accumulate a_{i,i1} into the interpolation weight.
4474 *--------------------------------------------------------------*/
4475
4476 if (P_marker_offd[i1] >= jj_begin_row_offd)
4477 {
4478 /* P_offd_data[P_marker_offd[i1]] += A_offd_data[jj]; */
4479 hypre_CSRBlockMatrixBlockAddAccumulate( &A_offd_data[jj*bnnz],
4480 &P_offd_data[P_marker_offd[i1]*bnnz],
4481 block_size);
4482 }
4483
4484 /*------------------------------------------------------------
4485 * Case 2: neighbor i1 is an F-point and (MAY or MAY NOT strongly influences i),
4486 * distribute a_{i,i1} to C-points that strongly infuence i.
4487 * Note: currently no distribution to the diagonal in this case.
4488 *-----------------------------------------------------------*/
4489
4490 else if (P_marker_offd[i1] == strong_f_marker || CF_marker[i1] != -3 )
4491 {
4492
4493 /* initialize sum to zero */
4494 hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block,
4495 1.0, block_size);
4496
4497 /*---------------------------------------------------------
4498 * Loop over row of A_ext for point i1 and calculate the sum
4499 * of the connections to c-points that strongly influence i.
4500
4501
4502 HERE WE ONLY WANT THE DIAG CONTIRBUTIONS (intra-unknown)
4503
4504 *---------------------------------------------------------*/
4505
4506 /* find row number */
4507 c_num = A_offd_j[jj];
4508
4509 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
4510 {
4511 i2 = (HYPRE_Int)A_ext_j[jj1];
4512
4513 if (i2 > -1)
4514 {
4515 /* in the diagonal block */
4516 if (P_marker[i2] >= jj_begin_row)
4517 {
4518 /* sum += A_ext_data[jj1]; */
4519 hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_ext_data[jj1*bnnz],
4520 sum_block, block_size);
4521 }
4522 }
4523 else
4524 {
4525 /* in the off_diagonal block */
4526 if (P_marker_offd[-i2-1] >= jj_begin_row_offd)
4527 {
4528 /* sum += A_ext_data[jj1]; */
4529 hypre_CSRBlockMatrixBlockAddAccumulateDiag(&A_ext_data[jj1*bnnz],
4530 sum_block, block_size);
4531
4532 }
4533 }
4534 }
4535
4536 /* check whether sum_block is singular */
4537
4538
4539 /* distribute = A_offd_data[jj] / sum; */
4540 /* here we want: A_offd_data * sum^(-1) - use the row sums as the
4541 diag for sum*/
4542 if (hypre_CSRBlockMatrixBlockInvMultDiag3(&A_offd_data[jj*bnnz], sum_block,
4543 distribute_block, block_size) == 0)
4544 {
4545
4546 /*---------------------------------------------------------
4547 * Loop over row of A_ext for point i1 and do
4548 * the distribution.
4549
4550
4551 *--------------------------------------------------------*/
4552
4553 /* Diagonal block part of row i1 */
4554
4555 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
4556 {
4557 i2 = (HYPRE_Int)A_ext_j[jj1];
4558
4559 if (i2 > -1) /* in the diagonal block */
4560 {
4561 if (P_marker[i2] >= jj_begin_row)
4562 {
4563 /* P_diag_data[P_marker[i2]]
4564 += distribute * A_ext_data[jj1]; */
4565
4566 /* multiply - result in sum_block */
4567 hypre_CSRBlockMatrixBlockMultAddDiag3(distribute_block,
4568 &A_ext_data[jj1*bnnz], 0.0,
4569 sum_block, block_size);
4570
4571
4572 /* add result to p_diag_data */
4573 hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
4574 &P_diag_data[P_marker[i2]*bnnz],
4575 block_size);
4576 }
4577 }
4578 else
4579 {
4580 /* in the off_diagonal block */
4581 if (P_marker_offd[-i2-1] >= jj_begin_row_offd)
4582
4583 /*P_offd_data[P_marker_offd[-i2-1]]
4584 += distribute * A_ext_data[jj1];*/
4585 {
4586
4587 /* multiply - result in sum_block */
4588 hypre_CSRBlockMatrixBlockMultAddDiag3(distribute_block,
4589 &A_ext_data[jj1*bnnz], 0.0,
4590 sum_block, block_size);
4591
4592 /* add result to p_offd_data */
4593 hypre_CSRBlockMatrixBlockAddAccumulate(sum_block,
4594 &P_offd_data[P_marker_offd[-i2-1]*bnnz],
4595 block_size);
4596 }
4597 }
4598 }
4599 }
4600 }
4601 }
4602 }
4603
4604 /*-----------------------------------------------------------------
4605 * Set interpolation weight by dividing by the diagonal.
4606 *-----------------------------------------------------------------*/
4607
4608 for (jj = jj_begin_row; jj < jj_end_row; jj++)
4609 {
4610
4611 /* P_diag_data[jj] /= -diagonal; */
4612
4613 /* want diagonal^(-1)*P_diag_data */
4614 /* do division - put in sum_block */
4615 if ( hypre_CSRBlockMatrixBlockInvMult(diagonal_block, &P_diag_data[jj*bnnz],
4616 sum_block, block_size) == 0)
4617 {
4618 /* now copy to P_diag_data[jj] and make negative */
4619 hypre_CSRBlockMatrixBlockCopyData(sum_block, &P_diag_data[jj*bnnz],
4620 -1.0, block_size);
4621 }
4622 else
4623 {
                  /* hypre_printf(" Warning! singular diagonal block! Proc id %d row %d\n", my_id,i); */
                  /* just make P_diag_data negative since diagonal is singular */
4626 hypre_CSRBlockMatrixBlockCopyData(&P_diag_data[jj*bnnz], &P_diag_data[jj*bnnz],
4627 -1.0, block_size);
4628 }
4629 }
4630
4631 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
4632 {
4633 /* P_offd_data[jj] /= -diagonal; */
4634
4635 /* do division - put in sum_block */
4636 hypre_CSRBlockMatrixBlockInvMult(diagonal_block, &P_offd_data[jj*bnnz],
4637 sum_block, block_size);
4638
4639 /* now copy to P_offd_data[jj] and make negative */
4640 hypre_CSRBlockMatrixBlockCopyData(sum_block, &P_offd_data[jj*bnnz],
4641 -1.0, block_size);
4642 }
4643 }
4644
4645 strong_f_marker--;
4646
4647 P_offd_i[i+1] = jj_counter_offd;
4648 }
4649 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
4650 hypre_TFree(P_marker_offd, HYPRE_MEMORY_HOST);
4651 }
4652
4653 /* Now create P - as a block matrix */
4654 P = hypre_ParCSRBlockMatrixCreate(comm, block_size,
4655 hypre_ParCSRBlockMatrixGlobalNumRows(A),
4656 total_global_cpts,
4657 hypre_ParCSRBlockMatrixColStarts(A),
4658 num_cpts_global,
4659 0,
4660 P_diag_i[n_fine],
4661 P_offd_i[n_fine]);
4662
4663
4664 P_diag = hypre_ParCSRBlockMatrixDiag(P);
4665 hypre_CSRBlockMatrixData(P_diag) = P_diag_data;
4666 hypre_CSRBlockMatrixI(P_diag) = P_diag_i;
4667 hypre_CSRBlockMatrixJ(P_diag) = P_diag_j;
4668
4669 P_offd = hypre_ParCSRBlockMatrixOffd(P);
4670 hypre_CSRBlockMatrixData(P_offd) = P_offd_data;
4671 hypre_CSRBlockMatrixI(P_offd) = P_offd_i;
4672 hypre_CSRBlockMatrixJ(P_offd) = P_offd_j;
4673
4674 /* Compress P, removing coefficients smaller than trunc_factor * Max */
4675 if (trunc_factor != 0.0 || max_elmts > 0)
4676 {
4677 hypre_BoomerAMGBlockInterpTruncation(P, trunc_factor, max_elmts);
4678 P_diag_data = hypre_CSRBlockMatrixData(P_diag);
4679 P_diag_i = hypre_CSRBlockMatrixI(P_diag);
4680 P_diag_j = hypre_CSRBlockMatrixJ(P_diag);
4681 P_offd_data = hypre_CSRBlockMatrixData(P_offd);
4682 P_offd_i = hypre_CSRBlockMatrixI(P_offd);
4683 P_offd_j = hypre_CSRBlockMatrixJ(P_offd);
4684 P_diag_size = P_diag_i[n_fine];
4685 P_offd_size = P_offd_i[n_fine];
4686 }
4687
4688 num_cols_P_offd = 0;
4689 if (P_offd_size)
4690 {
4691 P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
4692
4693 for (i=0; i < num_cols_A_offd; i++)
4694 P_marker[i] = 0;
4695
4696 num_cols_P_offd = 0;
4697 for (i=0; i < P_offd_size; i++)
4698 {
4699 index = P_offd_j[i];
4700 if (!P_marker[index])
4701 {
4702 num_cols_P_offd++;
4703 P_marker[index] = 1;
4704 }
4705 }
4706
4707 tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
4708 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
4709
4710 index = 0;
4711 for (i=0; i < num_cols_P_offd; i++)
4712 {
4713 while (P_marker[index]==0) index++;
4714 tmp_map_offd[i] = index++;
4715 }
4716
4717 for (i=0; i < P_offd_size; i++)
4718 P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
4719 P_offd_j[i],
4720 num_cols_P_offd);
4721 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
4722 }
4723
4724 for (i=0; i < n_fine; i++)
4725 if (CF_marker[i] == -3) CF_marker[i] = -1;
4726
4727 if (num_cols_P_offd)
4728 {
4729 hypre_ParCSRBlockMatrixColMapOffd(P) = col_map_offd_P;
4730 hypre_CSRBlockMatrixNumCols(P_offd) = num_cols_P_offd;
4731 }
4732
4733 /* use block version */
4734 hypre_GetCommPkgBlockRTFromCommPkgBlockA(P,A, tmp_map_offd,fine_to_coarse_offd);
4735
4736
4737 *P_ptr = P;
4738
4739
4740 hypre_TFree(zero_block, HYPRE_MEMORY_HOST);
4741 hypre_TFree(identity_block, HYPRE_MEMORY_HOST);
4742 hypre_TFree(diagonal_block, HYPRE_MEMORY_HOST);
4743 hypre_TFree(sum_block, HYPRE_MEMORY_HOST);
4744 hypre_TFree(distribute_block, HYPRE_MEMORY_HOST);
4745
4746 hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
4747 hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
4748 hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
4749 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
4750 hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
4751 hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
4752 hypre_TFree(coarse_counter, HYPRE_MEMORY_HOST);
4753 hypre_TFree(jj_count, HYPRE_MEMORY_HOST);
4754 hypre_TFree(jj_count_offd, HYPRE_MEMORY_HOST);
4755
4756 if (num_procs > 1) hypre_CSRBlockMatrixDestroy(A_ext);
4757
4758 return(0);
4759
4760 }
4761
4762 /*---------------------------------------------------------------------------
4763 * hypre_BoomerAMGBuildBlockDirInterp
4764 *--------------------------------------------------------------------------*/
4765
4766 HYPRE_Int
hypre_BoomerAMGBuildBlockDirInterp(hypre_ParCSRBlockMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRBlockMatrix ** P_ptr)4767 hypre_BoomerAMGBuildBlockDirInterp( hypre_ParCSRBlockMatrix *A,
4768 HYPRE_Int *CF_marker,
4769 hypre_ParCSRMatrix *S,
4770 HYPRE_BigInt *num_cpts_global,
4771 HYPRE_Int num_functions,
4772 HYPRE_Int *dof_func,
4773 HYPRE_Int debug_flag,
4774 HYPRE_Real trunc_factor,
4775 HYPRE_Int max_elmts,
4776 hypre_ParCSRBlockMatrix **P_ptr)
4777 {
4778
4779 MPI_Comm comm = hypre_ParCSRBlockMatrixComm(A);
4780 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
4781 hypre_ParCSRCommHandle *comm_handle;
4782
4783 hypre_CSRBlockMatrix *A_diag = hypre_ParCSRBlockMatrixDiag(A);
4784 HYPRE_Real *A_diag_data = hypre_CSRBlockMatrixData(A_diag);
4785 HYPRE_Int *A_diag_i = hypre_CSRBlockMatrixI(A_diag);
4786 HYPRE_Int *A_diag_j = hypre_CSRBlockMatrixJ(A_diag);
4787
4788 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(A_diag);
4789 HYPRE_Int bnnz = block_size*block_size;
4790
4791
4792 hypre_CSRBlockMatrix *A_offd = hypre_ParCSRBlockMatrixOffd(A);
4793 HYPRE_Real *A_offd_data = hypre_CSRBlockMatrixData(A_offd);
4794 HYPRE_Int *A_offd_i = hypre_CSRBlockMatrixI(A_offd);
4795 HYPRE_Int *A_offd_j = hypre_CSRBlockMatrixJ(A_offd);
4796 HYPRE_Int num_cols_A_offd = hypre_CSRBlockMatrixNumCols(A_offd);
4797
4798 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
4799 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
4800 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
4801
4802 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
4803 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
4804 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
4805
4806 hypre_ParCSRBlockMatrix *P;
4807 HYPRE_BigInt *col_map_offd_P;
4808 HYPRE_Int *tmp_map_offd = NULL;
4809
4810 HYPRE_Int *CF_marker_offd = NULL;
4811 HYPRE_Int *dof_func_offd = NULL;
4812
4813 hypre_CSRBlockMatrix *P_diag;
4814 hypre_CSRBlockMatrix *P_offd;
4815
4816 HYPRE_Real *P_diag_data;
4817 HYPRE_Int *P_diag_i;
4818 HYPRE_Int *P_diag_j;
4819 HYPRE_Real *P_offd_data;
4820 HYPRE_Int *P_offd_i;
4821 HYPRE_Int *P_offd_j;
4822
4823 HYPRE_Int P_diag_size, P_offd_size;
4824
4825 HYPRE_Int *P_marker, *P_marker_offd = NULL;
4826
4827 HYPRE_Int jj_counter,jj_counter_offd;
4828 HYPRE_Int *jj_count, *jj_count_offd = NULL;
4829 HYPRE_Int jj_begin_row,jj_begin_row_offd;
4830 HYPRE_Int jj_end_row,jj_end_row_offd;
4831
4832 HYPRE_Int start_indexing = 0; /* start indexing for P_data at 0 */
4833
4834 HYPRE_Int n_fine = hypre_CSRBlockMatrixNumRows(A_diag);
4835
4836 HYPRE_Int *fine_to_coarse;
4837 HYPRE_BigInt *fine_to_coarse_offd = NULL;
4838 HYPRE_Int *coarse_counter;
4839 HYPRE_Int coarse_shift;
4840 HYPRE_BigInt total_global_cpts;
4841 HYPRE_Int num_cols_P_offd;
4842 HYPRE_BigInt my_first_cpt;
4843
4844 HYPRE_Int i,i1;
4845 HYPRE_Int j,jl,jj;
4846 HYPRE_Int start;
4847
4848 HYPRE_Int my_id;
4849 HYPRE_Int num_procs;
4850 HYPRE_Int num_threads;
4851 HYPRE_Int num_sends;
4852 HYPRE_Int index;
4853 HYPRE_Int ns, ne, size, rest;
4854 HYPRE_Int *int_buf_data = NULL;
4855 HYPRE_BigInt *big_buf_data = NULL;
4856
4857 HYPRE_Real wall_time; /* for debugging instrumentation */
4858
4859 HYPRE_Real *identity_block;
4860 HYPRE_Real *zero_block;
4861 HYPRE_Real *diagonal_block;
4862 HYPRE_Real *sum_block_p;
4863 HYPRE_Real *sum_block_n;
4864 HYPRE_Real *r_block;
4865
4866 hypre_MPI_Comm_size(comm, &num_procs);
4867 hypre_MPI_Comm_rank(comm,&my_id);
4868 num_threads = hypre_NumThreads();
4869
4870 my_first_cpt = num_cpts_global[0];
4871 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
4872 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
4873
4874 /*-------------------------------------------------------------------
4875 * Get the CF_marker data for the off-processor columns
4876 *-------------------------------------------------------------------*/
4877
4878 if (debug_flag==4) wall_time = time_getWallclockSeconds();
4879
4880 CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
4881
4882
4883 if (!comm_pkg)
4884 {
4885 hypre_BlockMatvecCommPkgCreate(A);
4886 comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
4887 }
4888
4889 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
4890 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
4891 num_sends), HYPRE_MEMORY_HOST);
4892
4893 index = 0;
4894 for (i = 0; i < num_sends; i++)
4895 {
4896 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
4897 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
4898 int_buf_data[index++]
4899 = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
4900 }
4901
4902 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
4903 CF_marker_offd);
4904
4905 hypre_ParCSRCommHandleDestroy(comm_handle);
4906
4907 if (debug_flag==4)
4908 {
4909 wall_time = time_getWallclockSeconds() - wall_time;
4910 hypre_printf("Proc = %d Interp: Comm 1 CF_marker = %f\n",
4911 my_id, wall_time);
4912 fflush(NULL);
4913 }
4914
4915 /*-----------------------------------------------------------------------
4916 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
4917 *-----------------------------------------------------------------------*/
4918
4919 /*-----------------------------------------------------------------------
4920 * Intialize counters and allocate mapping vector.
4921 *-----------------------------------------------------------------------*/
4922
4923 coarse_counter = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
4924 jj_count = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
4925 jj_count_offd = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
4926
4927 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
4928
4929 for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1;
4930
4931 jj_counter = start_indexing;
4932 jj_counter_offd = start_indexing;
4933
4934 /*-----------------------------------------------------------------------
4935 * Loop over fine grid.
4936 *-----------------------------------------------------------------------*/
4937
4938
4939 for (j = 0; j < num_threads; j++)
4940 {
4941 size = n_fine/num_threads;
4942 rest = n_fine - size*num_threads;
4943 if (j < rest)
4944 {
4945 ns = j*size+j;
4946 ne = (j+1)*size+j+1;
4947 }
4948 else
4949 {
4950 ns = j*size+rest;
4951 ne = (j+1)*size+rest;
4952 }
4953
4954
4955 /* loop over the fine grid points */
4956 for (i = ns; i < ne; i++)
4957 {
4958
4959 /*--------------------------------------------------------------------
4960 * If i is a C-point, interpolation is the identity. Also set up
4961 * mapping vector (fine_to_coarse is the mapping vector).
4962 *--------------------------------------------------------------------*/
4963
4964 if (CF_marker[i] >= 0)
4965 {
4966 jj_count[j]++;
4967 fine_to_coarse[i] = coarse_counter[j];
4968 coarse_counter[j]++;
4969 }
4970
4971 /*--------------------------------------------------------------------
4972 * If i is an F-point, interpolation is from the C-points that
4973 * strongly influence i.
4974 *--------------------------------------------------------------------*/
4975
4976 else
4977 {
4978 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
4979 {
4980 i1 = S_diag_j[jj];
4981 if (CF_marker[i1] > 0)
4982 {
4983 jj_count[j]++;
4984 }
4985 }
4986
4987 if (num_procs > 1)
4988 {
4989 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
4990 {
4991 i1 = S_offd_j[jj];
4992 if (CF_marker_offd[i1] > 0)
4993 {
4994 jj_count_offd[j]++;
4995 }
4996 }
4997 }
4998 }
4999 }
5000 }
5001
5002 /*-----------------------------------------------------------------------
5003 * Allocate arrays.
5004 *-----------------------------------------------------------------------*/
5005
5006 for (i=0; i < num_threads-1; i++)
5007 {
5008 coarse_counter[i+1] += coarse_counter[i];
5009 jj_count[i+1] += jj_count[i];
5010 jj_count_offd[i+1] += jj_count_offd[i];
5011 }
5012 i = num_threads-1;
5013 jj_counter = jj_count[i];
5014 jj_counter_offd = jj_count_offd[i];
5015
5016 P_diag_size = jj_counter;
5017
5018 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
5019 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
5020 /* we need to include the size of the blocks in the data size */
5021 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size*bnnz, HYPRE_MEMORY_HOST);
5022
5023 P_diag_i[n_fine] = jj_counter;
5024
5025
5026 P_offd_size = jj_counter_offd;
5027
5028 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
5029 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
5030 /* we need to include the size of the blocks in the data size */
5031 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size*bnnz, HYPRE_MEMORY_HOST);
5032
5033 /*-----------------------------------------------------------------------
5034 * Intialize some stuff.
5035 *-----------------------------------------------------------------------*/
5036
5037 jj_counter = start_indexing;
5038 jj_counter_offd = start_indexing;
5039
5040 if (debug_flag==4)
5041 {
5042 wall_time = time_getWallclockSeconds() - wall_time;
5043 hypre_printf("Proc = %d Interp: Internal work 1 = %f\n",
5044 my_id, wall_time);
5045 fflush(NULL);
5046 }
5047
5048 /* we need a block identity and a block of zeros*/
5049 identity_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
5050 zero_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
5051
5052 for(i = 0; i < block_size; i++)
5053 {
5054 identity_block[i*block_size + i] = 1.0;
5055 }
5056 /* we also need a block to keep track of the diagonal values and a sum */
5057 diagonal_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
5058 sum_block_p = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
5059 sum_block_n = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
5060 r_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
5061
5062 /*-----------------------------------------------------------------------
5063 * Send and receive fine_to_coarse info.
5064 *-----------------------------------------------------------------------*/
5065
5066 if (debug_flag==4) wall_time = time_getWallclockSeconds();
5067
5068 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_A_offd, HYPRE_MEMORY_HOST);
5069 big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
5070 num_sends), HYPRE_MEMORY_HOST);
5071
5072 for (j = 0; j < num_threads; j++)
5073 {
5074 coarse_shift = 0;
5075 if (j > 0) coarse_shift = coarse_counter[j-1];
5076 size = n_fine/num_threads;
5077 rest = n_fine - size*num_threads;
5078 if (j < rest)
5079 {
5080 ns = j*size+j;
5081 ne = (j+1)*size+j+1;
5082 }
5083 else
5084 {
5085 ns = j*size+rest;
5086 ne = (j+1)*size+rest;
5087 }
5088 for (i = ns; i < ne; i++)
5089 fine_to_coarse[i] += coarse_shift;
5090 }
5091 index = 0;
5092 for (i = 0; i < num_sends; i++)
5093 {
5094 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
5095 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
5096 big_buf_data[index++] = my_first_cpt
5097 + fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
5098 }
5099
5100 comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data,
5101 fine_to_coarse_offd);
5102
5103 hypre_ParCSRCommHandleDestroy(comm_handle);
5104
5105 if (debug_flag==4)
5106 {
5107 wall_time = time_getWallclockSeconds() - wall_time;
5108 hypre_printf("Proc = %d Interp: Comm 4 FineToCoarse = %f\n",
5109 my_id, wall_time);
5110 fflush(NULL);
5111 }
5112
5113 if (debug_flag==4) wall_time = time_getWallclockSeconds();
5114
5115 /*-----------------------------------------------------------------------
5116 * Loop over fine grid points.
5117 *-----------------------------------------------------------------------*/
5118
5119 for (jl = 0; jl < num_threads; jl++)
5120 {
5121 size = n_fine/num_threads;
5122 rest = n_fine - size*num_threads;
5123 if (jl < rest)
5124 {
5125 ns = jl*size+jl;
5126 ne = (jl+1)*size+jl+1;
5127 }
5128 else
5129 {
5130 ns = jl*size+rest;
5131 ne = (jl+1)*size+rest;
5132 }
5133 jj_counter = 0;
5134 if (jl > 0) jj_counter = jj_count[jl-1];
5135 jj_counter_offd = 0;
5136 if (jl > 0) jj_counter_offd = jj_count_offd[jl-1];
5137
5138 P_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
5139 P_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
5140
5141 for (i = 0; i < n_fine; i++)
5142 {
5143 P_marker[i] = -1;
5144 }
5145 for (i = 0; i < num_cols_A_offd; i++)
5146 {
5147 P_marker_offd[i] = -1;
5148 }
5149
5150 for (i = ns; i < ne; i++)
5151 {
5152
5153 /*--------------------------------------------------------------------
5154 * If i is a c-point, interpolation is the identity.
5155 *--------------------------------------------------------------------*/
5156
5157 if (CF_marker[i] >= 0)
5158 {
5159 P_diag_i[i] = jj_counter;
5160 P_diag_j[jj_counter] = fine_to_coarse[i];
5161 /* P_diag_data[jj_counter] = one; */
5162 hypre_CSRBlockMatrixBlockCopyData(identity_block,
5163 &P_diag_data[jj_counter*bnnz],
5164 1.0, block_size);
5165 jj_counter++;
5166 }
5167
5168 /*--------------------------------------------------------------------
5169 * If i is an F-point, build interpolation.
5170 *--------------------------------------------------------------------*/
5171
5172 else
5173 {
5174 /* Diagonal part of P */
5175 P_diag_i[i] = jj_counter;
5176 jj_begin_row = jj_counter;
5177
5178 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
5179 {
5180 i1 = S_diag_j[jj];
5181
5182 /*--------------------------------------------------------------
5183 * If neighbor i1 is a C-point, set column number in P_diag_j
5184 * and initialize interpolation weight to zero.
5185 *--------------------------------------------------------------*/
5186
5187 if (CF_marker[i1] >= 0)
5188 {
5189 P_marker[i1] = jj_counter;
5190 P_diag_j[jj_counter] = fine_to_coarse[i1];
5191 /* P_diag_data[jj_counter] = zero; */
5192 hypre_CSRBlockMatrixBlockCopyData(zero_block,
5193 &P_diag_data[jj_counter*bnnz],
5194 1.0, block_size);
5195 jj_counter++;
5196 }
5197
5198 }
5199 jj_end_row = jj_counter;
5200
5201 /* Off-Diagonal part of P */
5202 P_offd_i[i] = jj_counter_offd;
5203 jj_begin_row_offd = jj_counter_offd;
5204
5205
5206 if (num_procs > 1)
5207 {
5208 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
5209 {
5210 i1 = S_offd_j[jj];
5211
5212 /*-----------------------------------------------------------
5213 * If neighbor i1 is a C-point, set column number in P_offd_j
5214 * and initialize interpolation weight to zero.
5215 *-----------------------------------------------------------*/
5216
5217 if (CF_marker_offd[i1] >= 0)
5218 {
5219 P_marker_offd[i1] = jj_counter_offd;
5220 P_offd_j[jj_counter_offd] = i1;
5221 /* P_offd_data[jj_counter_offd] = zero; */
5222 hypre_CSRBlockMatrixBlockCopyData(zero_block,
5223 &P_offd_data[jj_counter_offd*bnnz],
5224 1.0, block_size);
5225 jj_counter_offd++;
5226
5227 }
5228 }
5229 }
5230
5231 jj_end_row_offd = jj_counter_offd;
5232 /* get the diagonal block */
5233 /* diagonal = A_diag_data[A_diag_i[i]];*/
5234 hypre_CSRBlockMatrixBlockCopyData(&A_diag_data[A_diag_i[i]*bnnz], diagonal_block,
5235 1.0, block_size);
5236
5237
5238 /* Loop over ith row of A. First, the diagonal part of A */
5239 /*sum_N_pos = 0;
5240 sum_N_neg = 0;
5241 sum_P_pos = 0;
5242 sum_P_neg = 0;*/
5243 hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block_p, 1.0,
5244 block_size);
5245 hypre_CSRBlockMatrixBlockCopyData(zero_block, sum_block_n, 1.0,
5246 block_size);
5247
5248
5249 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
5250 {
5251 i1 = A_diag_j[jj];
5252
5253 /*if (A_diag_data[jj] > 0)
5254 sum_N_pos += A_diag_data[jj];
5255 else
5256 sum_N_neg += A_diag_data[jj];*/
5257
5258 hypre_CSRBlockMatrixBlockAddAccumulate(&A_diag_data[jj*bnnz],
5259 sum_block_n, block_size);
5260
5261 /*--------------------------------------------------------------
5262 * Case 1: neighbor i1 is a C-point and strongly influences i,
5263 * accumulate a_{i,i1} into the interpolation weight.
5264 *--------------------------------------------------------------*/
5265
5266 if (P_marker[i1] >= jj_begin_row)
5267 {
5268
5269
5270 /* P_diag_data[P_marker[i1]] += A_diag_data[jj];*/
5271 hypre_CSRBlockMatrixBlockAddAccumulate(&A_diag_data[jj*bnnz],
5272 &P_diag_data[P_marker[i1]*bnnz],
5273 block_size);
5274
5275
5276 /*if (A_diag_data[jj] > 0)
5277 sum_P_pos += A_diag_data[jj];
5278 else
5279 sum_P_neg += A_diag_data[jj];*/
5280 hypre_CSRBlockMatrixBlockAddAccumulate(&A_diag_data[jj*bnnz],
5281 sum_block_p, block_size);
5282
5283 }
5284 }
5285
5286 /*----------------------------------------------------------------
5287 * Still looping over ith row of A. Next, loop over the
5288 * off-diagonal part of A
5289 *---------------------------------------------------------------*/
5290
5291 if (num_procs > 1)
5292 {
5293 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
5294 {
5295 i1 = A_offd_j[jj];
5296
5297 /*if (A_offd_data[jj] > 0)
5298 sum_N_pos += A_offd_data[jj];
5299 else
5300 sum_N_neg += A_offd_data[jj];*/
5301 hypre_CSRBlockMatrixBlockAddAccumulate(&A_offd_data[jj*bnnz],
5302 sum_block_n, block_size);
5303
5304 /*--------------------------------------------------------------
5305 * Case 1: neighbor i1 is a C-point and strongly influences i,
5306 * accumulate a_{i,i1} into the interpolation weight.
5307 *--------------------------------------------------------------*/
5308
5309 if (P_marker_offd[i1] >= jj_begin_row_offd)
5310 {
5311 /* P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];*/
5312 hypre_CSRBlockMatrixBlockAddAccumulate( &A_offd_data[jj*bnnz],
5313 &P_offd_data[P_marker_offd[i1]*bnnz],
5314 block_size);
5315 /*if (A_offd_data[jj] > 0)
5316 sum_P_pos += A_offd_data[jj];
5317 else
5318 sum_P_neg += A_offd_data[jj];*/
5319 hypre_CSRBlockMatrixBlockAddAccumulate(&A_offd_data[jj*bnnz],
5320 sum_block_p, block_size);
5321
5322 }
5323 }
5324 }
5325
5326
5327 /*if (sum_P_neg) alfa = sum_N_neg/sum_P_neg/diagonal;
5328 if (sum_P_pos) beta = sum_N_pos/sum_P_pos/diagonal;*/
5329
5330 /*r_block = sum_block_n*sum_block_p^-1*/
5331 hypre_CSRBlockMatrixBlockMultInv(sum_block_p, sum_block_n,
5332 r_block, block_size);
5333
5334 /* sum_block_n= diagonal^-1*r_block */
5335 hypre_CSRBlockMatrixBlockInvMult(diagonal_block, r_block,
5336 sum_block_n, block_size);
5337
5338 /*-----------------------------------------------------------------
5339 * Set interpolation weight by dividing by the diagonal.
5340 *-----------------------------------------------------------------*/
5341
5342 for (jj = jj_begin_row; jj < jj_end_row; jj++)
5343 {
5344 /*if (P_diag_data[jj]> 0)
5345 P_diag_data[jj] *= -beta;
5346 else
5347 P_diag_data[jj] *= -alfa;*/
5348
5349 hypre_CSRBlockMatrixBlockCopyData( &P_diag_data[jj*bnnz],
5350 r_block, -1.0, block_size);
5351
5352
5353 hypre_CSRBlockMatrixBlockMultAdd(sum_block_n, r_block, 0.0,
5354 &P_diag_data[jj*bnnz], block_size);
5355 }
5356
5357 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
5358 {
5359 /*if (P_offd_data[jj]> 0)
5360 P_offd_data[jj] *= -beta;
5361 else
5362 P_offd_data[jj] *= -alfa;*/
5363
5364 hypre_CSRBlockMatrixBlockCopyData( &P_offd_data[jj*bnnz],
5365 r_block, -1.0, block_size);
5366
5367 hypre_CSRBlockMatrixBlockMultAdd(sum_block_n, r_block, 0.0,
5368 &P_offd_data[jj*bnnz], block_size);
5369 }
5370 }
5371
5372 P_offd_i[i+1] = jj_counter_offd;
5373 }
5374 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
5375 hypre_TFree(P_marker_offd, HYPRE_MEMORY_HOST);
5376 }
5377
5378 /* Now create P - as a block matrix */
5379 P = hypre_ParCSRBlockMatrixCreate(comm,block_size,
5380 hypre_ParCSRBlockMatrixGlobalNumRows(A),
5381 total_global_cpts,
5382 hypre_ParCSRBlockMatrixColStarts(A),
5383 num_cpts_global,
5384 0,
5385 P_diag_i[n_fine],
5386 P_offd_i[n_fine]);
5387
5388 P_diag = hypre_ParCSRBlockMatrixDiag(P);
5389 hypre_CSRBlockMatrixData(P_diag) = P_diag_data;
5390 hypre_CSRBlockMatrixI(P_diag) = P_diag_i;
5391 hypre_CSRBlockMatrixJ(P_diag) = P_diag_j;
5392
5393 P_offd = hypre_ParCSRBlockMatrixOffd(P);
5394 hypre_CSRBlockMatrixData(P_offd) = P_offd_data;
5395 hypre_CSRBlockMatrixI(P_offd) = P_offd_i;
5396 hypre_CSRBlockMatrixJ(P_offd) = P_offd_j;
5397
5398 /* Compress P, removing coefficients smaller than trunc_factor * Max */
5399
5400 if (trunc_factor != 0.0 || max_elmts > 0)
5401 {
5402 hypre_BoomerAMGBlockInterpTruncation(P, trunc_factor, max_elmts);
5403 P_diag_data = hypre_CSRBlockMatrixData(P_diag);
5404 P_diag_i = hypre_CSRBlockMatrixI(P_diag);
5405 P_diag_j = hypre_CSRBlockMatrixJ(P_diag);
5406 P_offd_data = hypre_CSRBlockMatrixData(P_offd);
5407 P_offd_i = hypre_CSRBlockMatrixI(P_offd);
5408 P_offd_j = hypre_CSRBlockMatrixJ(P_offd);
5409 P_diag_size = P_diag_i[n_fine];
5410 P_offd_size = P_offd_i[n_fine];
5411 }
5412
5413 num_cols_P_offd = 0;
5414 if (P_offd_size)
5415 {
5416 P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
5417
5418 for (i=0; i < num_cols_A_offd; i++)
5419 P_marker[i] = 0;
5420
5421 num_cols_P_offd = 0;
5422 for (i=0; i < P_offd_size; i++)
5423 {
5424 index = P_offd_j[i];
5425 if (!P_marker[index])
5426 {
5427 num_cols_P_offd++;
5428 P_marker[index] = 1;
5429 }
5430 }
5431
5432 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
5433 tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
5434
5435 index = 0;
5436 for (i=0; i < num_cols_P_offd; i++)
5437 {
5438 while (P_marker[index]==0) index++;
5439 tmp_map_offd[i] = index++;
5440 }
5441
5442 for (i=0; i < P_offd_size; i++)
5443 P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
5444 P_offd_j[i],
5445 num_cols_P_offd);
5446 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
5447 }
5448
5449 for (i=0; i < n_fine; i++)
5450 if (CF_marker[i] == -3) CF_marker[i] = -1;
5451
5452 if (num_cols_P_offd)
5453 {
5454 hypre_ParCSRBlockMatrixColMapOffd(P) = col_map_offd_P;
5455 hypre_CSRBlockMatrixNumCols(P_offd) = num_cols_P_offd;
5456 }
5457
5458 hypre_GetCommPkgBlockRTFromCommPkgBlockA(P,A,tmp_map_offd,fine_to_coarse_offd);
5459
5460 *P_ptr = P;
5461
5462 hypre_TFree(zero_block, HYPRE_MEMORY_HOST);
5463 hypre_TFree(identity_block, HYPRE_MEMORY_HOST);
5464 hypre_TFree(diagonal_block, HYPRE_MEMORY_HOST);
5465 hypre_TFree(sum_block_n, HYPRE_MEMORY_HOST);
5466 hypre_TFree(sum_block_p, HYPRE_MEMORY_HOST);
5467 hypre_TFree(r_block, HYPRE_MEMORY_HOST);
5468
5469
5470 hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
5471 hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
5472 hypre_TFree(dof_func_offd, HYPRE_MEMORY_HOST);
5473 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
5474 hypre_TFree(big_buf_data, HYPRE_MEMORY_HOST);
5475 hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
5476 hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
5477 hypre_TFree(coarse_counter, HYPRE_MEMORY_HOST);
5478 hypre_TFree(jj_count, HYPRE_MEMORY_HOST);
5479 hypre_TFree(jj_count_offd, HYPRE_MEMORY_HOST);
5480
5481 return(0);
5482
5483 }
5484
5485 #if 0 /* not finished yet! */
5486
5487 /*---------------------------------------------------------------------------
5488 * hypre_BoomerAMGBuildBlockStdInterp
5489  * Comment: The interpolation weighting can be changed with the sep_weight
5490  *          variable, which controls whether negative and positive
5491  *          off-diagonals are separated in the weight formula.
5492 *--------------------------------------------------------------------------*/
5493
5494 HYPRE_Int
5495 hypre_BoomerAMGBuildBlockStdInterp(hypre_ParCSRBlockMatrix *A,
5496 HYPRE_Int *CF_marker,
5497 hypre_ParCSRMatrix *S,
5498 HYPRE_Int *num_cpts_global,
5499 HYPRE_Int num_functions,
5500 HYPRE_Int *dof_func,
5501 HYPRE_Int debug_flag,
5502 HYPRE_Real trunc_factor,
5503 HYPRE_Int max_elmts,
5504 HYPRE_Int sep_weight,
5505 hypre_ParCSRBlockMatrix **P_ptr)
5506 {
5507 /* Communication Variables */
5508 MPI_Comm comm = hypre_ParCSRBlockMatrixComm(A);
5509 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
5510 HYPRE_Int my_id, num_procs;
5511
5512 /* Variables to store input variables */
5513 hypre_CSRBlockMatrix *A_diag = hypre_ParCSRBlockMatrixDiag(A);
5514 HYPRE_Real *A_diag_data = hypre_CSRBlockMatrixData(A_diag);
5515 HYPRE_Int *A_diag_i = hypre_CSRBlockMatrixI(A_diag);
5516 HYPRE_Int *A_diag_j = hypre_CSRBlockMatrixJ(A_diag);
5517
5518 hypre_CSRBlockMatrix *A_offd = hypre_ParCSRBlockMatrixOffd(A);
5519 HYPRE_Real *A_offd_data = hypre_CSRBlockMatrixData(A_offd);
5520 HYPRE_Int *A_offd_i = hypre_CSRBlockMatrixI(A_offd);
5521 HYPRE_Int *A_offd_j = hypre_CSRBlockMatrixJ(A_offd);
5522
5523 HYPRE_Int num_cols_A_offd = hypre_CSRBlockMatrixNumCols(A_offd);
5524 HYPRE_Int *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(A);
5525 HYPRE_Int n_fine = hypre_CSRBlockMatrixNumRows(A_diag);
5526 HYPRE_Int col_1 = hypre_ParCSRBlockMatrixFirstRowIndex(A);
5527 HYPRE_Int local_numrows = hypre_CSRBlockMatrixNumRows(A_diag);
5528 HYPRE_Int col_n = col_1 + local_numrows;
5529 HYPRE_Int total_global_cpts, my_first_cpt;
5530
5531 /* Variables to store strong connection matrix info */
5532 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
5533 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
5534 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
5535
5536 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
5537 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
5538 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
5539
5540 /* Interpolation matrix P */
5541 hypre_ParCSRBlockMatrix *P;
5542 hypre_CSRBlockMatrix *P_diag;
5543 hypre_CSRBlockMatrix *P_offd;
5544
5545 HYPRE_Real *P_diag_data;
5546 HYPRE_Int *P_diag_i, *P_diag_j;
5547 HYPRE_Real *P_offd_data;
5548 HYPRE_Int *P_offd_i, *P_offd_j;
5549
5550 HYPRE_Int *col_map_offd_P;
5551 HYPRE_Int P_diag_size;
5552 HYPRE_Int P_offd_size;
5553 HYPRE_Int *P_marker;
5554 HYPRE_Int *P_marker_offd = NULL;
5555 HYPRE_Int *CF_marker_offd = NULL;
5556 HYPRE_Int *tmp_CF_marker_offd = NULL;
5557 HYPRE_Int *dof_func_offd = NULL;
5558
5559 /* Full row information for columns of A that are off diag*/
5560 hypre_CSRBlockMatrix *A_ext;
5561 HYPRE_Real *A_ext_data;
5562 HYPRE_Int *A_ext_i;
5563 HYPRE_Int *A_ext_j;
5564
5565 HYPRE_Int *fine_to_coarse;
5566 HYPRE_Int *fine_to_coarse_offd = NULL;
5567 HYPRE_Int *found;
5568
5569 HYPRE_Int num_cols_P_offd;
5570 HYPRE_Int newoff, loc_col;
5571 HYPRE_Int A_ext_rows, full_off_procNodes;
5572
5573 hypre_CSRMatrix *Sop;
5574 HYPRE_Int *Sop_i;
5575 HYPRE_Int *Sop_j;
5576
5577 HYPRE_Int Soprows;
5578
5579 /* Variables to keep count of interpolatory points */
5580 HYPRE_Int jj_counter, jj_counter_offd;
5581 HYPRE_Int jj_begin_row, jj_end_row;
5582 HYPRE_Int jj_begin_row_offd = 0;
5583 HYPRE_Int jj_end_row_offd = 0;
5584 HYPRE_Int coarse_counter, coarse_counter_offd;
5585 HYPRE_Int *ihat, *ihat_offd = NULL;
5586 HYPRE_Int *ipnt, *ipnt_offd = NULL;
5587 HYPRE_Int strong_f_marker = -2;
5588
5589 /* Interpolation weight variables */
5590 HYPRE_Real *ahat, *ahat_offd = NULL;
5591 HYPRE_Real sum_pos, sum_pos_C, sum_neg, sum_neg_C, sum, sum_C;
5592 HYPRE_Real diagonal, distribute;
5593 HYPRE_Real alfa, beta;
5594
5595 /* Loop variables */
5596 HYPRE_Int index;
5597 HYPRE_Int start_indexing = 0;
5598 HYPRE_Int i, i1, j, j1, jj, kk, k1;
5599 HYPRE_Int cnt_c, cnt_f, cnt_c_offd, cnt_f_offd, indx;
5600
5601 /* Definitions */
5602 HYPRE_Real zero = 0.0;
5603 HYPRE_Real one = 1.0;
5604 HYPRE_Real wall_time;
5605 HYPRE_Real wall_1 = 0;
5606 HYPRE_Real wall_2 = 0;
5607 HYPRE_Real wall_3 = 0;
5608
5609
5610 hypre_ParCSRCommPkg *extend_comm_pkg = NULL;
5611
5612 HYPRE_Real *identity_block;
5613 HYPRE_Real *zero_block;
5614 HYPRE_Real *diagonal_block;
5615 HYPRE_Real *sum_block;
5616 HYPRE_Real *distribute_block;
5617
5618 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(A_diag);
5619 HYPRE_Int bnnz = block_size*block_size;
5620
5621
5622
5623 if (debug_flag== 4) wall_time = time_getWallclockSeconds();
5624
5625 /* BEGIN */
5626 hypre_MPI_Comm_size(comm, &num_procs);
5627 hypre_MPI_Comm_rank(comm,&my_id);
5628
5629 my_first_cpt = num_cpts_global[0];
5630 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
5631 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_INT, num_procs-1, comm);
5632
5633 if (!comm_pkg)
5634 {
5635 hypre_BlockMatvecCommPkgCreate(A);
5636 comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
5637 }
5638
5639    /* Set up off-processor information (specifically for neighbors of
5640     * neighbors) */
5641 newoff = 0;
5642 full_off_procNodes = 0;
5643 if (num_procs > 1)
5644 {
5645 /*----------------------------------------------------------------------
5646 * Get the off processors rows for A and S, associated with columns in
5647 * A_offd and S_offd.
5648 *---------------------------------------------------------------------*/
5649 A_ext = hypre_ParCSRBlockMatrixExtractBExt(A,A,1);
5650 A_ext_i = hypre_CSRBlockMatrixI(A_ext);
5651 A_ext_j = hypre_CSRBlockMatrixJ(A_ext);
5652 A_ext_data = hypre_CSRBlockMatrixData(A_ext);
5653 A_ext_rows = hypre_CSRBlockMatrixNumRows(A_ext);
5654
5655
5656 /* FIX THIS! - Sop - block or ???*/
5657
5658 Sop = hypre_ParCSRMatrixExtractBExt(S,A,0);
5659 Sop_i = hypre_CSRMatrixI(Sop);
5660 Sop_j = hypre_CSRMatrixJ(Sop);
5661 Soprows = hypre_CSRMatrixNumRows(Sop);
5662
5663 /* Find nodes that are neighbors of neighbors, not found in offd */
5664 newoff = new_offd_nodes(&found, A_ext_rows, A_ext_i, A_ext_j,
5665 Soprows, col_map_offd, col_1, col_n,
5666 Sop_i, Sop_j, CF_marker, comm_pkg);
5667 if(newoff >= 0)
5668 full_off_procNodes = newoff + num_cols_A_offd;
5669 else
5670 return(1);
5671
5672 /* Possibly add new points and new processors to the comm_pkg, all
5673 * processors need new_comm_pkg */
5674
5675 /* AHB - create a new comm package just for extended info -
5676 this will work better with the assumed partition*/
5677
5678 /* FIX THIS: Block version of this? */
5679 hypre_ParCSRFindExtendCommPkg(A, newoff, found,
5680 &extend_comm_pkg);
5681
5682 CF_marker_offd = hypre_CTAlloc(HYPRE_Int, full_off_procNodes, HYPRE_MEMORY_HOST);
5683
5684 if (num_functions > 1 && full_off_procNodes > 0)
5685 dof_func_offd = hypre_CTAlloc(HYPRE_Int, full_off_procNodes, HYPRE_MEMORY_HOST);
5686
5687 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, CF_marker,
5688 full_off_procNodes, CF_marker_offd);
5689
5690 if(num_functions > 1)
5691 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, dof_func,
5692 full_off_procNodes, dof_func_offd);
5693 }
5694
5695
5696 /*-----------------------------------------------------------------------
5697 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
5698 *-----------------------------------------------------------------------*/
5699
5700 /*-----------------------------------------------------------------------
5701    * Initialize counters and allocate mapping vector.
5702 *-----------------------------------------------------------------------*/
5703 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
5704 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
5705
5706 fine_to_coarse = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
5707
5708 P_marker = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
5709
5710
5711 /* FIX THIS - figure out sizes - need bnnz? */
5712 if (full_off_procNodes)
5713 {
5714 P_marker_offd = hypre_CTAlloc(HYPRE_Int, full_off_procNodes, HYPRE_MEMORY_HOST);
5715 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_Int, full_off_procNodes, HYPRE_MEMORY_HOST);
5716 tmp_CF_marker_offd = hypre_CTAlloc(HYPRE_Int, full_off_procNodes, HYPRE_MEMORY_HOST);
5717 }
5718
5719 initialize_vecs(n_fine, full_off_procNodes, fine_to_coarse,
5720 fine_to_coarse_offd, P_marker, P_marker_offd,
5721 tmp_CF_marker_offd);
5722
5723
5724 /* stuff for blocks */
5725 /* we need a block identity and a block of zeros*/
5726 identity_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
5727 zero_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
5728
5729 for(i = 0; i < block_size; i++)
5730 {
5731 identity_block[i*block_size + i] = 1.0;
5732 }
5733 /* we also need a block to keep track of the diagonal values and a sum */
5734 diagonal_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
5735 sum_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
5736 distribute_block = hypre_CTAlloc(HYPRE_Real, bnnz, HYPRE_MEMORY_HOST);
5737
5738
5739
5740 jj_counter = start_indexing;
5741 jj_counter_offd = start_indexing;
5742 coarse_counter = 0;
5743 coarse_counter_offd = 0;
5744
5745 /*-----------------------------------------------------------------------
5746 * Loop over fine grid.
5747 *-----------------------------------------------------------------------*/
5748 for (i = 0; i < n_fine; i++)
5749 {
5750 P_diag_i[i] = jj_counter;
5751 if (num_procs > 1)
5752 P_offd_i[i] = jj_counter_offd;
5753
5754 if (CF_marker[i] >= 0)
5755 {
5756 jj_counter++;
5757 fine_to_coarse[i] = coarse_counter;
5758 coarse_counter++;
5759 }
5760
5761 /*--------------------------------------------------------------------
5762 * If i is an F-point, interpolation is from the C-points that
5763       * strongly influence i, or C-points that strongly influence F-points
5764 * that strongly influence i.
5765 *--------------------------------------------------------------------*/
5766 else if (CF_marker[i] != -3)
5767 {
5768 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
5769 {
5770 i1 = S_diag_j[jj];
5771 if (CF_marker[i1] >= 0)
5772 { /* i1 is a C point */
5773 if (P_marker[i1] < P_diag_i[i])
5774 {
5775 P_marker[i1] = jj_counter;
5776 jj_counter++;
5777 }
5778 }
5779 else if (CF_marker[i1] != -3)
5780          { /* i1 is an F point, loop through its strong neighbors */
5781 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
5782 {
5783 k1 = S_diag_j[kk];
5784 if (CF_marker[k1] >= 0)
5785 {
5786 if(P_marker[k1] < P_diag_i[i])
5787 {
5788 P_marker[k1] = jj_counter;
5789 jj_counter++;
5790 }
5791 }
5792 }
5793 if(num_procs > 1)
5794 {
5795 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
5796 {
5797 k1 = S_offd_j[kk];
5798 if (CF_marker_offd[k1] >= 0)
5799 {
5800 if(P_marker_offd[k1] < P_offd_i[i])
5801 {
5802 tmp_CF_marker_offd[k1] = 1;
5803 P_marker_offd[k1] = jj_counter_offd;
5804 jj_counter_offd++;
5805 }
5806 }
5807 }
5808 }
5809 }
5810 }
5811 /* Look at off diag strong connections of i */
5812 if (num_procs > 1)
5813 {
5814 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
5815 {
5816 i1 = S_offd_j[jj];
5817 if (CF_marker_offd[i1] >= 0)
5818 {
5819 if(P_marker_offd[i1] < P_offd_i[i])
5820 {
5821 tmp_CF_marker_offd[i1] = 1;
5822 P_marker_offd[i1] = jj_counter_offd;
5823 jj_counter_offd++;
5824 }
5825 }
5826 else if (CF_marker_offd[i1] != -3)
5827 { /* F point; look at neighbors of i1. Sop contains global col
5828 * numbers and entries that could be in S_diag or S_offd or
5829 * neither. */
5830 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
5831 {
5832 k1 = Sop_j[kk];
5833 if(k1 >= col_1 && k1 < col_n)
5834 { /* In S_diag */
5835 loc_col = k1-col_1;
5836 if(CF_marker[loc_col] >= 0)
5837 {
5838 if(P_marker[loc_col] < P_diag_i[i])
5839 {
5840 P_marker[loc_col] = jj_counter;
5841 jj_counter++;
5842 }
5843 }
5844 }
5845 else
5846 {
5847 loc_col = -k1 - 1;
5848 if(CF_marker_offd[loc_col] >= 0)
5849 {
5850 if(P_marker_offd[loc_col] < P_offd_i[i])
5851 {
5852 P_marker_offd[loc_col] = jj_counter_offd;
5853 tmp_CF_marker_offd[loc_col] = 1;
5854 jj_counter_offd++;
5855 }
5856 }
5857 }
5858 }
5859 }
5860 }
5861 }
5862 }
5863 }
5864
5865 if (debug_flag==4)
5866 {
5867 wall_time = time_getWallclockSeconds() - wall_time;
5868 hypre_printf("Proc = %d determine structure %f\n",
5869 my_id, wall_time);
5870 fflush(NULL);
5871 }
5872 /*-----------------------------------------------------------------------
5873 * Allocate arrays.
5874 *-----------------------------------------------------------------------*/
5875
5876
5877 P_diag_size = jj_counter;
5878 P_offd_size = jj_counter_offd;
5879
5880 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
5881 /* we need to include the size of the blocks in the data size */
5882 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, HYPRE_MEMORY_HOST)*bnnz;
5883
5884 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
5885 /* we need to include the size of the blocks in the data size */
5886 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size*bnnz, HYPRE_MEMORY_HOST);
5887
5888 P_diag_i[n_fine] = jj_counter;
5889 P_offd_i[n_fine] = jj_counter_offd;
5890
5891 jj_counter = start_indexing;
5892 jj_counter_offd = start_indexing;
5893
5894 /* Fine to coarse mapping */
5895 if(num_procs > 1)
5896 {
5897 for (i = 0; i < n_fine; i++)
5898 fine_to_coarse[i] += my_first_cpt;
5899
5900 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, fine_to_coarse,
5901 full_off_procNodes,
5902 fine_to_coarse_offd);
5903
5904 for (i = 0; i < n_fine; i++)
5905 fine_to_coarse[i] -= my_first_cpt;
5906 }
5907
5908 /* Initialize ahat, which is a modification to a, used in the standard
5909 * interpolation routine. */
5910 ahat = hypre_CTAlloc(HYPRE_Real, n_fine*bnnz, HYPRE_MEMORY_HOST); /* this is data array */
5911 ihat = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
5912 ipnt = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST);
5913
5914
5915 if (full_off_procNodes)
5916 {
5917 ahat_offd = hypre_CTAlloc(HYPRE_Real, full_off_procNodes*bnnz, HYPRE_MEMORY_HOST); /* this is data array */
5918 ihat_offd = hypre_CTAlloc(HYPRE_Int, full_off_procNodes, HYPRE_MEMORY_HOST);
5919 ipnt_offd = hypre_CTAlloc(HYPRE_Int, full_off_procNodes, HYPRE_MEMORY_HOST);
5920 }
5921
5922 for (i = 0; i < n_fine; i++)
5923 {
5924 P_marker[i] = -1;
5925 ahat[i] = 0;
5926 ihat[i] = -1;
5927 }
5928 for (i = 0; i < full_off_procNodes; i++)
5929 {
5930 P_marker_offd[i] = -1;
5931 ahat_offd[i] = 0;
5932 ihat_offd[i] = -1;
5933 }
5934
5935 /*-----------------------------------------------------------------------
5936 * Loop over fine grid points.
5937 *-----------------------------------------------------------------------*/
5938 for (i = 0; i < n_fine; i++)
5939 {
5940 jj_begin_row = jj_counter;
5941 if(num_procs > 1)
5942 jj_begin_row_offd = jj_counter_offd;
5943
5944 /*--------------------------------------------------------------------
5945 * If i is a c-point, interpolation is the identity.
5946 *--------------------------------------------------------------------*/
5947
5948 if (CF_marker[i] >= 0)
5949 {
5950 P_diag_j[jj_counter] = fine_to_coarse[i];
5951
5952
5953 /* P_diag_data[jj_counter] = one; */
5954 hypre_CSRBlockMatrixBlockCopyData(identity_block,
5955 &P_diag_data[jj_counter*bnnz],
5956 1.0, block_size);
5957
5958 jj_counter++;
5959 }
5960
5961 /*--------------------------------------------------------------------
5962 * If i is an F-point, build interpolation.
5963 *--------------------------------------------------------------------*/
5964
5965 else if (CF_marker[i] != -3)
5966 {
5967 if (debug_flag==4) wall_time = time_getWallclockSeconds();
5968 strong_f_marker--;
5969 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
5970 {
5971 i1 = S_diag_j[jj];
5972
5973 /*--------------------------------------------------------------
5974 * If neighbor i1 is a C-point, set column number in P_diag_j
5975 * and initialize interpolation weight to zero.
5976 *--------------------------------------------------------------*/
5977
5978 if (CF_marker[i1] >= 0)
5979 {
5980 if (P_marker[i1] < jj_begin_row)
5981 {
5982 P_marker[i1] = jj_counter;
5983 P_diag_j[jj_counter] = i1;
5984 /* P_diag_data[jj_counter] = zero; */
5985 hypre_CSRBlockMatrixBlockCopyData(zero_block,
5986 &P_diag_data[jj_counter*bnnz],
5987 1.0, block_size);
5988
5989 jj_counter++;
5990 }
5991 }
5992 else if (CF_marker[i1] != -3)
5993 {
5994 P_marker[i1] = strong_f_marker;
5995 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
5996 {
5997 k1 = S_diag_j[kk];
5998 if (CF_marker[k1] >= 0)
5999 {
6000 if(P_marker[k1] < jj_begin_row)
6001 {
6002 P_marker[k1] = jj_counter;
6003 P_diag_j[jj_counter] = k1;
6004 /* P_diag_data[jj_counter] = zero; */
6005 hypre_CSRBlockMatrixBlockCopyData(zero_block,
6006 &P_diag_data[jj_counter*bnnz],
6007 1.0, block_size);
6008
6009 jj_counter++;
6010 }
6011 }
6012 }
6013 if(num_procs > 1)
6014 {
6015 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
6016 {
6017 k1 = S_offd_j[kk];
6018 if(CF_marker_offd[k1] >= 0)
6019 {
6020 if(P_marker_offd[k1] < jj_begin_row_offd)
6021 {
6022 P_marker_offd[k1] = jj_counter_offd;
6023 P_offd_j[jj_counter_offd] = k1;
6024 /* P_offd_data[jj_counter_offd] = zero; */
6025 hypre_CSRBlockMatrixBlockCopyData(zero_block,
6026 &P_offd_data[jj_counter_offd*bnnz],
6027 1.0, block_size);
6028
6029 jj_counter_offd++;
6030 }
6031 }
6032 }
6033 }
6034 }
6035 }
6036
6037 if ( num_procs > 1)
6038 {
6039 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
6040 {
6041 i1 = S_offd_j[jj];
6042 if ( CF_marker_offd[i1] >= 0)
6043 {
6044 if(P_marker_offd[i1] < jj_begin_row_offd)
6045 {
6046 P_marker_offd[i1] = jj_counter_offd;
6047 P_offd_j[jj_counter_offd]=i1;
6048 /* P_offd_data[jj_counter_offd] = zero;*/
6049 hypre_CSRBlockMatrixBlockCopyData(zero_block,
6050 &P_offd_data[jj_counter_offd*bnnz],
6051 1.0, block_size);
6052
6053 jj_counter_offd++;
6054 }
6055 }
6056 else if (CF_marker_offd[i1] != -3)
6057 {
6058 P_marker_offd[i1] = strong_f_marker;
6059 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
6060 {
6061 k1 = Sop_j[kk];
6062 if(k1 >= col_1 && k1 < col_n)
6063 {
6064 loc_col = k1-col_1;
6065 if(CF_marker[loc_col] >= 0)
6066 {
6067 if(P_marker[loc_col] < jj_begin_row)
6068 {
6069 P_marker[loc_col] = jj_counter;
6070 P_diag_j[jj_counter] = loc_col;
6071 /* P_diag_data[jj_counter] = zero;*/
6072 hypre_CSRBlockMatrixBlockCopyData(zero_block,
6073 &P_diag_data[jj_counter*bnnz],
6074 1.0, block_size);
6075 jj_counter++;
6076 }
6077 }
6078 }
6079 else
6080 {
6081 loc_col = -k1 - 1;
6082 if(CF_marker_offd[loc_col] >= 0)
6083 {
6084 if(P_marker_offd[loc_col] < jj_begin_row_offd)
6085 {
6086 P_marker_offd[loc_col] = jj_counter_offd;
6087 P_offd_j[jj_counter_offd]=loc_col;
6088 /* P_offd_data[jj_counter_offd] = zero;*/
6089 hypre_CSRBlockMatrixBlockCopyData(zero_block,
6090 &P_offd_data[jj_counter_offd*bnnz],
6091 1.0, block_size);
6092 jj_counter_offd++;
6093 }
6094 }
6095 }
6096 }
6097 }
6098 }
6099 }
6100
6101 jj_end_row = jj_counter;
6102 jj_end_row_offd = jj_counter_offd;
6103
6104 if (debug_flag==4)
6105 {
6106 wall_time = time_getWallclockSeconds() - wall_time;
6107 wall_1 += wall_time;
6108 fflush(NULL);
6109 }
6110
6111 /* FIX THIS - is a_hat - need to copy block data to ahat */
6112
6113 if (debug_flag==4) wall_time = time_getWallclockSeconds();
6114 cnt_c = 0;
6115 cnt_f = jj_end_row-jj_begin_row;
6116 cnt_c_offd = 0;
6117 cnt_f_offd = jj_end_row_offd-jj_begin_row_offd;
6118 ihat[i] = cnt_f;
6119 ipnt[cnt_f] = i;
6120 ahat[cnt_f++] = A_diag_data[A_diag_i[i]];
6121 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
6122 { /* i1 is direct neighbor */
6123 i1 = A_diag_j[jj];
6124 if (P_marker[i1] != strong_f_marker)
6125 {
6126 indx = ihat[i1];
6127 if (indx > -1)
6128 ahat[indx] += A_diag_data[jj];
6129 else if (P_marker[i1] >= jj_begin_row)
6130 {
6131 ihat[i1] = cnt_c;
6132 ipnt[cnt_c] = i1;
6133 ahat[cnt_c++] += A_diag_data[jj];
6134 }
6135 else if (CF_marker[i1] != -3)
6136 {
6137 ihat[i1] = cnt_f;
6138 ipnt[cnt_f] = i1;
6139 ahat[cnt_f++] += A_diag_data[jj];
6140 }
6141 }
6142 else
6143 {
6144 if(num_functions == 1 || dof_func[i] == dof_func[i1])
6145 {
6146 distribute = A_diag_data[jj]/A_diag_data[A_diag_i[i1]];
6147 for (kk = A_diag_i[i1]+1; kk < A_diag_i[i1+1]; kk++)
6148 {
6149 k1 = A_diag_j[kk];
6150 indx = ihat[k1];
6151 if (indx > -1)
6152 ahat[indx] -= A_diag_data[kk]*distribute;
6153 else if (P_marker[k1] >= jj_begin_row)
6154 {
6155 ihat[k1] = cnt_c;
6156 ipnt[cnt_c] = k1;
6157 ahat[cnt_c++] -= A_diag_data[kk]*distribute;
6158 }
6159 else
6160 {
6161 ihat[k1] = cnt_f;
6162 ipnt[cnt_f] = k1;
6163 ahat[cnt_f++] -= A_diag_data[kk]*distribute;
6164 }
6165 }
6166 if(num_procs > 1)
6167 {
6168 for (kk = A_offd_i[i1]; kk < A_offd_i[i1+1]; kk++)
6169 {
6170 k1 = A_offd_j[kk];
6171 indx = ihat_offd[k1];
6172 if(num_functions == 1 || dof_func[i1] == dof_func_offd[k1])
6173 {
6174 if (indx > -1)
6175 ahat_offd[indx] -= A_offd_data[kk]*distribute;
6176 else if (P_marker_offd[k1] >= jj_begin_row_offd)
6177 {
6178 ihat_offd[k1] = cnt_c_offd;
6179 ipnt_offd[cnt_c_offd] = k1;
6180 ahat_offd[cnt_c_offd++] -= A_offd_data[kk]*distribute;
6181 }
6182 else
6183 {
6184 ihat_offd[k1] = cnt_f_offd;
6185 ipnt_offd[cnt_f_offd] = k1;
6186 ahat_offd[cnt_f_offd++] -= A_offd_data[kk]*distribute;
6187 }
6188 }
6189 }
6190 }
6191 }
6192 }
6193 }
6194 if(num_procs > 1)
6195 {
6196 for(jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
6197 {
6198 i1 = A_offd_j[jj];
6199 if(P_marker_offd[i1] != strong_f_marker)
6200 {
6201 indx = ihat_offd[i1];
6202 if (indx > -1)
6203 ahat_offd[indx] += A_offd_data[jj];
6204 else if (P_marker_offd[i1] >= jj_begin_row_offd)
6205 {
6206 ihat_offd[i1] = cnt_c_offd;
6207 ipnt_offd[cnt_c_offd] = i1;
6208 ahat_offd[cnt_c_offd++] += A_offd_data[jj];
6209 }
6210 else if (CF_marker_offd[i1] != -3)
6211 {
6212 ihat_offd[i1] = cnt_f_offd;
6213 ipnt_offd[cnt_f_offd] = i1;
6214 ahat_offd[cnt_f_offd++] += A_offd_data[jj];
6215 }
6216 }
6217 else
6218 {
6219 if(num_functions == 1 || dof_func[i] == dof_func_offd[i1])
6220 {
6221 distribute = A_offd_data[jj]/A_ext_data[A_ext_i[i1]];
6222 for (kk = A_ext_i[i1]+1; kk < A_ext_i[i1+1]; kk++)
6223 {
6224 k1 = A_ext_j[kk];
6225 if(k1 >= col_1 && k1 < col_n)
6226 { /*diag*/
6227 loc_col = k1 - col_1;
6228 indx = ihat[loc_col];
6229 if (indx > -1)
6230 ahat[indx] -= A_ext_data[kk]*distribute;
6231 else if (P_marker[loc_col] >= jj_begin_row)
6232 {
6233 ihat[loc_col] = cnt_c;
6234 ipnt[cnt_c] = loc_col;
6235 ahat[cnt_c++] -= A_ext_data[kk]*distribute;
6236 }
6237 else
6238 {
6239 ihat[loc_col] = cnt_f;
6240 ipnt[cnt_f] = loc_col;
6241 ahat[cnt_f++] -= A_ext_data[kk]*distribute;
6242 }
6243 }
6244 else
6245 {
6246 loc_col = -k1 - 1;
6247 if(num_functions == 1 ||
6248 dof_func_offd[loc_col] == dof_func_offd[i1])
6249 {
6250 indx = ihat_offd[loc_col];
6251 if (indx > -1)
6252 ahat_offd[indx] -= A_ext_data[kk]*distribute;
6253 else if(P_marker_offd[loc_col] >= jj_begin_row_offd)
6254 {
6255 ihat_offd[loc_col] = cnt_c_offd;
6256 ipnt_offd[cnt_c_offd] = loc_col;
6257 ahat_offd[cnt_c_offd++] -= A_ext_data[kk]*distribute;
6258 }
6259 else
6260 {
6261 ihat_offd[loc_col] = cnt_f_offd;
6262 ipnt_offd[cnt_f_offd] = loc_col;
6263 ahat_offd[cnt_f_offd++] -= A_ext_data[kk]*distribute;
6264 }
6265 }
6266 }
6267 }
6268 }
6269 }
6270 }
6271 }
6272 if (debug_flag==4)
6273 {
6274 wall_time = time_getWallclockSeconds() - wall_time;
6275 wall_2 += wall_time;
6276 fflush(NULL);
6277 }
6278
6279 if (debug_flag==4) wall_time = time_getWallclockSeconds();
6280 diagonal = ahat[cnt_c];
6281 ahat[cnt_c] = 0;
6282 sum_pos = 0;
6283 sum_pos_C = 0;
6284 sum_neg = 0;
6285 sum_neg_C = 0;
6286 sum = 0;
6287 sum_C = 0;
6288 if(sep_weight == 1)
6289 {
6290 for (jj=0; jj < cnt_c; jj++)
6291 {
6292 if (ahat[jj] > 0)
6293 {
6294 sum_pos_C += ahat[jj];
6295 }
6296 else
6297 {
6298 sum_neg_C += ahat[jj];
6299 }
6300 }
6301 if(num_procs > 1)
6302 {
6303 for (jj=0; jj < cnt_c_offd; jj++)
6304 {
6305 if (ahat_offd[jj] > 0)
6306 {
6307 sum_pos_C += ahat_offd[jj];
6308 }
6309 else
6310 {
6311 sum_neg_C += ahat_offd[jj];
6312 }
6313 }
6314 }
6315 sum_pos = sum_pos_C;
6316 sum_neg = sum_neg_C;
6317 for (jj=cnt_c+1; jj < cnt_f; jj++)
6318 {
6319 if (ahat[jj] > 0)
6320 {
6321 sum_pos += ahat[jj];
6322 }
6323 else
6324 {
6325 sum_neg += ahat[jj];
6326 }
6327 ahat[jj] = 0;
6328 }
6329 if(num_procs > 1)
6330 {
6331 for (jj=cnt_c_offd; jj < cnt_f_offd; jj++)
6332 {
6333 if (ahat_offd[jj] > 0)
6334 {
6335 sum_pos += ahat_offd[jj];
6336 }
6337 else
6338 {
6339 sum_neg += ahat_offd[jj];
6340 }
6341 ahat_offd[jj] = 0;
6342 }
6343 }
6344 if (sum_neg_C) alfa = sum_neg/sum_neg_C/diagonal;
6345 if (sum_pos_C) beta = sum_pos/sum_pos_C/diagonal;
6346
6347 /*-----------------------------------------------------------------
6348 * Set interpolation weight by dividing by the diagonal.
6349 *-----------------------------------------------------------------*/
6350
6351 for (jj = jj_begin_row; jj < jj_end_row; jj++)
6352 {
6353 j1 = ihat[P_diag_j[jj]];
6354 if (ahat[j1] > 0)
6355 P_diag_data[jj] = -beta*ahat[j1];
6356 else
6357 P_diag_data[jj] = -alfa*ahat[j1];
6358
6359 P_diag_j[jj] = fine_to_coarse[P_diag_j[jj]];
6360 ahat[j1] = 0;
6361 }
6362 for (jj=0; jj < cnt_f; jj++)
6363 ihat[ipnt[jj]] = -1;
6364 if(num_procs > 1)
6365 {
6366 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
6367 {
6368 j1 = ihat_offd[P_offd_j[jj]];
6369 if (ahat_offd[j1] > 0)
6370 P_offd_data[jj] = -beta*ahat_offd[j1];
6371 else
6372 P_offd_data[jj] = -alfa*ahat_offd[j1];
6373
6374 ahat_offd[j1] = 0;
6375 }
6376 for (jj=0; jj < cnt_f_offd; jj++)
6377 ihat_offd[ipnt_offd[jj]] = -1;
6378 }
6379 }
6380 else
6381 {
6382 for (jj=0; jj < cnt_c; jj++)
6383 {
6384 sum_C += ahat[jj];
6385 }
6386 if(num_procs > 1)
6387 {
6388 for (jj=0; jj < cnt_c_offd; jj++)
6389 {
6390 sum_C += ahat_offd[jj];
6391 }
6392 }
6393 sum = sum_C;
6394 for (jj=cnt_c+1; jj < cnt_f; jj++)
6395 {
6396 sum += ahat[jj];
6397 ahat[jj] = 0;
6398 }
6399 if(num_procs > 1)
6400 {
6401 for (jj=cnt_c_offd; jj < cnt_f_offd; jj++)
6402 {
6403 sum += ahat_offd[jj];
6404 ahat_offd[jj] = 0;
6405 }
6406 }
6407 if (sum_C) alfa = sum/sum_C/diagonal;
6408
6409 /*-----------------------------------------------------------------
6410 * Set interpolation weight by dividing by the diagonal.
6411 *-----------------------------------------------------------------*/
6412
6413 for (jj = jj_begin_row; jj < jj_end_row; jj++)
6414 {
6415 j1 = ihat[P_diag_j[jj]];
6416 P_diag_data[jj] = -alfa*ahat[j1];
6417 P_diag_j[jj] = fine_to_coarse[P_diag_j[jj]];
6418 ahat[j1] = 0;
6419 }
6420 for (jj=0; jj < cnt_f; jj++)
6421 ihat[ipnt[jj]] = -1;
6422 if(num_procs > 1)
6423 {
6424 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
6425 {
6426 j1 = ihat_offd[P_offd_j[jj]];
6427 P_offd_data[jj] = -alfa*ahat_offd[j1];
6428 ahat_offd[j1] = 0;
6429 }
6430 for (jj=0; jj < cnt_f_offd; jj++)
6431 ihat_offd[ipnt_offd[jj]] = -1;
6432 }
6433 }
6434 if (debug_flag==4)
6435 {
6436 wall_time = time_getWallclockSeconds() - wall_time;
6437 wall_3 += wall_time;
6438 fflush(NULL);
6439 }
6440 }
6441 }
6442
6443 if (debug_flag==4)
6444 {
6445 hypre_printf("Proc = %d fill part 1 %f part 2 %f part 3 %f\n",
6446 my_id, wall_1, wall_2, wall_3);
6447 fflush(NULL);
6448 }
6449 P = hypre_ParCSRMatrixCreate(comm,
6450 hypre_ParCSRMatrixGlobalNumRows(A),
6451 total_global_cpts,
6452 hypre_ParCSRMatrixColStarts(A),
6453 num_cpts_global,
6454 0,
6455 P_diag_i[n_fine],
6456 P_offd_i[n_fine]);
6457
6458 P_diag = hypre_ParCSRMatrixDiag(P);
6459 hypre_CSRMatrixData(P_diag) = P_diag_data;
6460 hypre_CSRMatrixI(P_diag) = P_diag_i;
6461 hypre_CSRMatrixJ(P_diag) = P_diag_j;
6462 P_offd = hypre_ParCSRMatrixOffd(P);
6463 hypre_CSRMatrixData(P_offd) = P_offd_data;
6464 hypre_CSRMatrixI(P_offd) = P_offd_i;
6465 hypre_CSRMatrixJ(P_offd) = P_offd_j;
6466
6467 /* Compress P, removing coefficients smaller than trunc_factor * Max */
6468 if (trunc_factor != 0.0 || max_elmts > 0)
6469 {
6470 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
6471 P_diag_data = hypre_CSRMatrixData(P_diag);
6472 P_diag_i = hypre_CSRMatrixI(P_diag);
6473 P_diag_j = hypre_CSRMatrixJ(P_diag);
6474 P_offd_data = hypre_CSRMatrixData(P_offd);
6475 P_offd_i = hypre_CSRMatrixI(P_offd);
6476 P_offd_j = hypre_CSRMatrixJ(P_offd);
6477 P_diag_size = P_diag_i[n_fine];
6478 P_offd_size = P_offd_i[n_fine];
6479 }
6480
6481 /* This builds col_map, col_map should be monotone increasing and contain
6482 * global numbers. */
6483 num_cols_P_offd = 0;
6484 if(P_offd_size)
6485 {
6486 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
6487 P_marker = hypre_CTAlloc(HYPRE_Int, full_off_procNodes, HYPRE_MEMORY_HOST);
6488
6489 for (i=0; i < full_off_procNodes; i++)
6490 P_marker[i] = 0;
6491
6492 num_cols_P_offd = 0;
6493 for (i=0; i < P_offd_size; i++)
6494 {
6495 index = P_offd_j[i];
6496 if (!P_marker[index])
6497 {
6498 if(tmp_CF_marker_offd[index] >= 0)
6499 {
6500 num_cols_P_offd++;
6501 P_marker[index] = 1;
6502 }
6503 }
6504 }
6505
6506 col_map_offd_P = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
6507
6508 index = 0;
6509 for(i = 0; i < num_cols_P_offd; i++)
6510 {
6511 while( P_marker[index] == 0) index++;
6512 col_map_offd_P[i] = index++;
6513 }
6514 for(i = 0; i < P_offd_size; i++)
6515 P_offd_j[i] = hypre_BinarySearch(col_map_offd_P,
6516 P_offd_j[i],
6517 num_cols_P_offd);
6518
6519 index = 0;
6520 for(i = 0; i < num_cols_P_offd; i++)
6521 {
6522 while (P_marker[index] == 0) index++;
6523
6524 col_map_offd_P[i] = fine_to_coarse_offd[index];
6525 index++;
6526 }
6527
6528 /* Sort the col_map_offd_P and P_offd_j correctly */
6529 for(i = 0; i < num_cols_P_offd; i++)
6530 P_marker[i] = col_map_offd_P[i];
6531
6532 /* Check if sort actually changed anything */
6533 if(ssort(col_map_offd_P,num_cols_P_offd))
6534 {
6535 for(i = 0; i < P_offd_size; i++)
6536 for(j = 0; j < num_cols_P_offd; j++)
6537 if(P_marker[P_offd_j[i]] == col_map_offd_P[j])
6538 {
6539 P_offd_j[i] = j;
6540 j = num_cols_P_offd;
6541 }
6542 }
6543 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
6544 }
6545
6546 if (num_cols_P_offd)
6547 {
6548 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
6549 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
6550 }
6551
6552 hypre_MatvecCommPkgCreate(P);
6553
6554 for (i=0; i < n_fine; i++)
6555 if (CF_marker[i] == -3) CF_marker[i] = -1;
6556
6557 *P_ptr = P;
6558
6559 /* Deallocate memory */
6560 hypre_TFree(fine_to_coarse, HYPRE_MEMORY_HOST);
6561 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
6562 hypre_TFree(ahat, HYPRE_MEMORY_HOST);
6563 hypre_TFree(ihat, HYPRE_MEMORY_HOST);
6564 hypre_TFree(ipnt, HYPRE_MEMORY_HOST);
6565
6566 if (full_off_procNodes)
6567 {
6568 hypre_TFree(ahat_offd, HYPRE_MEMORY_HOST);
6569 hypre_TFree(ihat_offd, HYPRE_MEMORY_HOST);
6570 hypre_TFree(ipnt_offd, HYPRE_MEMORY_HOST);
6571 }
6572 if (num_procs > 1)
6573 {
6574 hypre_CSRMatrixDestroy(Sop);
6575 hypre_CSRMatrixDestroy(A_ext);
6576 hypre_TFree(fine_to_coarse_offd, HYPRE_MEMORY_HOST);
6577 hypre_TFree(P_marker_offd, HYPRE_MEMORY_HOST);
6578 hypre_TFree(CF_marker_offd, HYPRE_MEMORY_HOST);
6579 hypre_TFree(tmp_CF_marker_offd, HYPRE_MEMORY_HOST);
6580
6581 if(num_functions > 1)
6582 hypre_TFree(dof_func_offd, HYPRE_MEMORY_HOST);
6583 hypre_TFree(found, HYPRE_MEMORY_HOST);
6584
6585 hypre_MatvecCommPkgDestroy(extend_comm_pkg);
6586
6587 }
6588
6589
6590 return hypre_error_flag;
6591 }
6592
6593 #endif
6594