1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "_hypre_parcsr_ls.h"
9 #include "aux_interp.h"
10
11 /*---------------------------------------------------------------------------
12 * hypre_BoomerAMGBuildModExtInterp
13 * Comment:
14 *--------------------------------------------------------------------------*/
15 HYPRE_Int
hypre_BoomerAMGBuildModExtInterpHost(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRMatrix ** P_ptr)16 hypre_BoomerAMGBuildModExtInterpHost(hypre_ParCSRMatrix *A,
17 HYPRE_Int *CF_marker,
18 hypre_ParCSRMatrix *S,
19 HYPRE_BigInt *num_cpts_global,
20 HYPRE_Int num_functions,
21 HYPRE_Int *dof_func,
22 HYPRE_Int debug_flag,
23 HYPRE_Real trunc_factor,
24 HYPRE_Int max_elmts,
25 hypre_ParCSRMatrix **P_ptr)
26 {
27 /* Communication Variables */
28 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
29 HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
30 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
31 hypre_ParCSRCommHandle *comm_handle = NULL;
32 HYPRE_Int my_id, num_procs;
33
34 /* Variables to store input variables */
35 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
36 HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
37 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
38 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
39
40 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
41 HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
42 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
43 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
44
45 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
46 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
47 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
48
49 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
50 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
51 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
52
53 HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
54 HYPRE_BigInt total_global_cpts;
55
56 /* Interpolation matrix P */
57 hypre_ParCSRMatrix *P;
58 hypre_CSRMatrix *P_diag;
59 hypre_CSRMatrix *P_offd;
60
61 HYPRE_Real *P_diag_data = NULL;
62 HYPRE_Int *P_diag_i, *P_diag_j = NULL;
63 HYPRE_Real *P_offd_data = NULL;
64 HYPRE_Int *P_offd_i, *P_offd_j = NULL;
65
66 /* Intermediate matrices */
67 hypre_ParCSRMatrix *As_FF, *As_FC, *W;
68 HYPRE_Real *D_q, *D_w;
69 hypre_CSRMatrix *As_FF_diag;
70 hypre_CSRMatrix *As_FF_offd;
71 hypre_CSRMatrix *As_FC_diag;
72 hypre_CSRMatrix *As_FC_offd;
73 hypre_CSRMatrix *W_diag;
74 hypre_CSRMatrix *W_offd;
75
76 HYPRE_Int *As_FF_diag_i;
77 HYPRE_Int *As_FF_offd_i;
78 HYPRE_Int *As_FC_diag_i;
79 HYPRE_Int *As_FC_offd_i;
80 HYPRE_Int *W_diag_i;
81 HYPRE_Int *W_offd_i;
82 HYPRE_Int *W_diag_j;
83 HYPRE_Int *W_offd_j;
84
85 HYPRE_Real *As_FF_diag_data;
86 HYPRE_Real *As_FF_offd_data;
87 HYPRE_Real *As_FC_diag_data;
88 HYPRE_Real *As_FC_offd_data;
89 HYPRE_Real *W_diag_data;
90 HYPRE_Real *W_offd_data;
91
92 HYPRE_BigInt *col_map_offd_P = NULL;
93 HYPRE_BigInt *new_col_map_offd = NULL;
94 HYPRE_Int P_diag_size;
95 HYPRE_Int P_offd_size;
96 HYPRE_Int new_ncols_P_offd;
97 HYPRE_Int num_cols_P_offd;
98 HYPRE_Int *P_marker = NULL;
99 HYPRE_Int *dof_func_offd = NULL;
100
101 /* Loop variables */
102 HYPRE_Int index;
103 HYPRE_Int i, j;
104 HYPRE_Int *cpt_array;
105 HYPRE_Int *start_array;
106 HYPRE_Int *startf_array;
107 HYPRE_Int start, stop, startf, stopf;
108 HYPRE_Int cnt_diag, cnt_offd, row, c_pt;
109
110 /* Definitions */
111 //HYPRE_Real wall_time;
112 HYPRE_Int n_Cpts, n_Fpts;
113 HYPRE_Int num_threads = hypre_NumThreads();
114
115 //if (debug_flag==4) wall_time = time_getWallclockSeconds();
116
117 /* BEGIN */
118 hypre_MPI_Comm_size(comm, &num_procs);
119 hypre_MPI_Comm_rank(comm,&my_id);
120
121 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
122 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
123 n_Cpts = num_cpts_global[1]-num_cpts_global[0];
124
125 hypre_ParCSRMatrixGenerateFFFC(A, CF_marker, num_cpts_global, S, &As_FC, &As_FF);
126
127 As_FC_diag = hypre_ParCSRMatrixDiag(As_FC);
128 As_FC_diag_i = hypre_CSRMatrixI(As_FC_diag);
129 As_FC_diag_data = hypre_CSRMatrixData(As_FC_diag);
130 As_FC_offd = hypre_ParCSRMatrixOffd(As_FC);
131 As_FC_offd_i = hypre_CSRMatrixI(As_FC_offd);
132 As_FC_offd_data = hypre_CSRMatrixData(As_FC_offd);
133 As_FF_diag = hypre_ParCSRMatrixDiag(As_FF);
134 As_FF_diag_i = hypre_CSRMatrixI(As_FF_diag);
135 As_FF_diag_data = hypre_CSRMatrixData(As_FF_diag);
136 As_FF_offd = hypre_ParCSRMatrixOffd(As_FF);
137 As_FF_offd_i = hypre_CSRMatrixI(As_FF_offd);
138 As_FF_offd_data = hypre_CSRMatrixData(As_FF_offd);
139 n_Fpts = hypre_CSRMatrixNumRows(As_FF_diag);
140
141 D_q = hypre_CTAlloc(HYPRE_Real, n_Fpts, HYPRE_MEMORY_HOST);
142 D_w = hypre_CTAlloc(HYPRE_Real, n_Fpts, HYPRE_MEMORY_HOST);
143 cpt_array = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
144 start_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
145 startf_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
146
147 #ifdef HYPRE_USING_OPENMP
148 #pragma omp parallel private(i,j,start,stop,startf,stopf,row)
149 #endif
150 {
151 HYPRE_Int my_thread_num = hypre_GetThreadNum();
152 HYPRE_Real beta, gamma;
153
154 start = (n_fine/num_threads)*my_thread_num;
155 if (my_thread_num == num_threads-1)
156 {
157 stop = n_fine;
158 }
159 else
160 {
161 stop = (n_fine/num_threads)*(my_thread_num+1);
162 }
163 start_array[my_thread_num+1] = stop;
164 for (i=start; i < stop; i++)
165 {
166 if (CF_marker[i] > 0)
167 {
168 cpt_array[my_thread_num]++;
169 }
170 }
171
172 #ifdef HYPRE_USING_OPENMP
173 #pragma omp barrier
174 #endif
175 if (my_thread_num == 0)
176 {
177 for (i=1; i < num_threads; i++)
178 {
179 cpt_array[i] += cpt_array[i-1];
180 }
181 if (num_functions > 1)
182 {
183 HYPRE_Int *int_buf_data = NULL;
184 HYPRE_Int num_sends, startc;
185 HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
186 dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
187 index = 0;
188 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
189 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
190 for (i = 0; i < num_sends; i++)
191 {
192 startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
193 for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
194 {
195 int_buf_data[index++] = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
196 }
197 }
198 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, dof_func_offd);
199 hypre_ParCSRCommHandleDestroy(comm_handle);
200 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
201 }
202 }
203
204 #ifdef HYPRE_USING_OPENMP
205 #pragma omp barrier
206 #endif
207 if (my_thread_num > 0)
208 startf = start - cpt_array[my_thread_num-1];
209 else
210 startf = 0;
211
212 if (my_thread_num < num_threads-1)
213 stopf = stop - cpt_array[my_thread_num];
214 else
215 stopf = n_Fpts;
216
217 startf_array[my_thread_num+1] = stopf;
218
219 /* Create D_q = D_beta */
220 for (i=startf; i < stopf; i++)
221 {
222 for (j=As_FC_diag_i[i]; j < As_FC_diag_i[i+1]; j++)
223 {
224 D_q[i] += As_FC_diag_data[j];
225 }
226 for (j=As_FC_offd_i[i]; j < As_FC_offd_i[i+1]; j++)
227 {
228 D_q[i] += As_FC_offd_data[j];
229 }
230 }
231
232 /* Create D_w = D_alpha + D_gamma */
233 row = startf;
234 for (i=start; i < stop; i++)
235 {
236 if (CF_marker[i] < 0)
237 {
238 if (num_functions > 1)
239 {
240 HYPRE_Int jA, jS, jC;
241 jC = A_diag_i[i];
242 for (j=S_diag_i[i]; j < S_diag_i[i+1]; j++)
243 {
244 jS = S_diag_j[j];
245 jA = A_diag_j[jC];
246 while (jA != jS)
247 {
248 if (dof_func[i] == dof_func[jA])
249 {
250 D_w[row] += A_diag_data[jC++];
251 }
252 else
253 jC++;
254 jA = A_diag_j[jC];
255 }
256 jC++;
257 }
258 for (j=jC; j < A_diag_i[i+1]; j++)
259 {
260 if (dof_func[i] == dof_func[A_diag_j[j]])
261 D_w[row] += A_diag_data[j];
262 }
263 jC = A_offd_i[i];
264 for (j=S_offd_i[i]; j < S_offd_i[i+1]; j++)
265 {
266 jS = S_offd_j[j];
267 jA = A_offd_j[jC];
268 while (jA != jS)
269 {
270 if (dof_func[i] == dof_func_offd[jA])
271 {
272 D_w[row] += A_offd_data[jC++];
273 }
274 else
275 jC++;
276 jA = A_offd_j[jC];
277 }
278 jC++;
279 }
280 for (j=jC; j < A_offd_i[i+1]; j++)
281 {
282 if (dof_func[i] == dof_func_offd[A_offd_j[j]])
283 D_w[row] += A_offd_data[j];
284 }
285 row++;
286 }
287 else
288 {
289 for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++)
290 {
291 D_w[row] += A_diag_data[j];
292 }
293 for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++)
294 {
295 D_w[row] += A_offd_data[j];
296 }
297 for (j=As_FF_diag_i[row]+1; j < As_FF_diag_i[row+1]; j++)
298 {
299 D_w[row] -= As_FF_diag_data[j];
300 }
301 for (j=As_FF_offd_i[row]; j < As_FF_offd_i[row+1]; j++)
302 {
303 D_w[row] -= As_FF_offd_data[j];
304 }
305 D_w[row] -= D_q[row];
306 row++;
307 }
308 }
309 }
310
311 for (i=startf; i<stopf; i++)
312 {
313 j = As_FF_diag_i[i];
314 if (D_w[i]) beta = 1.0/D_w[i];
315 else beta = 1.0;
316 As_FF_diag_data[j] = beta*D_q[i];
317 if (D_q[i]) gamma = -1.0/D_q[i];
318 else gamma = 1.0;
319 for (j=As_FF_diag_i[i]+1; j < As_FF_diag_i[i+1]; j++)
320 As_FF_diag_data[j] *= beta;
321 for (j=As_FF_offd_i[i]; j < As_FF_offd_i[i+1]; j++)
322 As_FF_offd_data[j] *= beta;
323 for (j=As_FC_diag_i[i]; j < As_FC_diag_i[i+1]; j++)
324 As_FC_diag_data[j] *= gamma;
325 for (j=As_FC_offd_i[i]; j < As_FC_offd_i[i+1]; j++)
326 As_FC_offd_data[j] *= gamma;
327 }
328
329 } /* end parallel region */
330
331 W = hypre_ParMatmul(As_FF, As_FC);
332 W_diag = hypre_ParCSRMatrixDiag(W);
333 W_offd = hypre_ParCSRMatrixOffd(W);
334 W_diag_i = hypre_CSRMatrixI(W_diag);
335 W_diag_j = hypre_CSRMatrixJ(W_diag);
336 W_diag_data = hypre_CSRMatrixData(W_diag);
337 W_offd_i = hypre_CSRMatrixI(W_offd);
338 W_offd_j = hypre_CSRMatrixJ(W_offd);
339 W_offd_data = hypre_CSRMatrixData(W_offd);
340 num_cols_P_offd = hypre_CSRMatrixNumCols(W_offd);
341 /*-----------------------------------------------------------------------
342 * Intialize data for P
343 *-----------------------------------------------------------------------*/
344 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
345 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
346
347 P_diag_size = n_Cpts + hypre_CSRMatrixI(W_diag)[n_Fpts];
348 P_offd_size = hypre_CSRMatrixI(W_offd)[n_Fpts];
349
350 if (P_diag_size)
351 {
352 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, memory_location_P);
353 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, memory_location_P);
354 }
355
356 if (P_offd_size)
357 {
358 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, memory_location_P);
359 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, memory_location_P);
360 }
361
362 #ifdef HYPRE_USING_OPENMP
363 #pragma omp parallel private(i,j,start,stop,startf,stopf,c_pt,row,cnt_diag,cnt_offd)
364 #endif
365 {
366 HYPRE_Int my_thread_num = hypre_GetThreadNum();
367 startf = startf_array[my_thread_num];
368 stopf = startf_array[my_thread_num+1];
369 start = start_array[my_thread_num];
370 stop = start_array[my_thread_num+1];
371
372 if (my_thread_num > 0)
373 c_pt = cpt_array[my_thread_num-1];
374 else
375 c_pt = 0;
376 cnt_diag = W_diag_i[startf]+c_pt;
377 cnt_offd = W_offd_i[startf];
378 row = startf;
379 for (i=start; i < stop; i++)
380 {
381 if (CF_marker[i] > 0)
382 {
383 P_diag_j[cnt_diag] = c_pt++;
384 P_diag_data[cnt_diag++] = 1.0;
385 }
386 else
387 {
388 for (j=W_diag_i[row]; j < W_diag_i[row+1]; j++)
389 {
390 P_diag_j[cnt_diag] = W_diag_j[j];
391 P_diag_data[cnt_diag++] = W_diag_data[j];
392 }
393 for (j=W_offd_i[row]; j < W_offd_i[row+1]; j++)
394 {
395 P_offd_j[cnt_offd] = W_offd_j[j];
396 P_offd_data[cnt_offd++] = W_offd_data[j];
397 }
398 row++;
399 }
400 P_diag_i[i+1] = cnt_diag;
401 P_offd_i[i+1] = cnt_offd;
402 }
403
404 } /* end parallel region */
405
406 /*-----------------------------------------------------------------------
407 * Create matrix
408 *-----------------------------------------------------------------------*/
409
410 P = hypre_ParCSRMatrixCreate(comm,
411 hypre_ParCSRMatrixGlobalNumRows(A),
412 total_global_cpts,
413 hypre_ParCSRMatrixColStarts(A),
414 num_cpts_global,
415 num_cols_P_offd,
416 P_diag_i[n_fine],
417 P_offd_i[n_fine]);
418
419 P_diag = hypre_ParCSRMatrixDiag(P);
420 hypre_CSRMatrixData(P_diag) = P_diag_data;
421 hypre_CSRMatrixI(P_diag) = P_diag_i;
422 hypre_CSRMatrixJ(P_diag) = P_diag_j;
423 P_offd = hypre_ParCSRMatrixOffd(P);
424 hypre_CSRMatrixData(P_offd) = P_offd_data;
425 hypre_CSRMatrixI(P_offd) = P_offd_i;
426 hypre_CSRMatrixJ(P_offd) = P_offd_j;
427 hypre_ParCSRMatrixColMapOffd(P) = hypre_ParCSRMatrixColMapOffd(W);
428 hypre_ParCSRMatrixColMapOffd(W) = NULL;
429
430 hypre_CSRMatrixMemoryLocation(P_diag) = memory_location_P;
431 hypre_CSRMatrixMemoryLocation(P_offd) = memory_location_P;
432
433 /* Compress P, removing coefficients smaller than trunc_factor * Max */
434 if (trunc_factor != 0.0 || max_elmts > 0)
435 {
436 HYPRE_Int *map;
437 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
438 P_diag_data = hypre_CSRMatrixData(P_diag);
439 P_diag_i = hypre_CSRMatrixI(P_diag);
440 P_diag_j = hypre_CSRMatrixJ(P_diag);
441 P_offd_data = hypre_CSRMatrixData(P_offd);
442 P_offd_i = hypre_CSRMatrixI(P_offd);
443 P_offd_j = hypre_CSRMatrixJ(P_offd);
444 P_diag_size = P_diag_i[n_fine];
445 P_offd_size = P_offd_i[n_fine];
446
447 col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
448 if (num_cols_P_offd)
449 {
450 P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
451 for (i=0; i < P_offd_size; i++)
452 {
453 P_marker[P_offd_j[i]] = 1;
454 }
455
456 new_ncols_P_offd = 0;
457 for (i=0; i < num_cols_P_offd; i++)
458 {
459 if (P_marker[i]) new_ncols_P_offd++;
460 }
461
462 new_col_map_offd = hypre_CTAlloc(HYPRE_BigInt, new_ncols_P_offd, HYPRE_MEMORY_HOST);
463 map = hypre_CTAlloc(HYPRE_Int, new_ncols_P_offd, HYPRE_MEMORY_HOST);
464
465 index = 0;
466 for (i=0; i < num_cols_P_offd; i++)
467 if (P_marker[i])
468 {
469 new_col_map_offd[index] = col_map_offd_P[i];
470 map[index++] = i;
471 }
472 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
473
474
475 #ifdef HYPRE_USING_OPENMP
476 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
477 #endif
478 for (i=0; i < P_offd_size; i++)
479 {
480 P_offd_j[i] = hypre_BinarySearch(map, P_offd_j[i],
481 new_ncols_P_offd);
482 }
483
484 hypre_TFree(col_map_offd_P, HYPRE_MEMORY_HOST);
485 hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
486 hypre_CSRMatrixNumCols(P_offd) = new_ncols_P_offd;
487 hypre_TFree(map, HYPRE_MEMORY_HOST);
488 }
489 }
490
491 hypre_MatvecCommPkgCreate(P);
492
493 *P_ptr = P;
494
495 /* Deallocate memory */
496 hypre_TFree(D_q, HYPRE_MEMORY_HOST);
497 hypre_TFree(D_w, HYPRE_MEMORY_HOST);
498 hypre_TFree(cpt_array, HYPRE_MEMORY_HOST);
499 hypre_TFree(start_array, HYPRE_MEMORY_HOST);
500 hypre_TFree(startf_array, HYPRE_MEMORY_HOST);
501 hypre_TFree(dof_func_offd, HYPRE_MEMORY_HOST);
502 hypre_ParCSRMatrixDestroy(As_FF);
503 hypre_ParCSRMatrixDestroy(As_FC);
504 hypre_ParCSRMatrixDestroy(W);
505
506 return hypre_error_flag;
507 }
508
509 /*-----------------------------------------------------------------------*
510 * Modularized Extended Interpolation
511 *-----------------------------------------------------------------------*/
512 HYPRE_Int
hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRMatrix ** P_ptr)513 hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A,
514 HYPRE_Int *CF_marker,
515 hypre_ParCSRMatrix *S,
516 HYPRE_BigInt *num_cpts_global,
517 HYPRE_Int num_functions,
518 HYPRE_Int *dof_func,
519 HYPRE_Int debug_flag,
520 HYPRE_Real trunc_factor,
521 HYPRE_Int max_elmts,
522 hypre_ParCSRMatrix **P_ptr)
523 {
524 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
525 hypre_GpuProfilingPushRange("ModExtInterp");
526 #endif
527
528 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
529
530 HYPRE_Int ierr = 0;
531
532 if (exec == HYPRE_EXEC_HOST)
533 {
534 ierr = hypre_BoomerAMGBuildModExtInterpHost(A,CF_marker,S,num_cpts_global,num_functions,dof_func,
535 debug_flag,trunc_factor,max_elmts,P_ptr);
536 }
537 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
538 else
539 {
540 ierr = hypre_BoomerAMGBuildExtInterpDevice(A,CF_marker,S,num_cpts_global,1,NULL,
541 debug_flag,trunc_factor,max_elmts,P_ptr);
542 }
543 #endif
544
545 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
546 hypre_GpuProfilingPopRange();
547 #endif
548
549 return ierr;
550 }
551
552
553 /*---------------------------------------------------------------------------
554 * hypre_BoomerAMGBuildModExtPIInterp
555 * Comment:
556 *--------------------------------------------------------------------------*/
557 HYPRE_Int
hypre_BoomerAMGBuildModExtPIInterpHost(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int debug_flag,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRMatrix ** P_ptr)558 hypre_BoomerAMGBuildModExtPIInterpHost(hypre_ParCSRMatrix *A,
559 HYPRE_Int *CF_marker,
560 hypre_ParCSRMatrix *S,
561 HYPRE_BigInt *num_cpts_global,
562 HYPRE_Int debug_flag,
563 HYPRE_Int num_functions,
564 HYPRE_Int *dof_func,
565 HYPRE_Real trunc_factor,
566 HYPRE_Int max_elmts,
567 hypre_ParCSRMatrix **P_ptr)
568 {
569 /* Communication Variables */
570 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
571 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
572 hypre_ParCSRCommHandle *comm_handle = NULL;
573 HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
574
575 HYPRE_Int my_id, num_procs;
576
577 /* Variables to store input variables */
578 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
579 HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
580 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
581 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
582
583 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
584 HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
585 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
586 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
587
588 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
589 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
590 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
591
592 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
593 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
594 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
595
596 HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
597 HYPRE_BigInt total_global_cpts;
598
599 hypre_CSRMatrix *As_FF_ext = NULL;
600 HYPRE_Real *As_FF_ext_data = NULL;
601 HYPRE_Int *As_FF_ext_i = NULL;
602 HYPRE_BigInt *As_FF_ext_j = NULL;
603
604 /* Interpolation matrix P */
605 hypre_ParCSRMatrix *P;
606 hypre_CSRMatrix *P_diag;
607 hypre_CSRMatrix *P_offd;
608
609 HYPRE_Real *P_diag_data = NULL;
610 HYPRE_Int *P_diag_i, *P_diag_j = NULL;
611 HYPRE_Real *P_offd_data = NULL;
612 HYPRE_Int *P_offd_i, *P_offd_j = NULL;
613
614 /* Intermediate matrices */
615 hypre_ParCSRMatrix *As_FF, *As_FC, *W;
616 HYPRE_Real *D_q, *D_w, *D_theta, *D_q_offd = NULL;
617 hypre_CSRMatrix *As_FF_diag;
618 hypre_CSRMatrix *As_FF_offd;
619 hypre_CSRMatrix *As_FC_diag;
620 hypre_CSRMatrix *As_FC_offd;
621 hypre_CSRMatrix *W_diag;
622 hypre_CSRMatrix *W_offd;
623
624 HYPRE_Int *As_FF_diag_i;
625 HYPRE_Int *As_FF_diag_j;
626 HYPRE_Int *As_FF_offd_i;
627 HYPRE_Int *As_FF_offd_j = NULL;
628 HYPRE_Int *As_FC_diag_i;
629 HYPRE_Int *As_FC_offd_i;
630 HYPRE_Int *W_diag_i;
631 HYPRE_Int *W_offd_i;
632 HYPRE_Int *W_diag_j;
633 HYPRE_Int *W_offd_j = NULL;
634
635 HYPRE_Real *As_FF_diag_data;
636 HYPRE_Real *As_FF_offd_data = NULL;
637 HYPRE_Real *As_FC_diag_data;
638 HYPRE_Real *As_FC_offd_data = NULL;
639 HYPRE_Real *W_diag_data;
640 HYPRE_Real *W_offd_data = NULL;
641 HYPRE_Real *buf_data = NULL;
642 HYPRE_Real *tmp_FF_diag_data = NULL;
643
644 HYPRE_BigInt *col_map_offd_P = NULL;
645 HYPRE_BigInt *new_col_map_offd = NULL;
646 HYPRE_BigInt first_index;
647 HYPRE_Int P_diag_size;
648 HYPRE_Int P_offd_size;
649 HYPRE_Int new_ncols_P_offd;
650 HYPRE_Int num_cols_P_offd;
651 HYPRE_Int *P_marker = NULL;
652 HYPRE_Int *dof_func_offd = NULL;
653
654 /* Loop variables */
655 HYPRE_Int index, startc, num_sends;
656 HYPRE_Int i, j, jj, k, kk;
657 HYPRE_Int *cpt_array;
658 HYPRE_Int *start_array;
659 HYPRE_Int *startf_array;
660 HYPRE_Int start, stop, startf, stopf;
661 HYPRE_Int cnt_diag, cnt_offd, row, c_pt;
662 HYPRE_Int num_cols_A_FF_offd;
663 HYPRE_Real value, value1, theta;
664
665 /* Definitions */
666 //HYPRE_Real wall_time;
667 HYPRE_Int n_Cpts, n_Fpts;
668 HYPRE_Int num_threads = hypre_NumThreads();
669
670 //if (debug_flag==4) wall_time = time_getWallclockSeconds();
671
672 /* BEGIN */
673 hypre_MPI_Comm_size(comm, &num_procs);
674 hypre_MPI_Comm_rank(comm,&my_id);
675
676 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
677 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
678 n_Cpts = num_cpts_global[1]-num_cpts_global[0];
679
680 hypre_ParCSRMatrixGenerateFFFC(A, CF_marker, num_cpts_global, S, &As_FC, &As_FF);
681
682 if (num_procs > 1)
683 {
684 As_FF_ext = hypre_ParCSRMatrixExtractBExt(As_FF,As_FF,1);
685 As_FF_ext_i = hypre_CSRMatrixI(As_FF_ext);
686 As_FF_ext_j = hypre_CSRMatrixBigJ(As_FF_ext);
687 As_FF_ext_data = hypre_CSRMatrixData(As_FF_ext);
688 }
689
690 As_FC_diag = hypre_ParCSRMatrixDiag(As_FC);
691 As_FC_diag_i = hypre_CSRMatrixI(As_FC_diag);
692 As_FC_diag_data = hypre_CSRMatrixData(As_FC_diag);
693 As_FC_offd = hypre_ParCSRMatrixOffd(As_FC);
694 As_FC_offd_i = hypre_CSRMatrixI(As_FC_offd);
695 As_FC_offd_data = hypre_CSRMatrixData(As_FC_offd);
696 As_FF_diag = hypre_ParCSRMatrixDiag(As_FF);
697 As_FF_diag_i = hypre_CSRMatrixI(As_FF_diag);
698 As_FF_diag_j = hypre_CSRMatrixJ(As_FF_diag);
699 As_FF_diag_data = hypre_CSRMatrixData(As_FF_diag);
700 As_FF_offd = hypre_ParCSRMatrixOffd(As_FF);
701 As_FF_offd_i = hypre_CSRMatrixI(As_FF_offd);
702 As_FF_offd_j = hypre_CSRMatrixJ(As_FF_offd);
703 As_FF_offd_data = hypre_CSRMatrixData(As_FF_offd);
704 n_Fpts = hypre_CSRMatrixNumRows(As_FF_diag);
705 num_cols_A_FF_offd = hypre_CSRMatrixNumCols(As_FF_offd);
706 first_index = hypre_ParCSRMatrixRowStarts(As_FF)[0];
707 tmp_FF_diag_data = hypre_CTAlloc(HYPRE_Real, As_FF_diag_i[n_Fpts], HYPRE_MEMORY_HOST);
708
709 D_q = hypre_CTAlloc(HYPRE_Real, n_Fpts, HYPRE_MEMORY_HOST);
710 D_theta = hypre_CTAlloc(HYPRE_Real, n_Fpts, HYPRE_MEMORY_HOST);
711 D_w = hypre_CTAlloc(HYPRE_Real, n_Fpts, HYPRE_MEMORY_HOST);
712 cpt_array = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
713 start_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
714 startf_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
715
716 #ifdef HYPRE_USING_OPENMP
717 #pragma omp parallel private(i,j,jj,k,kk,start,stop,startf,stopf,row,theta,value,value1)
718 #endif
719 {
720 HYPRE_Int my_thread_num = hypre_GetThreadNum();
721
722 start = (n_fine/num_threads)*my_thread_num;
723 if (my_thread_num == num_threads-1)
724 {
725 stop = n_fine;
726 }
727 else
728 {
729 stop = (n_fine/num_threads)*(my_thread_num+1);
730 }
731 start_array[my_thread_num+1] = stop;
732 for (i=start; i < stop; i++)
733 {
734 if (CF_marker[i] > 0)
735 {
736 cpt_array[my_thread_num]++;
737 }
738 }
739
740 #ifdef HYPRE_USING_OPENMP
741 #pragma omp barrier
742 #endif
743 if (my_thread_num == 0)
744 {
745 for (i=1; i < num_threads; i++)
746 {
747 cpt_array[i] += cpt_array[i-1];
748 }
749 }
750 #ifdef HYPRE_USING_OPENMP
751 #pragma omp barrier
752 #endif
753 if (my_thread_num > 0)
754 startf = start - cpt_array[my_thread_num-1];
755 else
756 startf = 0;
757
758 if (my_thread_num < num_threads-1)
759 stopf = stop - cpt_array[my_thread_num];
760 else
761 stopf = n_Fpts;
762
763 startf_array[my_thread_num+1] = stopf;
764
765 for (i=startf; i < stopf; i++)
766 {
767 for (j=As_FC_diag_i[i]; j < As_FC_diag_i[i+1]; j++)
768 {
769 D_q[i] += As_FC_diag_data[j];
770 }
771 for (j=As_FC_offd_i[i]; j < As_FC_offd_i[i+1]; j++)
772 {
773 D_q[i] += As_FC_offd_data[j];
774 }
775 }
776
777 for (j = As_FF_diag_i[startf]; j < As_FF_diag_i[stopf]; j++)
778 {
779 tmp_FF_diag_data[j] = As_FF_diag_data[j];
780 }
781
782
783 #ifdef HYPRE_USING_OPENMP
784 #pragma omp barrier
785 #endif
786 if (my_thread_num == 0)
787 {
788 if (num_cols_A_FF_offd)
789 {
790 D_q_offd = hypre_CTAlloc(HYPRE_Real, num_cols_A_FF_offd, HYPRE_MEMORY_HOST);
791 }
792 index = 0;
793 comm_pkg = hypre_ParCSRMatrixCommPkg(As_FF);
794 if (!comm_pkg)
795 {
796 hypre_MatvecCommPkgCreate(As_FF);
797 comm_pkg = hypre_ParCSRMatrixCommPkg(As_FF);
798 }
799 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
800 buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
801 for (i = 0; i < num_sends; i++)
802 {
803 startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
804 for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
805 {
806 buf_data[index++] = D_q[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
807 }
808 }
809
810 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, buf_data, D_q_offd);
811 hypre_ParCSRCommHandleDestroy(comm_handle);
812
813 if (num_functions > 1)
814 {
815 HYPRE_Int *int_buf_data = NULL;
816 HYPRE_Int num_sends, startc;
817 HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
818 dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
819 index = 0;
820 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
821 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
822 for (i = 0; i < num_sends; i++)
823 {
824 startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
825 for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
826 {
827 int_buf_data[index++] = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
828 }
829 }
830 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, dof_func_offd);
831 hypre_ParCSRCommHandleDestroy(comm_handle);
832 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
833 }
834 }
835
836 #ifdef HYPRE_USING_OPENMP
837 #pragma omp barrier
838 #endif
839
840 row = startf;
841 for (i=start; i < stop; i++)
842 {
843 HYPRE_Int jA, jC, jS;
844 if (CF_marker[i] < 0)
845 {
846 if (num_functions > 1)
847 {
848 jC = A_diag_i[i];
849 for (j=S_diag_i[i]; j < S_diag_i[i+1]; j++)
850 {
851 jS = S_diag_j[j];
852 jA = A_diag_j[jC];
853 while (jA != jS)
854 {
855 if (dof_func[i] == dof_func[jA])
856 {
857 D_w[row] += A_diag_data[jC++];
858 }
859 else
860 jC++;
861 jA = A_diag_j[jC];
862 }
863 jC++;
864 }
865 for (j=jC; j < A_diag_i[i+1]; j++)
866 {
867 if (dof_func[i] == dof_func[A_diag_j[j]])
868 D_w[row] += A_diag_data[j];
869 }
870 jC = A_offd_i[i];
871 for (j=S_offd_i[i]; j < S_offd_i[i+1]; j++)
872 {
873 jS = S_offd_j[j];
874 jA = A_offd_j[jC];
875 while (jA != jS)
876 {
877 if (dof_func[i] == dof_func_offd[jA])
878 {
879 D_w[row] += A_offd_data[jC++];
880 }
881 else
882 jC++;
883 jA = A_offd_j[jC];
884 }
885 jC++;
886 }
887 for (j=jC; j < A_offd_i[i+1]; j++)
888 {
889 if (dof_func[i] == dof_func_offd[A_offd_j[j]])
890 D_w[row] += A_offd_data[j];
891 }
892 row++;
893 }
894 else
895 {
896 for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++)
897 {
898 D_w[row] += A_diag_data[j];
899 }
900 for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++)
901 {
902 D_w[row] += A_offd_data[j];
903 }
904 for (j=As_FF_diag_i[row]+1; j < As_FF_diag_i[row+1]; j++)
905 {
906 D_w[row] -= As_FF_diag_data[j];
907 }
908 for (j=As_FF_offd_i[row]; j < As_FF_offd_i[row+1]; j++)
909 {
910 D_w[row] -= As_FF_offd_data[j];
911 }
912 D_w[row] -= D_q[row];
913 row++;
914 }
915 }
916 }
917
918 for (i=startf; i<stopf; i++)
919 {
920 for (j = As_FF_diag_i[i]+1; j < As_FF_diag_i[i+1]; j++)
921 {
922 jj = As_FF_diag_j[j];
923 value = D_q[jj];
924 for (k = As_FF_diag_i[jj]+1; k < As_FF_diag_i[jj+1]; k++)
925 {
926 kk = As_FF_diag_j[k];
927 if (kk == i)
928 {
929 value1 = tmp_FF_diag_data[k];
930 value += value1;
931 D_theta[i] += As_FF_diag_data[j]*value1/value;
932 break;
933 }
934 }
935 As_FF_diag_data[j] /= value;
936 }
937 for (j = As_FF_offd_i[i]; j < As_FF_offd_i[i+1]; j++)
938 {
939 jj = As_FF_offd_j[j];
940 value = D_q_offd[jj];
941 for (k = As_FF_ext_i[jj]; k < As_FF_ext_i[jj+1]; k++)
942 {
943 kk = (HYPRE_Int)(As_FF_ext_j[k] - first_index);
944 if (kk == i)
945 {
946 value1 = As_FF_ext_data[k];
947 value += value1;
948 D_theta[i] += As_FF_offd_data[j]*value1/value;
949 break;
950 }
951 }
952 As_FF_offd_data[j] /= value;
953 }
954 As_FF_diag_data[As_FF_diag_i[i]] = 1.0;
955 }
956
957 #ifdef HYPRE_USING_OPENMP
958 #pragma omp barrier
959 #endif
960
961 for (i=startf; i<stopf; i++)
962 {
963 theta = (D_theta[i]+D_w[i]);
964 if (theta)
965 {
966 theta = -1.0/theta;
967 for (j=As_FF_diag_i[i]; j < As_FF_diag_i[i+1]; j++)
968 As_FF_diag_data[j] *= theta;
969 for (j=As_FF_offd_i[i]; j < As_FF_offd_i[i+1]; j++)
970 As_FF_offd_data[j] *= theta;
971 }
972 }
973
974 } /* end parallel region */
975
976 W = hypre_ParMatmul(As_FF, As_FC);
977 W_diag = hypre_ParCSRMatrixDiag(W);
978 W_offd = hypre_ParCSRMatrixOffd(W);
979 W_diag_i = hypre_CSRMatrixI(W_diag);
980 W_diag_j = hypre_CSRMatrixJ(W_diag);
981 W_diag_data = hypre_CSRMatrixData(W_diag);
982 W_offd_i = hypre_CSRMatrixI(W_offd);
983 W_offd_j = hypre_CSRMatrixJ(W_offd);
984 W_offd_data = hypre_CSRMatrixData(W_offd);
985 num_cols_P_offd = hypre_CSRMatrixNumCols(W_offd);
986 /*-----------------------------------------------------------------------
987 * Intialize data for P
988 *-----------------------------------------------------------------------*/
989 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
990 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
991
992 P_diag_size = n_Cpts + hypre_CSRMatrixI(W_diag)[n_Fpts];
993 P_offd_size = hypre_CSRMatrixI(W_offd)[n_Fpts];
994
995 if (P_diag_size)
996 {
997 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, memory_location_P);
998 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, memory_location_P);
999 }
1000
1001 if (P_offd_size)
1002 {
1003 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, memory_location_P);
1004 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, memory_location_P);
1005 }
1006
1007 #ifdef HYPRE_USING_OPENMP
1008 #pragma omp parallel private(i,j,start,stop,startf,stopf,c_pt,row,cnt_diag,cnt_offd)
1009 #endif
1010 {
1011 HYPRE_Int my_thread_num = hypre_GetThreadNum();
1012 startf = startf_array[my_thread_num];
1013 stopf = startf_array[my_thread_num+1];
1014 start = start_array[my_thread_num];
1015 stop = start_array[my_thread_num+1];
1016
1017 if (my_thread_num > 0)
1018 c_pt = cpt_array[my_thread_num-1];
1019 else
1020 c_pt = 0;
1021 cnt_diag = W_diag_i[startf]+c_pt;
1022 cnt_offd = W_offd_i[startf];
1023 row = startf;
1024 for (i=start; i < stop; i++)
1025 {
1026 if (CF_marker[i] > 0)
1027 {
1028 P_diag_j[cnt_diag] = c_pt++;
1029 P_diag_data[cnt_diag++] = 1.0;
1030 }
1031 else
1032 {
1033 for (j=W_diag_i[row]; j < W_diag_i[row+1]; j++)
1034 {
1035 P_diag_j[cnt_diag] = W_diag_j[j];
1036 P_diag_data[cnt_diag++] = W_diag_data[j];
1037 }
1038 for (j=W_offd_i[row]; j < W_offd_i[row+1]; j++)
1039 {
1040 P_offd_j[cnt_offd] = W_offd_j[j];
1041 P_offd_data[cnt_offd++] = W_offd_data[j];
1042 }
1043 row++;
1044 }
1045 P_diag_i[i+1] = cnt_diag;
1046 P_offd_i[i+1] = cnt_offd;
1047 }
1048
1049 } /* end parallel region */
1050
1051 /*-----------------------------------------------------------------------
1052 * Create matrix
1053 *-----------------------------------------------------------------------*/
1054
1055 P = hypre_ParCSRMatrixCreate(comm,
1056 hypre_ParCSRMatrixGlobalNumRows(A),
1057 total_global_cpts,
1058 hypre_ParCSRMatrixColStarts(A),
1059 num_cpts_global,
1060 num_cols_P_offd,
1061 P_diag_i[n_fine],
1062 P_offd_i[n_fine]);
1063
1064 P_diag = hypre_ParCSRMatrixDiag(P);
1065 hypre_CSRMatrixData(P_diag) = P_diag_data;
1066 hypre_CSRMatrixI(P_diag) = P_diag_i;
1067 hypre_CSRMatrixJ(P_diag) = P_diag_j;
1068 P_offd = hypre_ParCSRMatrixOffd(P);
1069 hypre_CSRMatrixData(P_offd) = P_offd_data;
1070 hypre_CSRMatrixI(P_offd) = P_offd_i;
1071 hypre_CSRMatrixJ(P_offd) = P_offd_j;
1072 hypre_ParCSRMatrixColMapOffd(P) = hypre_ParCSRMatrixColMapOffd(W);
1073 hypre_ParCSRMatrixColMapOffd(W) = NULL;
1074
1075 hypre_CSRMatrixMemoryLocation(P_diag) = memory_location_P;
1076 hypre_CSRMatrixMemoryLocation(P_offd) = memory_location_P;
1077
1078 /* Compress P, removing coefficients smaller than trunc_factor * Max */
1079 if (trunc_factor != 0.0 || max_elmts > 0)
1080 {
1081 HYPRE_Int *map;
1082 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
1083 P_diag_data = hypre_CSRMatrixData(P_diag);
1084 P_diag_i = hypre_CSRMatrixI(P_diag);
1085 P_diag_j = hypre_CSRMatrixJ(P_diag);
1086 P_offd_data = hypre_CSRMatrixData(P_offd);
1087 P_offd_i = hypre_CSRMatrixI(P_offd);
1088 P_offd_j = hypre_CSRMatrixJ(P_offd);
1089 P_diag_size = P_diag_i[n_fine];
1090 P_offd_size = P_offd_i[n_fine];
1091
1092 col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
1093 if (num_cols_P_offd)
1094 {
1095 P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
1096 for (i=0; i < P_offd_size; i++)
1097 P_marker[P_offd_j[i]] = 1;
1098
1099 new_ncols_P_offd = 0;
1100 for (i=0; i < num_cols_P_offd; i++)
1101 if (P_marker[i]) new_ncols_P_offd++;
1102
1103 new_col_map_offd = hypre_CTAlloc(HYPRE_BigInt, new_ncols_P_offd, HYPRE_MEMORY_HOST);
1104 map = hypre_CTAlloc(HYPRE_Int, new_ncols_P_offd, HYPRE_MEMORY_HOST);
1105
1106 index = 0;
1107 for (i=0; i < num_cols_P_offd; i++)
1108 if (P_marker[i])
1109 {
1110 new_col_map_offd[index] = col_map_offd_P[i];
1111 map[index++] = i;
1112 }
1113 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
1114
1115
1116 #ifdef HYPRE_USING_OPENMP
1117 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
1118 #endif
1119 for (i=0; i < P_offd_size; i++)
1120 {
1121 P_offd_j[i] = hypre_BinarySearch(map, P_offd_j[i],
1122 new_ncols_P_offd);
1123 }
1124 hypre_TFree(col_map_offd_P, HYPRE_MEMORY_HOST);
1125 hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
1126 hypre_CSRMatrixNumCols(P_offd) = new_ncols_P_offd;
1127 hypre_TFree(map, HYPRE_MEMORY_HOST);
1128 }
1129 }
1130
1131 hypre_MatvecCommPkgCreate(P);
1132
1133 *P_ptr = P;
1134
1135 /* Deallocate memory */
1136 hypre_TFree(D_q, HYPRE_MEMORY_HOST);
1137 hypre_TFree(D_q_offd, HYPRE_MEMORY_HOST);
1138 hypre_TFree(D_w, HYPRE_MEMORY_HOST);
1139 hypre_TFree(D_theta, HYPRE_MEMORY_HOST);
1140 hypre_TFree(dof_func_offd, HYPRE_MEMORY_HOST);
1141 hypre_TFree(cpt_array, HYPRE_MEMORY_HOST);
1142 hypre_TFree(start_array, HYPRE_MEMORY_HOST);
1143 hypre_TFree(startf_array, HYPRE_MEMORY_HOST);
1144 hypre_TFree(buf_data, HYPRE_MEMORY_HOST);
1145 hypre_TFree(tmp_FF_diag_data, HYPRE_MEMORY_HOST);
1146 hypre_ParCSRMatrixDestroy(As_FF);
1147 hypre_ParCSRMatrixDestroy(As_FC);
1148 hypre_ParCSRMatrixDestroy(W);
1149 hypre_CSRMatrixDestroy(As_FF_ext);
1150
1151 return hypre_error_flag;
1152 }
1153
1154 /*-----------------------------------------------------------------------*
1155 * Modularized Extended+i Interpolation
1156 *-----------------------------------------------------------------------*/
1157 HYPRE_Int
hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRMatrix ** P_ptr)1158 hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A,
1159 HYPRE_Int *CF_marker,
1160 hypre_ParCSRMatrix *S,
1161 HYPRE_BigInt *num_cpts_global,
1162 HYPRE_Int num_functions,
1163 HYPRE_Int *dof_func,
1164 HYPRE_Int debug_flag,
1165 HYPRE_Real trunc_factor,
1166 HYPRE_Int max_elmts,
1167 hypre_ParCSRMatrix **P_ptr)
1168 {
1169 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1170 hypre_GpuProfilingPushRange("ModExtPIInterp");
1171 #endif
1172
1173 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
1174
1175 HYPRE_Int ierr = 0;
1176
1177 if (exec == HYPRE_EXEC_HOST)
1178 {
1179 ierr = hypre_BoomerAMGBuildModExtPIInterpHost(A, CF_marker, S, num_cpts_global,
1180 debug_flag, num_functions, dof_func,
1181 trunc_factor, max_elmts, P_ptr);
1182 }
1183 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1184 else
1185 {
1186 ierr = hypre_BoomerAMGBuildExtPIInterpDevice(A, CF_marker, S, num_cpts_global, 1, NULL,
1187 debug_flag, trunc_factor, max_elmts, P_ptr);
1188 }
1189 #endif
1190
1191 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1192 hypre_GpuProfilingPopRange();
1193 #endif
1194
1195 return ierr;
1196 }
1197
1198 /*---------------------------------------------------------------------------
1199 * hypre_BoomerAMGBuildModExtPEInterp
1200 * Comment:
1201 *--------------------------------------------------------------------------*/
1202 HYPRE_Int
hypre_BoomerAMGBuildModExtPEInterpHost(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRMatrix ** P_ptr)1203 hypre_BoomerAMGBuildModExtPEInterpHost(hypre_ParCSRMatrix *A,
1204 HYPRE_Int *CF_marker,
1205 hypre_ParCSRMatrix *S,
1206 HYPRE_BigInt *num_cpts_global,
1207 HYPRE_Int num_functions,
1208 HYPRE_Int *dof_func,
1209 HYPRE_Int debug_flag,
1210 HYPRE_Real trunc_factor,
1211 HYPRE_Int max_elmts,
1212 hypre_ParCSRMatrix **P_ptr)
1213 {
1214 /* Communication Variables */
1215 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
1216 HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
1217 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1218 hypre_ParCSRCommHandle *comm_handle = NULL;
1219
1220 HYPRE_Int my_id, num_procs;
1221
1222 /* Variables to store input variables */
1223 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1224 HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
1225 HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
1226 HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
1227
1228 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1229 HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
1230 HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
1231 HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
1232
1233 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
1234 HYPRE_Int *S_diag_j = hypre_CSRMatrixJ(S_diag);
1235 HYPRE_Int *S_diag_i = hypre_CSRMatrixI(S_diag);
1236
1237 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
1238 HYPRE_Int *S_offd_j = hypre_CSRMatrixJ(S_offd);
1239 HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
1240
1241 HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
1242 HYPRE_BigInt total_global_cpts;
1243
1244 /* Interpolation matrix P */
1245 hypre_ParCSRMatrix *P;
1246 hypre_CSRMatrix *P_diag;
1247 hypre_CSRMatrix *P_offd;
1248
1249 HYPRE_Real *P_diag_data = NULL;
1250 HYPRE_Int *P_diag_i, *P_diag_j = NULL;
1251 HYPRE_Real *P_offd_data = NULL;
1252 HYPRE_Int *P_offd_i, *P_offd_j = NULL;
1253
1254 /* Intermediate matrices */
1255 hypre_ParCSRMatrix *As_FF, *As_FC, *W;
1256 HYPRE_Real *D_beta, *D_w, *D_lambda, *D_tmp, *D_tau, *D_tmp_offd = NULL;
1257 hypre_CSRMatrix *As_FF_diag;
1258 hypre_CSRMatrix *As_FF_offd;
1259 hypre_CSRMatrix *As_FC_diag;
1260 hypre_CSRMatrix *As_FC_offd;
1261 hypre_CSRMatrix *W_diag;
1262 hypre_CSRMatrix *W_offd;
1263
1264 HYPRE_Int *As_FF_diag_i;
1265 HYPRE_Int *As_FF_diag_j;
1266 HYPRE_Int *As_FF_offd_i;
1267 HYPRE_Int *As_FF_offd_j;
1268 HYPRE_Int *As_FC_diag_i;
1269 HYPRE_Int *As_FC_offd_i;
1270 HYPRE_Int *W_diag_i;
1271 HYPRE_Int *W_offd_i;
1272 HYPRE_Int *W_diag_j;
1273 HYPRE_Int *W_offd_j = NULL;
1274
1275 HYPRE_Real *As_FF_diag_data;
1276 HYPRE_Real *As_FF_offd_data = NULL;
1277 HYPRE_Real *As_FC_diag_data;
1278 HYPRE_Real *As_FC_offd_data = NULL;
1279 HYPRE_Real *W_diag_data;
1280 HYPRE_Real *W_offd_data = NULL;
1281 HYPRE_Real *buf_data = NULL;
1282
1283 HYPRE_BigInt *col_map_offd_P = NULL;
1284 HYPRE_BigInt *new_col_map_offd = NULL;
1285 HYPRE_Int P_diag_size;
1286 HYPRE_Int P_offd_size;
1287 HYPRE_Int new_ncols_P_offd;
1288 HYPRE_Int num_cols_P_offd;
1289 HYPRE_Int *P_marker = NULL;
1290 HYPRE_Int *dof_func_offd = NULL;
1291
1292 /* Loop variables */
1293 HYPRE_Int index, startc, num_sends;
1294 HYPRE_Int i, j;
1295 HYPRE_Int *cpt_array;
1296 HYPRE_Int *start_array;
1297 HYPRE_Int *startf_array;
1298 HYPRE_Int start, stop, startf, stopf;
1299 HYPRE_Int cnt_diag, cnt_offd, row, c_pt;
1300 HYPRE_Int num_cols_A_FF_offd;
1301 HYPRE_Real value, theta;
1302
1303 /* Definitions */
1304 //HYPRE_Real wall_time;
1305 HYPRE_Int n_Cpts, n_Fpts;
1306 HYPRE_Int num_threads = hypre_NumThreads();
1307
1308 //if (debug_flag==4) wall_time = time_getWallclockSeconds();
1309
1310 /* BEGIN */
1311 hypre_MPI_Comm_size(comm, &num_procs);
1312 hypre_MPI_Comm_rank(comm,&my_id);
1313
1314 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
1315 hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs-1, comm);
1316 n_Cpts = num_cpts_global[1]-num_cpts_global[0];
1317
1318 hypre_ParCSRMatrixGenerateFFFC(A, CF_marker, num_cpts_global, S, &As_FC, &As_FF);
1319
1320 As_FC_diag = hypre_ParCSRMatrixDiag(As_FC);
1321 As_FC_diag_i = hypre_CSRMatrixI(As_FC_diag);
1322 As_FC_diag_data = hypre_CSRMatrixData(As_FC_diag);
1323 As_FC_offd = hypre_ParCSRMatrixOffd(As_FC);
1324 As_FC_offd_i = hypre_CSRMatrixI(As_FC_offd);
1325 As_FC_offd_data = hypre_CSRMatrixData(As_FC_offd);
1326 As_FF_diag = hypre_ParCSRMatrixDiag(As_FF);
1327 As_FF_diag_i = hypre_CSRMatrixI(As_FF_diag);
1328 As_FF_diag_j = hypre_CSRMatrixJ(As_FF_diag);
1329 As_FF_diag_data = hypre_CSRMatrixData(As_FF_diag);
1330 As_FF_offd = hypre_ParCSRMatrixOffd(As_FF);
1331 As_FF_offd_i = hypre_CSRMatrixI(As_FF_offd);
1332 As_FF_offd_j = hypre_CSRMatrixJ(As_FF_offd);
1333 As_FF_offd_data = hypre_CSRMatrixData(As_FF_offd);
1334 n_Fpts = hypre_CSRMatrixNumRows(As_FF_diag);
1335 num_cols_A_FF_offd = hypre_CSRMatrixNumCols(As_FF_offd);
1336
1337 D_beta = hypre_CTAlloc(HYPRE_Real, n_Fpts, HYPRE_MEMORY_HOST);
1338 D_lambda = hypre_CTAlloc(HYPRE_Real, n_Fpts, HYPRE_MEMORY_HOST);
1339 D_tmp = hypre_CTAlloc(HYPRE_Real, n_Fpts, HYPRE_MEMORY_HOST);
1340 D_tau = hypre_CTAlloc(HYPRE_Real, n_Fpts, HYPRE_MEMORY_HOST);
1341 D_w = hypre_CTAlloc(HYPRE_Real, n_Fpts, HYPRE_MEMORY_HOST);
1342 cpt_array = hypre_CTAlloc(HYPRE_Int, num_threads, HYPRE_MEMORY_HOST);
1343 start_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
1344 startf_array = hypre_CTAlloc(HYPRE_Int, num_threads+1, HYPRE_MEMORY_HOST);
1345
1346 #ifdef HYPRE_USING_OPENMP
1347 #pragma omp parallel private(i,j,start,stop,startf,stopf,row,theta,value)
1348 #endif
1349 {
1350 HYPRE_Int my_thread_num = hypre_GetThreadNum();
1351
1352 start = (n_fine/num_threads)*my_thread_num;
1353 if (my_thread_num == num_threads-1)
1354 {
1355 stop = n_fine;
1356 }
1357 else
1358 {
1359 stop = (n_fine/num_threads)*(my_thread_num+1);
1360 }
1361 start_array[my_thread_num+1] = stop;
1362 for (i=start; i < stop; i++)
1363 {
1364 if (CF_marker[i] > 0)
1365 {
1366 cpt_array[my_thread_num]++;
1367 }
1368 }
1369
1370 #ifdef HYPRE_USING_OPENMP
1371 #pragma omp barrier
1372 #endif
1373 if (my_thread_num == 0)
1374 {
1375 for (i=1; i < num_threads; i++)
1376 {
1377 cpt_array[i] += cpt_array[i-1];
1378 }
1379 if (num_functions > 1)
1380 {
1381 HYPRE_Int *int_buf_data = NULL;
1382 HYPRE_Int num_sends, startc;
1383 HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
1384 dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
1385 index = 0;
1386 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1387 int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
1388 for (i = 0; i < num_sends; i++)
1389 {
1390 startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1391 for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1392 {
1393 int_buf_data[index++] = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1394 }
1395 }
1396 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, dof_func_offd);
1397 hypre_ParCSRCommHandleDestroy(comm_handle);
1398 hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
1399 }
1400 }
1401 #ifdef HYPRE_USING_OPENMP
1402 #pragma omp barrier
1403 #endif
1404 if (my_thread_num > 0)
1405 startf = start - cpt_array[my_thread_num-1];
1406 else
1407 startf = 0;
1408
1409 if (my_thread_num < num_threads-1)
1410 stopf = stop - cpt_array[my_thread_num];
1411 else
1412 stopf = n_Fpts;
1413
1414 startf_array[my_thread_num+1] = stopf;
1415
1416 for (i=startf; i < stopf; i++)
1417 {
1418 HYPRE_Real number;
1419 for (j=As_FF_diag_i[i]+1; j < As_FF_diag_i[i+1]; j++)
1420 {
1421 D_lambda[i] += As_FF_diag_data[j];
1422 }
1423 for (j=As_FF_offd_i[i]; j < As_FF_offd_i[i+1]; j++)
1424 {
1425 D_lambda[i] += As_FF_offd_data[j];
1426 }
1427 number = (HYPRE_Real)(As_FF_diag_i[i+1]-As_FF_diag_i[i]-1+As_FF_offd_i[i+1]-As_FF_offd_i[i]);
1428 if (number) D_lambda[i] /= number;
1429 for (j=As_FC_diag_i[i]; j < As_FC_diag_i[i+1]; j++)
1430 {
1431 D_beta[i] += As_FC_diag_data[j];
1432 }
1433 for (j=As_FC_offd_i[i]; j < As_FC_offd_i[i+1]; j++)
1434 {
1435 D_beta[i] += As_FC_offd_data[j];
1436 }
1437 if (D_lambda[i]+D_beta[i]) D_tmp[i] = D_lambda[i]/(D_beta[i]+D_lambda[i]);
1438 }
1439
1440
1441 #ifdef HYPRE_USING_OPENMP
1442 #pragma omp barrier
1443 #endif
1444 if (my_thread_num == 0)
1445 {
1446 if (num_cols_A_FF_offd)
1447 {
1448 D_tmp_offd = hypre_CTAlloc(HYPRE_Real, num_cols_A_FF_offd, HYPRE_MEMORY_HOST);
1449 }
1450 index = 0;
1451 comm_pkg = hypre_ParCSRMatrixCommPkg(As_FF);
1452 if (!comm_pkg)
1453 {
1454 hypre_MatvecCommPkgCreate(As_FF);
1455 comm_pkg = hypre_ParCSRMatrixCommPkg(As_FF);
1456 }
1457 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1458 buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST);
1459 for (i = 0; i < num_sends; i++)
1460 {
1461 startc = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1462 for (j = startc; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1463 {
1464 buf_data[index++] = D_tmp[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1465 }
1466 }
1467
1468 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, buf_data, D_tmp_offd);
1469 hypre_ParCSRCommHandleDestroy(comm_handle);
1470 }
1471
1472 #ifdef HYPRE_USING_OPENMP
1473 #pragma omp barrier
1474 #endif
1475
1476 row = startf;
1477 for (i=start; i < stop; i++)
1478 {
1479 if (CF_marker[i] < 0)
1480 {
1481 if (num_functions > 1)
1482 {
1483 HYPRE_Int jA, jC, jS;
1484 jC = A_diag_i[i];
1485 for (j=S_diag_i[i]; j < S_diag_i[i+1]; j++)
1486 {
1487 jS = S_diag_j[j];
1488 jA = A_diag_j[jC];
1489 while (jA != jS)
1490 {
1491 if (dof_func[i] == dof_func[jA])
1492 {
1493 D_w[row] += A_diag_data[jC++];
1494 }
1495 else
1496 jC++;
1497 jA = A_diag_j[jC];
1498 }
1499 jC++;
1500 }
1501 for (j=jC; j < A_diag_i[i+1]; j++)
1502 {
1503 if (dof_func[i] == dof_func[A_diag_j[j]])
1504 D_w[row] += A_diag_data[j];
1505 }
1506 jC = A_offd_i[i];
1507 for (j=S_offd_i[i]; j < S_offd_i[i+1]; j++)
1508 {
1509 jS = S_offd_j[j];
1510 jA = A_offd_j[jC];
1511 while (jA != jS)
1512 {
1513 if (dof_func[i] == dof_func_offd[jA])
1514 {
1515 D_w[row] += A_offd_data[jC++];
1516 }
1517 else
1518 jC++;
1519 jA = A_offd_j[jC];
1520 }
1521 jC++;
1522 }
1523 for (j=jC; j < A_offd_i[i+1]; j++)
1524 {
1525 if (dof_func[i] == dof_func_offd[A_offd_j[j]])
1526 D_w[row] += A_offd_data[j];
1527 }
1528 row++;
1529 }
1530 else
1531 {
1532 for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++)
1533 {
1534 D_w[row] += A_diag_data[j];
1535 }
1536 for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++)
1537 {
1538 D_w[row] += A_offd_data[j];
1539 }
1540 for (j=As_FF_diag_i[row]+1; j < As_FF_diag_i[row+1]; j++)
1541 {
1542 D_w[row] -= As_FF_diag_data[j];
1543 }
1544 for (j=As_FF_offd_i[row]; j < As_FF_offd_i[row+1]; j++)
1545 {
1546 D_w[row] -= As_FF_offd_data[j];
1547 }
1548 D_w[row] -= D_beta[row];
1549 row++;
1550 }
1551 }
1552 }
1553
1554 for (i=startf; i<stopf; i++)
1555 {
1556 for (j=As_FF_diag_i[i]+1; j < As_FF_diag_i[i+1]; j++)
1557 {
1558 index = As_FF_diag_j[j];
1559 D_tau[i] += As_FF_diag_data[j]*D_tmp[index];
1560 }
1561 for (j=As_FF_offd_i[i]; j < As_FF_offd_i[i+1]; j++)
1562 {
1563 index = As_FF_offd_j[j];
1564 D_tau[i] += As_FF_offd_data[j]*D_tmp_offd[index];
1565 }
1566 }
1567 for (i=startf; i<stopf; i++)
1568 {
1569 value = D_w[i]+D_tau[i];
1570 if (value) value = -1.0/value;
1571 theta = D_beta[i]+D_lambda[i];
1572 As_FF_diag_data[As_FF_diag_i[i]] = value*theta;
1573 if (theta) theta = 1.0/theta;
1574 for (j = As_FF_diag_i[i]+1; j < As_FF_diag_i[i+1]; j++)
1575 {
1576 As_FF_diag_data[j] *= value;
1577 }
1578 for (j = As_FF_offd_i[i]; j < As_FF_offd_i[i+1]; j++)
1579 {
1580 As_FF_offd_data[j] *= value;
1581 }
1582 for (j = As_FC_diag_i[i]; j < As_FC_diag_i[i+1]; j++)
1583 {
1584 As_FC_diag_data[j] *= theta;
1585 }
1586 for (j = As_FC_offd_i[i]; j < As_FC_offd_i[i+1]; j++)
1587 {
1588 As_FC_offd_data[j] *= theta;
1589 }
1590 }
1591
1592 } /* end parallel region */
1593
1594 W = hypre_ParMatmul(As_FF, As_FC);
1595 W_diag = hypre_ParCSRMatrixDiag(W);
1596 W_offd = hypre_ParCSRMatrixOffd(W);
1597 W_diag_i = hypre_CSRMatrixI(W_diag);
1598 W_diag_j = hypre_CSRMatrixJ(W_diag);
1599 W_diag_data = hypre_CSRMatrixData(W_diag);
1600 W_offd_i = hypre_CSRMatrixI(W_offd);
1601 W_offd_j = hypre_CSRMatrixJ(W_offd);
1602 W_offd_data = hypre_CSRMatrixData(W_offd);
1603 num_cols_P_offd = hypre_CSRMatrixNumCols(W_offd);
1604 /*-----------------------------------------------------------------------
1605 * Intialize data for P
1606 *-----------------------------------------------------------------------*/
1607 P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
1608 P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
1609
1610 P_diag_size = n_Cpts + hypre_CSRMatrixI(W_diag)[n_Fpts];
1611 P_offd_size = hypre_CSRMatrixI(W_offd)[n_Fpts];
1612
1613 if (P_diag_size)
1614 {
1615 P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, memory_location_P);
1616 P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, memory_location_P);
1617 }
1618
1619 if (P_offd_size)
1620 {
1621 P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, memory_location_P);
1622 P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, memory_location_P);
1623 }
1624
1625 #ifdef HYPRE_USING_OPENMP
1626 #pragma omp parallel private(i,j,start,stop,startf,stopf,c_pt,row,cnt_diag,cnt_offd)
1627 #endif
1628 {
1629 HYPRE_Int my_thread_num = hypre_GetThreadNum();
1630 startf = startf_array[my_thread_num];
1631 stopf = startf_array[my_thread_num+1];
1632 start = start_array[my_thread_num];
1633 stop = start_array[my_thread_num+1];
1634
1635 if (my_thread_num > 0)
1636 c_pt = cpt_array[my_thread_num-1];
1637 else
1638 c_pt = 0;
1639 cnt_diag = W_diag_i[startf]+c_pt;
1640 cnt_offd = W_offd_i[startf];
1641 row = startf;
1642 for (i=start; i < stop; i++)
1643 {
1644 if (CF_marker[i] > 0)
1645 {
1646 P_diag_j[cnt_diag] = c_pt++;
1647 P_diag_data[cnt_diag++] = 1.0;
1648 }
1649 else
1650 {
1651 for (j=W_diag_i[row]; j < W_diag_i[row+1]; j++)
1652 {
1653 P_diag_j[cnt_diag] = W_diag_j[j];
1654 P_diag_data[cnt_diag++] = W_diag_data[j];
1655 }
1656 for (j=W_offd_i[row]; j < W_offd_i[row+1]; j++)
1657 {
1658 P_offd_j[cnt_offd] = W_offd_j[j];
1659 P_offd_data[cnt_offd++] = W_offd_data[j];
1660 }
1661 row++;
1662 }
1663 P_diag_i[i+1] = cnt_diag;
1664 P_offd_i[i+1] = cnt_offd;
1665 }
1666
1667 } /* end parallel region */
1668
1669 /*-----------------------------------------------------------------------
1670 * Create matrix
1671 *-----------------------------------------------------------------------*/
1672
1673 P = hypre_ParCSRMatrixCreate(comm,
1674 hypre_ParCSRMatrixGlobalNumRows(A),
1675 total_global_cpts,
1676 hypre_ParCSRMatrixColStarts(A),
1677 num_cpts_global,
1678 num_cols_P_offd,
1679 P_diag_i[n_fine],
1680 P_offd_i[n_fine]);
1681
1682 P_diag = hypre_ParCSRMatrixDiag(P);
1683 hypre_CSRMatrixData(P_diag) = P_diag_data;
1684 hypre_CSRMatrixI(P_diag) = P_diag_i;
1685 hypre_CSRMatrixJ(P_diag) = P_diag_j;
1686 P_offd = hypre_ParCSRMatrixOffd(P);
1687 hypre_CSRMatrixData(P_offd) = P_offd_data;
1688 hypre_CSRMatrixI(P_offd) = P_offd_i;
1689 hypre_CSRMatrixJ(P_offd) = P_offd_j;
1690 hypre_ParCSRMatrixColMapOffd(P) = hypre_ParCSRMatrixColMapOffd(W);
1691 hypre_ParCSRMatrixColMapOffd(W) = NULL;
1692
1693 hypre_CSRMatrixMemoryLocation(P_diag) = memory_location_P;
1694 hypre_CSRMatrixMemoryLocation(P_offd) = memory_location_P;
1695
1696 /* Compress P, removing coefficients smaller than trunc_factor * Max */
1697 if (trunc_factor != 0.0 || max_elmts > 0)
1698 {
1699 HYPRE_Int *map;
1700 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
1701 P_diag_data = hypre_CSRMatrixData(P_diag);
1702 P_diag_i = hypre_CSRMatrixI(P_diag);
1703 P_diag_j = hypre_CSRMatrixJ(P_diag);
1704 P_offd_data = hypre_CSRMatrixData(P_offd);
1705 P_offd_i = hypre_CSRMatrixI(P_offd);
1706 P_offd_j = hypre_CSRMatrixJ(P_offd);
1707 P_diag_size = P_diag_i[n_fine];
1708 P_offd_size = P_offd_i[n_fine];
1709
1710 col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
1711 if (num_cols_P_offd)
1712 {
1713 P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
1714 for (i=0; i < P_offd_size; i++)
1715 P_marker[P_offd_j[i]] = 1;
1716
1717 new_ncols_P_offd = 0;
1718 for (i=0; i < num_cols_P_offd; i++)
1719 if (P_marker[i]) new_ncols_P_offd++;
1720
1721 new_col_map_offd = hypre_CTAlloc(HYPRE_BigInt, new_ncols_P_offd, HYPRE_MEMORY_HOST);
1722 map = hypre_CTAlloc(HYPRE_Int, new_ncols_P_offd, HYPRE_MEMORY_HOST);
1723
1724 index = 0;
1725 for (i=0; i < num_cols_P_offd; i++)
1726 if (P_marker[i])
1727 {
1728 new_col_map_offd[index] = col_map_offd_P[i];
1729 map[index++] = i;
1730 }
1731 hypre_TFree(P_marker, HYPRE_MEMORY_HOST);
1732
1733
1734 #ifdef HYPRE_USING_OPENMP
1735 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
1736 #endif
1737 for (i=0; i < P_offd_size; i++)
1738 {
1739 P_offd_j[i] = hypre_BinarySearch(map, P_offd_j[i],
1740 new_ncols_P_offd);
1741 }
1742 hypre_TFree(col_map_offd_P, HYPRE_MEMORY_HOST);
1743 hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
1744 hypre_CSRMatrixNumCols(P_offd) = new_ncols_P_offd;
1745 hypre_TFree(map, HYPRE_MEMORY_HOST);
1746 }
1747 }
1748
1749 hypre_MatvecCommPkgCreate(P);
1750
1751 *P_ptr = P;
1752
1753 /* Deallocate memory */
1754 hypre_TFree(D_tmp, HYPRE_MEMORY_HOST);
1755 hypre_TFree(D_tmp_offd, HYPRE_MEMORY_HOST);
1756 hypre_TFree(D_w, HYPRE_MEMORY_HOST);
1757 hypre_TFree(D_tau, HYPRE_MEMORY_HOST);
1758 hypre_TFree(D_beta, HYPRE_MEMORY_HOST);
1759 hypre_TFree(D_lambda, HYPRE_MEMORY_HOST);
1760 hypre_TFree(cpt_array, HYPRE_MEMORY_HOST);
1761 hypre_TFree(start_array, HYPRE_MEMORY_HOST);
1762 hypre_TFree(startf_array, HYPRE_MEMORY_HOST);
1763 hypre_TFree(buf_data, HYPRE_MEMORY_HOST);
1764 hypre_ParCSRMatrixDestroy(As_FF);
1765 hypre_ParCSRMatrixDestroy(As_FC);
1766 hypre_ParCSRMatrixDestroy(W);
1767
1768 return hypre_error_flag;
1769 }
1770
1771 /*-----------------------------------------------------------------------*
1772 * Modularized Extended+e Interpolation
1773 *-----------------------------------------------------------------------*/
1774 HYPRE_Int
hypre_BoomerAMGBuildModExtPEInterp(hypre_ParCSRMatrix * A,HYPRE_Int * CF_marker,hypre_ParCSRMatrix * S,HYPRE_BigInt * num_cpts_global,HYPRE_Int num_functions,HYPRE_Int * dof_func,HYPRE_Int debug_flag,HYPRE_Real trunc_factor,HYPRE_Int max_elmts,hypre_ParCSRMatrix ** P_ptr)1775 hypre_BoomerAMGBuildModExtPEInterp(hypre_ParCSRMatrix *A,
1776 HYPRE_Int *CF_marker,
1777 hypre_ParCSRMatrix *S,
1778 HYPRE_BigInt *num_cpts_global,
1779 HYPRE_Int num_functions,
1780 HYPRE_Int *dof_func,
1781 HYPRE_Int debug_flag,
1782 HYPRE_Real trunc_factor,
1783 HYPRE_Int max_elmts,
1784 hypre_ParCSRMatrix **P_ptr)
1785 {
1786 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1787 hypre_GpuProfilingPushRange("ModExtPEInterp");
1788 #endif
1789
1790 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
1791
1792 HYPRE_Int ierr = 0;
1793
1794 if (exec == HYPRE_EXEC_HOST)
1795 {
1796 ierr = hypre_BoomerAMGBuildModExtPEInterpHost(A, CF_marker, S, num_cpts_global,
1797 num_functions, dof_func,
1798 debug_flag, trunc_factor, max_elmts, P_ptr);
1799 }
1800 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1801 else
1802 {
1803 ierr = hypre_BoomerAMGBuildExtPEInterpDevice(A,CF_marker,S,num_cpts_global,1,NULL,
1804 debug_flag,trunc_factor,max_elmts,P_ptr);
1805 }
1806 #endif
1807
1808 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1809 hypre_GpuProfilingPopRange();
1810 #endif
1811
1812 return ierr;
1813 }
1814