1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /******************************************************************************
9  *
10  * Member functions for hypre_ParCSRMatrix class.
11  *
12  *****************************************************************************/
13 
14 #include "_hypre_parcsr_mv.h"
15 
16 #include "../seq_mv/HYPRE_seq_mv.h"
17 #include "../seq_mv/csr_matrix.h"
18 
/* In addition to the publicly accessible interface in HYPRE_mv.h, the
   implementation in this file uses accessor macros into the sequential matrix
   structure, and so includes the .h that defines that structure. Should those
   accessor macros become proper functions at some later date, this will not
   be necessary. AJC 4/99 */
24 
25 HYPRE_Int hypre_FillResponseParToCSRMatrix(void*, HYPRE_Int, HYPRE_Int, void*, MPI_Comm, void**, HYPRE_Int*);
26 
27 /*--------------------------------------------------------------------------
28  * hypre_ParCSRMatrixCreate
29  *--------------------------------------------------------------------------*/
30 
/* If create is called and row_starts and col_starts are NOT null, then it is
   assumed that they are of length 2, containing the first row (respectively
   first column) owned by the calling processor followed by the first row
   (column) of the next processor - AHB 6/05 */
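
/* Usage sketch (illustrative only, not part of the library): create a square
   matrix and let hypre generate the partitionings by passing NULL starts;
   the global size of 100 is an arbitrary example value.

      hypre_ParCSRMatrix *A =
         hypre_ParCSRMatrixCreate(hypre_MPI_COMM_WORLD, 100, 100,
                                  NULL, NULL, 0, 0, 0);
      hypre_ParCSRMatrixInitialize(A);
      ...
      hypre_ParCSRMatrixDestroy(A);
*/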
34 
35 hypre_ParCSRMatrix*
hypre_ParCSRMatrixCreate( MPI_Comm      comm,
37                           HYPRE_BigInt  global_num_rows,
38                           HYPRE_BigInt  global_num_cols,
39                           HYPRE_BigInt *row_starts_in,
40                           HYPRE_BigInt *col_starts_in,
41                           HYPRE_Int     num_cols_offd,
42                           HYPRE_Int     num_nonzeros_diag,
43                           HYPRE_Int     num_nonzeros_offd )
44 {
45    hypre_ParCSRMatrix  *matrix;
46    HYPRE_Int            num_procs, my_id;
47    HYPRE_Int            local_num_rows;
48    HYPRE_Int            local_num_cols;
49    HYPRE_BigInt         row_starts[2];
50    HYPRE_BigInt         col_starts[2];
51    HYPRE_BigInt         first_row_index, first_col_diag;
52 
53    matrix = hypre_CTAlloc(hypre_ParCSRMatrix, 1, HYPRE_MEMORY_HOST);
54 
55    hypre_MPI_Comm_rank(comm,&my_id);
56    hypre_MPI_Comm_size(comm,&num_procs);
57 
58    if (!row_starts_in)
59    {
60       hypre_GenerateLocalPartitioning(global_num_rows, num_procs, my_id,
61                                       row_starts);
62    }
63    else
64    {
65       row_starts[0] = row_starts_in[0];
66       row_starts[1] = row_starts_in[1];
67    }
68 
69    if (!col_starts_in)
70    {
71       hypre_GenerateLocalPartitioning(global_num_cols, num_procs, my_id,
72                                       col_starts);
73    }
74    else
75    {
76       col_starts[0] = col_starts_in[0];
77       col_starts[1] = col_starts_in[1];
78    }
79 
80    /* row_starts[0] is start of local rows.
81       row_starts[1] is start of next processor's rows */
82    first_row_index = row_starts[0];
83    local_num_rows  = row_starts[1] - first_row_index;
84    first_col_diag  = col_starts[0];
85    local_num_cols  = col_starts[1] - first_col_diag;
86 
87    hypre_ParCSRMatrixComm(matrix) = comm;
88    hypre_ParCSRMatrixDiag(matrix) =
89       hypre_CSRMatrixCreate(local_num_rows, local_num_cols, num_nonzeros_diag);
90    hypre_ParCSRMatrixOffd(matrix) =
91       hypre_CSRMatrixCreate(local_num_rows, num_cols_offd, num_nonzeros_offd);
92    hypre_ParCSRMatrixDiagT(matrix) = NULL;
93    hypre_ParCSRMatrixOffdT(matrix) = NULL; // JSP: transposed matrices are optional
94    hypre_ParCSRMatrixGlobalNumRows(matrix)   = global_num_rows;
95    hypre_ParCSRMatrixGlobalNumCols(matrix)   = global_num_cols;
96    hypre_ParCSRMatrixGlobalNumRownnz(matrix) = global_num_rows;
97    hypre_ParCSRMatrixFirstRowIndex(matrix)   = first_row_index;
98    hypre_ParCSRMatrixFirstColDiag(matrix)    = first_col_diag;
99    hypre_ParCSRMatrixLastRowIndex(matrix) = first_row_index + local_num_rows - 1;
100    hypre_ParCSRMatrixLastColDiag(matrix)  = first_col_diag + local_num_cols - 1;
101 
102    hypre_ParCSRMatrixRowStarts(matrix)[0] = row_starts[0];
103    hypre_ParCSRMatrixRowStarts(matrix)[1] = row_starts[1];
104    hypre_ParCSRMatrixColStarts(matrix)[0] = col_starts[0];
105    hypre_ParCSRMatrixColStarts(matrix)[1] = col_starts[1];
106 
107    hypre_ParCSRMatrixColMapOffd(matrix)       = NULL;
108    hypre_ParCSRMatrixDeviceColMapOffd(matrix) = NULL;
109    hypre_ParCSRMatrixProcOrdering(matrix)     = NULL;
110 
111    hypre_ParCSRMatrixAssumedPartition(matrix) = NULL;
112    hypre_ParCSRMatrixOwnsAssumedPartition(matrix) = 1;
113 
114    hypre_ParCSRMatrixCommPkg(matrix)  = NULL;
115    hypre_ParCSRMatrixCommPkgT(matrix) = NULL;
116 
117    /* set defaults */
118    hypre_ParCSRMatrixOwnsData(matrix)     = 1;
119    hypre_ParCSRMatrixRowindices(matrix)   = NULL;
120    hypre_ParCSRMatrixRowvalues(matrix)    = NULL;
121    hypre_ParCSRMatrixGetrowactive(matrix) = 0;
122 
123    matrix->bdiaginv = NULL;
124    matrix->bdiaginv_comm_pkg = NULL;
125    matrix->bdiag_size = -1;
126 
127 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
128    hypre_ParCSRMatrixSocDiagJ(matrix) = NULL;
129    hypre_ParCSRMatrixSocOffdJ(matrix) = NULL;
130 #endif
131 
132    return matrix;
133 }
134 
135 /*--------------------------------------------------------------------------
136  * hypre_ParCSRMatrixDestroy
137  *--------------------------------------------------------------------------*/
138 
139 HYPRE_Int
hypre_ParCSRMatrixDestroy( hypre_ParCSRMatrix *matrix )
141 {
142    if (matrix)
143    {
144       HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(matrix);
145 
146       if ( hypre_ParCSRMatrixOwnsData(matrix) )
147       {
148          hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix));
149          hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix));
150 
151          if ( hypre_ParCSRMatrixDiagT(matrix) )
152          {
153             hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiagT(matrix));
154          }
155 
156          if ( hypre_ParCSRMatrixOffdT(matrix) )
157          {
158             hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffdT(matrix));
159          }
160 
161          if (hypre_ParCSRMatrixColMapOffd(matrix))
162          {
163             hypre_TFree(hypre_ParCSRMatrixColMapOffd(matrix), HYPRE_MEMORY_HOST);
164          }
165 
166          if (hypre_ParCSRMatrixDeviceColMapOffd(matrix))
167          {
168             hypre_TFree(hypre_ParCSRMatrixDeviceColMapOffd(matrix), HYPRE_MEMORY_DEVICE);
169          }
170 
171          if (hypre_ParCSRMatrixCommPkg(matrix))
172          {
173             hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(matrix));
174          }
175 
176          if (hypre_ParCSRMatrixCommPkgT(matrix))
177          {
178             hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkgT(matrix));
179          }
180       }
181 
182       /* RL: this is actually not correct since the memory_location may have been changed after allocation
183        * put them in containers TODO */
184       hypre_TFree(hypre_ParCSRMatrixRowindices(matrix), memory_location);
185       hypre_TFree(hypre_ParCSRMatrixRowvalues(matrix), memory_location);
186 
187       if ( hypre_ParCSRMatrixAssumedPartition(matrix) && hypre_ParCSRMatrixOwnsAssumedPartition(matrix) )
188       {
189          hypre_AssumedPartitionDestroy(hypre_ParCSRMatrixAssumedPartition(matrix));
190       }
191 
192       if ( hypre_ParCSRMatrixProcOrdering(matrix) )
193       {
194          hypre_TFree(hypre_ParCSRMatrixProcOrdering(matrix), HYPRE_MEMORY_HOST);
195       }
196 
197       hypre_TFree(matrix->bdiaginv, HYPRE_MEMORY_HOST);
198       if (matrix->bdiaginv_comm_pkg)
199       {
200          hypre_MatvecCommPkgDestroy(matrix->bdiaginv_comm_pkg);
201       }
202 
203 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
204       hypre_TFree(hypre_ParCSRMatrixSocDiagJ(matrix), HYPRE_MEMORY_DEVICE);
205       hypre_TFree(hypre_ParCSRMatrixSocOffdJ(matrix), HYPRE_MEMORY_DEVICE);
206 #endif
207 
208       hypre_TFree(matrix, HYPRE_MEMORY_HOST);
209    }
210 
211    return hypre_error_flag;
212 }
213 
214 /*--------------------------------------------------------------------------
215  * hypre_ParCSRMatrixInitialize
216  *--------------------------------------------------------------------------*/
217 
218 HYPRE_Int
hypre_ParCSRMatrixInitialize_v2( hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location )
220 {
221    if (!matrix)
222    {
223       hypre_error_in_arg(1);
224       return hypre_error_flag;
225    }
226 
227    hypre_CSRMatrixInitialize_v2(hypre_ParCSRMatrixDiag(matrix), 0, memory_location);
228    hypre_CSRMatrixInitialize_v2(hypre_ParCSRMatrixOffd(matrix), 0, memory_location);
229 
230    hypre_ParCSRMatrixColMapOffd(matrix) =
231       hypre_CTAlloc(HYPRE_BigInt, hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(matrix)),
232                     HYPRE_MEMORY_HOST);
233 
234    return hypre_error_flag;
235 }
236 
237 HYPRE_Int
hypre_ParCSRMatrixInitialize( hypre_ParCSRMatrix *matrix )
239 {
240    return hypre_ParCSRMatrixInitialize_v2(matrix, hypre_ParCSRMatrixMemoryLocation(matrix));
241 }
242 
243 /*--------------------------------------------------------------------------
244  * hypre_ParCSRMatrixClone
245  * Creates and returns a new copy S of the argument A
246  * The following variables are not copied because they will be constructed
247  * later if needed: CommPkg, CommPkgT, rowindices, rowvalues
248  *--------------------------------------------------------------------------*/
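/* Usage sketch (illustrative only): deep-copy A, including coefficient values,
   into A's current memory location; the clone owns its data and must be freed.

      hypre_ParCSRMatrix *S = hypre_ParCSRMatrixClone(A, 1);
      ...
      hypre_ParCSRMatrixDestroy(S);
*/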
249 
250 hypre_ParCSRMatrix*
hypre_ParCSRMatrixClone_v2(hypre_ParCSRMatrix *A, HYPRE_Int copy_data, HYPRE_MemoryLocation memory_location)
252 {
253    hypre_ParCSRMatrix *S;
254 
255    S = hypre_ParCSRMatrixCreate( hypre_ParCSRMatrixComm(A),
256                                  hypre_ParCSRMatrixGlobalNumRows(A),
257                                  hypre_ParCSRMatrixGlobalNumCols(A),
258                                  hypre_ParCSRMatrixRowStarts(A),
259                                  hypre_ParCSRMatrixColStarts(A),
260                                  hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A)),
261                                  hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(A)),
262                                  hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(A)) );
263 
264    hypre_ParCSRMatrixNumNonzeros(S)  = hypre_ParCSRMatrixNumNonzeros(A);
265    hypre_ParCSRMatrixDNumNonzeros(S) = hypre_ParCSRMatrixNumNonzeros(A);
266 
267    hypre_ParCSRMatrixInitialize_v2(S, memory_location);
268 
269    hypre_ParCSRMatrixCopy(A, S, copy_data);
270 
271    return S;
272 }
273 
274 hypre_ParCSRMatrix*
hypre_ParCSRMatrixClone(hypre_ParCSRMatrix *A, HYPRE_Int copy_data)
276 {
277    return hypre_ParCSRMatrixClone_v2(A, copy_data, hypre_ParCSRMatrixMemoryLocation(A));
278 }
279 
280 HYPRE_Int
hypre_ParCSRMatrixMigrate(hypre_ParCSRMatrix *A, HYPRE_MemoryLocation memory_location)
282 {
283    if (!A)
284    {
285       return hypre_error_flag;
286    }
287 
288    HYPRE_MemoryLocation old_memory_location = hypre_ParCSRMatrixMemoryLocation(A);
289 
290    if ( hypre_GetActualMemLocation(memory_location) != hypre_GetActualMemLocation(old_memory_location) )
291    {
292       hypre_CSRMatrix *A_diag = hypre_CSRMatrixClone_v2(hypre_ParCSRMatrixDiag(A), 1, memory_location);
293       hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(A));
294       hypre_ParCSRMatrixDiag(A) = A_diag;
295 
296       hypre_CSRMatrix *A_offd = hypre_CSRMatrixClone_v2(hypre_ParCSRMatrixOffd(A), 1, memory_location);
297       hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(A));
298       hypre_ParCSRMatrixOffd(A) = A_offd;
299 
300       hypre_TFree(hypre_ParCSRMatrixRowindices(A), old_memory_location);
301       hypre_TFree(hypre_ParCSRMatrixRowvalues(A), old_memory_location);
302    }
303    else
304    {
305       hypre_CSRMatrixMemoryLocation(hypre_ParCSRMatrixDiag(A)) = memory_location;
306       hypre_CSRMatrixMemoryLocation(hypre_ParCSRMatrixOffd(A)) = memory_location;
307    }
308 
309    return hypre_error_flag;
310 }
311 
312 HYPRE_Int
hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, const char* format )
314 {
315    MPI_Comm comm;
316    hypre_CSRMatrix *diag;
317    hypre_CSRMatrix *offd;
318 
319    if (!matrix)
320    {
321       hypre_error_in_arg(1);
322       return hypre_error_flag;
323    }
324 
325    comm = hypre_ParCSRMatrixComm(matrix);
326    diag = hypre_ParCSRMatrixDiag(matrix);
327    offd = hypre_ParCSRMatrixOffd(matrix);
328 
329    /* TODO in HYPRE_DEBUG ? */
330    hypre_CSRMatrixCheckSetNumNonzeros(diag);
331    hypre_CSRMatrixCheckSetNumNonzeros(offd);
332 
333    if (format[0] == 'I')
334    {
335       HYPRE_BigInt total_num_nonzeros;
336       HYPRE_BigInt local_num_nonzeros;
337       local_num_nonzeros = (HYPRE_BigInt) ( hypre_CSRMatrixNumNonzeros(diag) +
338                                             hypre_CSRMatrixNumNonzeros(offd) );
339 
340       hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, HYPRE_MPI_BIG_INT,
341                           hypre_MPI_SUM, comm);
342 
343       hypre_ParCSRMatrixNumNonzeros(matrix) = total_num_nonzeros;
344    }
345    else if (format[0] == 'D')
346    {
347       HYPRE_Real total_num_nonzeros;
348       HYPRE_Real local_num_nonzeros;
349       local_num_nonzeros = (HYPRE_Real) ( hypre_CSRMatrixNumNonzeros(diag) +
350                                           hypre_CSRMatrixNumNonzeros(offd) );
351 
352       hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1,
353                           HYPRE_MPI_REAL, hypre_MPI_SUM, comm);
354 
355       hypre_ParCSRMatrixDNumNonzeros(matrix) = total_num_nonzeros;
356    }
357    else
358    {
359       hypre_error_in_arg(1);
360       return hypre_error_flag;
361    }
362 
363    return hypre_error_flag;
364 }
365 
366 /*--------------------------------------------------------------------------
367  * hypre_ParCSRMatrixSetNumNonzeros
368  *--------------------------------------------------------------------------*/
369 HYPRE_Int
hypre_ParCSRMatrixSetNumNonzeros( hypre_ParCSRMatrix *matrix )
371 {
372    return hypre_ParCSRMatrixSetNumNonzeros_core(matrix, "Int");
373 }
374 
375 /*--------------------------------------------------------------------------
376  * hypre_ParCSRMatrixSetDNumNonzeros
377  *--------------------------------------------------------------------------*/
378 
379 HYPRE_Int
hypre_ParCSRMatrixSetDNumNonzeros( hypre_ParCSRMatrix *matrix )
381 {
382    return hypre_ParCSRMatrixSetNumNonzeros_core(matrix, "Double");
383 }
384 
385 /*--------------------------------------------------------------------------
386  * hypre_ParCSRMatrixSetNumRownnz
387  *--------------------------------------------------------------------------*/
388 
389 HYPRE_Int
hypre_ParCSRMatrixSetNumRownnz( hypre_ParCSRMatrix *matrix )
391 {
392    MPI_Comm          comm = hypre_ParCSRMatrixComm(matrix);
393    hypre_CSRMatrix  *diag = hypre_ParCSRMatrixDiag(matrix);
394    hypre_CSRMatrix  *offd = hypre_ParCSRMatrixOffd(matrix);
395    HYPRE_Int        *rownnz_diag = hypre_CSRMatrixRownnz(diag);
396    HYPRE_Int        *rownnz_offd = hypre_CSRMatrixRownnz(offd);
397    HYPRE_Int         num_rownnz_diag = hypre_CSRMatrixNumRownnz(diag);
398    HYPRE_Int         num_rownnz_offd = hypre_CSRMatrixNumRownnz(offd);
399 
400    HYPRE_BigInt      local_num_rownnz;
401    HYPRE_BigInt      global_num_rownnz;
402    HYPRE_Int         i, j;
403 
404    if (!matrix)
405    {
406       hypre_error_in_arg(1);
407       return hypre_error_flag;
408    }
409 
410    local_num_rownnz = i = j = 0;
411    while (i < num_rownnz_diag && j < num_rownnz_offd)
412    {
413       local_num_rownnz++;
414       if (rownnz_diag[i] < rownnz_offd[j])
415       {
416          i++;
417       }
418       else
419       {
420          j++;
421       }
422    }
423 
424    local_num_rownnz += (HYPRE_BigInt) ((num_rownnz_diag - i) + (num_rownnz_offd - j));
425 
426    hypre_MPI_Allreduce(&local_num_rownnz, &global_num_rownnz, 1,
427                        HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
428 
429    hypre_ParCSRMatrixGlobalNumRownnz(matrix) = global_num_rownnz;
430 
431    return hypre_error_flag;
432 }
433 
434 /*--------------------------------------------------------------------------
435  * hypre_ParCSRMatrixSetDataOwner
436  *--------------------------------------------------------------------------*/
437 
438 HYPRE_Int
hypre_ParCSRMatrixSetDataOwner( hypre_ParCSRMatrix *matrix,
440                                 HYPRE_Int              owns_data )
441 {
442    if (!matrix)
443    {
444       hypre_error_in_arg(1);
445       return hypre_error_flag;
446    }
447 
448    hypre_ParCSRMatrixOwnsData(matrix) = owns_data;
449 
450    return hypre_error_flag;
451 }
452 
453 /*--------------------------------------------------------------------------
454  * hypre_ParCSRMatrixRead
455  *--------------------------------------------------------------------------*/
456 
457 hypre_ParCSRMatrix *
hypre_ParCSRMatrixRead( MPI_Comm    comm,
459                         const char *file_name )
460 {
461    hypre_ParCSRMatrix  *matrix;
462    hypre_CSRMatrix     *diag;
463    hypre_CSRMatrix     *offd;
464 
465    HYPRE_Int            my_id, num_procs;
466    HYPRE_Int            num_cols_offd;
467    HYPRE_Int            i, local_num_rows;
468 
469    HYPRE_BigInt         row_starts[2];
470    HYPRE_BigInt         col_starts[2];
471    HYPRE_BigInt        *col_map_offd;
472    HYPRE_BigInt         row_s, row_e, col_s, col_e;
473    HYPRE_BigInt         global_num_rows, global_num_cols;
474 
475    FILE                *fp;
476    char                 new_file_d[80], new_file_o[80], new_file_info[80];
477 
478    hypre_MPI_Comm_rank(comm,&my_id);
479    hypre_MPI_Comm_size(comm,&num_procs);
480 
481    hypre_sprintf(new_file_d,"%s.D.%d",file_name,my_id);
482    hypre_sprintf(new_file_o,"%s.O.%d",file_name,my_id);
483    hypre_sprintf(new_file_info,"%s.INFO.%d",file_name,my_id);
484    fp = fopen(new_file_info, "r");
485    hypre_fscanf(fp, "%b", &global_num_rows);
486    hypre_fscanf(fp, "%b", &global_num_cols);
487    hypre_fscanf(fp, "%d", &num_cols_offd);
488    /* the bgl input file should only contain the EXACT range for local processor */
   hypre_fscanf(fp, "%b %b %b %b", &row_s, &row_e, &col_s, &col_e);
490    row_starts[0] = row_s;
491    row_starts[1] = row_e;
492    col_starts[0] = col_s;
493    col_starts[1] = col_e;
494 
495    col_map_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd, HYPRE_MEMORY_HOST);
496 
497    for (i = 0; i < num_cols_offd; i++)
498    {
499       hypre_fscanf(fp, "%b", &col_map_offd[i]);
500    }
501 
502    fclose(fp);
503 
504    diag = hypre_CSRMatrixRead(new_file_d);
505    local_num_rows = hypre_CSRMatrixNumRows(diag);
506 
507    if (num_cols_offd)
508    {
509       offd = hypre_CSRMatrixRead(new_file_o);
510    }
511    else
512    {
513       offd = hypre_CSRMatrixCreate(local_num_rows,0,0);
514       hypre_CSRMatrixInitialize(offd);
515    }
516 
517    matrix = hypre_CTAlloc(hypre_ParCSRMatrix, 1, HYPRE_MEMORY_HOST);
518 
519    hypre_ParCSRMatrixComm(matrix) = comm;
520    hypre_ParCSRMatrixGlobalNumRows(matrix) = global_num_rows;
521    hypre_ParCSRMatrixGlobalNumCols(matrix) = global_num_cols;
522    hypre_ParCSRMatrixFirstRowIndex(matrix) = row_s;
523    hypre_ParCSRMatrixFirstColDiag(matrix) = col_s;
524    hypre_ParCSRMatrixLastRowIndex(matrix) = row_e - 1;
525    hypre_ParCSRMatrixLastColDiag(matrix) = col_e - 1;
526 
527    hypre_ParCSRMatrixRowStarts(matrix)[0] = row_starts[0];
528    hypre_ParCSRMatrixRowStarts(matrix)[1] = row_starts[1];
529    hypre_ParCSRMatrixColStarts(matrix)[0] = col_starts[0];
530    hypre_ParCSRMatrixColStarts(matrix)[1] = col_starts[1];
531 
532    hypre_ParCSRMatrixCommPkg(matrix) = NULL;
533 
534    /* set defaults */
535    hypre_ParCSRMatrixOwnsData(matrix) = 1;
536    hypre_ParCSRMatrixDiag(matrix) = diag;
537    hypre_ParCSRMatrixOffd(matrix) = offd;
538    if (num_cols_offd)
539    {
540       hypre_ParCSRMatrixColMapOffd(matrix) = col_map_offd;
541    }
542    else
543    {
544       hypre_ParCSRMatrixColMapOffd(matrix) = NULL;
545    }
546 
547    return matrix;
548 }
549 
550 /*--------------------------------------------------------------------------
551  * hypre_ParCSRMatrixPrint
552  *--------------------------------------------------------------------------*/
553 
554 HYPRE_Int
hypre_ParCSRMatrixPrint( hypre_ParCSRMatrix *matrix,
556                          const char         *file_name )
557 {
558    MPI_Comm comm;
559    HYPRE_BigInt global_num_rows;
560    HYPRE_BigInt global_num_cols;
561    HYPRE_BigInt *col_map_offd;
562    HYPRE_Int  my_id, i, num_procs;
563    char   new_file_d[80], new_file_o[80], new_file_info[80];
564    FILE *fp;
565    HYPRE_Int num_cols_offd = 0;
566    HYPRE_BigInt row_s, row_e, col_s, col_e;
567    if (!matrix)
568    {
569       hypre_error_in_arg(1);
570       return hypre_error_flag;
571    }
572    comm = hypre_ParCSRMatrixComm(matrix);
573    global_num_rows = hypre_ParCSRMatrixGlobalNumRows(matrix);
574    global_num_cols = hypre_ParCSRMatrixGlobalNumCols(matrix);
575    col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix);
576    if (hypre_ParCSRMatrixOffd(matrix))
577       num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(matrix));
578 
579    hypre_MPI_Comm_rank(comm, &my_id);
580    hypre_MPI_Comm_size(comm, &num_procs);
581 
582    hypre_sprintf(new_file_d,"%s.D.%d",file_name,my_id);
583    hypre_sprintf(new_file_o,"%s.O.%d",file_name,my_id);
584    hypre_sprintf(new_file_info,"%s.INFO.%d",file_name,my_id);
585    hypre_CSRMatrixPrint(hypre_ParCSRMatrixDiag(matrix),new_file_d);
586    if (num_cols_offd != 0)
587       hypre_CSRMatrixPrint(hypre_ParCSRMatrixOffd(matrix),new_file_o);
588 
589    fp = fopen(new_file_info, "w");
590    hypre_fprintf(fp, "%b\n", global_num_rows);
591    hypre_fprintf(fp, "%b\n", global_num_cols);
592    hypre_fprintf(fp, "%d\n", num_cols_offd);
593    row_s = hypre_ParCSRMatrixFirstRowIndex(matrix);
594    row_e = hypre_ParCSRMatrixLastRowIndex(matrix);
595    col_s =  hypre_ParCSRMatrixFirstColDiag(matrix);
596    col_e =  hypre_ParCSRMatrixLastColDiag(matrix);
597    /* add 1 to the ends because this is a starts partition */
598    hypre_fprintf(fp, "%b %b %b %b\n", row_s, row_e + 1, col_s, col_e + 1);
599    for (i=0; i < num_cols_offd; i++)
600       hypre_fprintf(fp, "%b\n", col_map_offd[i]);
601    fclose(fp);
602 
603    return hypre_error_flag;
604 }
605 
606 /*--------------------------------------------------------------------------
607  * hypre_ParCSRMatrixPrintIJ
608  *--------------------------------------------------------------------------*/
609 
610 HYPRE_Int
hypre_ParCSRMatrixPrintIJ( const hypre_ParCSRMatrix *matrix,
612                            const HYPRE_Int           base_i,
613                            const HYPRE_Int           base_j,
614                            const char               *filename )
615 {
616    MPI_Comm             comm;
617    HYPRE_BigInt         first_row_index;
618    HYPRE_BigInt         first_col_diag;
619    hypre_CSRMatrix     *diag;
620    hypre_CSRMatrix     *offd;
621    HYPRE_BigInt        *col_map_offd;
622    HYPRE_Int            num_rows;
623    const HYPRE_BigInt  *row_starts;
624    const HYPRE_BigInt  *col_starts;
625    HYPRE_Complex       *diag_data;
626    HYPRE_Int           *diag_i;
627    HYPRE_Int           *diag_j;
628    HYPRE_Complex       *offd_data;
629    HYPRE_Int           *offd_i;
630    HYPRE_Int           *offd_j;
631    HYPRE_Int            myid, num_procs, i, j;
632    HYPRE_BigInt         I, J;
633    char                 new_filename[255];
634    FILE                *file;
635    HYPRE_Int            num_nonzeros_offd;
636    HYPRE_BigInt         ilower, iupper, jlower, jupper;
637 
638    if (!matrix)
639    {
640       hypre_error_in_arg(1);
641       return hypre_error_flag;
642    }
643 
644    comm            = hypre_ParCSRMatrixComm(matrix);
645    first_row_index = hypre_ParCSRMatrixFirstRowIndex(matrix);
646    first_col_diag  = hypre_ParCSRMatrixFirstColDiag(matrix);
647    diag            = hypre_ParCSRMatrixDiag(matrix);
648    offd            = hypre_ParCSRMatrixOffd(matrix);
649    col_map_offd    = hypre_ParCSRMatrixColMapOffd(matrix);
650    num_rows        = hypre_ParCSRMatrixNumRows(matrix);
651    row_starts      = hypre_ParCSRMatrixRowStarts(matrix);
652    col_starts      = hypre_ParCSRMatrixColStarts(matrix);
653    hypre_MPI_Comm_rank(comm, &myid);
654    hypre_MPI_Comm_size(comm, &num_procs);
655 
656    hypre_sprintf(new_filename,"%s.%05d", filename, myid);
657 
658    if ((file = fopen(new_filename, "w")) == NULL)
659    {
660       hypre_error_w_msg(HYPRE_ERROR_GENERIC,"Error: can't open output file %s\n");
661       return hypre_error_flag;
662    }
663 
664    diag_data = hypre_CSRMatrixData(diag);
665    diag_i    = hypre_CSRMatrixI(diag);
666    diag_j    = hypre_CSRMatrixJ(diag);
667 
668    num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(offd);
669    if (num_nonzeros_offd)
670    {
671       offd_data = hypre_CSRMatrixData(offd);
672       offd_i    = hypre_CSRMatrixI(offd);
673       offd_j    = hypre_CSRMatrixJ(offd);
674    }
675 
676    ilower = row_starts[0] + (HYPRE_BigInt) base_i;
677    iupper = row_starts[1] + (HYPRE_BigInt) base_i - 1;
678    jlower = col_starts[0] + (HYPRE_BigInt) base_j;
679    jupper = col_starts[1] + (HYPRE_BigInt) base_j - 1;
680 
681    hypre_fprintf(file, "%b %b %b %b\n", ilower, iupper, jlower, jupper);
682 
683    for (i = 0; i < num_rows; i++)
684    {
685       I = first_row_index + (HYPRE_BigInt)(i + base_i);
686 
687       /* print diag columns */
688       for (j = diag_i[i]; j < diag_i[i+1]; j++)
689       {
690          J = first_col_diag + (HYPRE_BigInt)(diag_j[j] + base_j);
691          if (diag_data)
692          {
693 #ifdef HYPRE_COMPLEX
694             hypre_fprintf(file, "%b %b %.14e , %.14e\n", I, J,
695                           hypre_creal(diag_data[j]), hypre_cimag(diag_data[j]));
696 #else
697             hypre_fprintf(file, "%b %b %.14e\n", I, J, diag_data[j]);
698 #endif
699          }
700          else
701          {
702             hypre_fprintf(file, "%b %b\n", I, J);
703          }
704       }
705 
706       /* print offd columns */
707       if (num_nonzeros_offd)
708       {
709          for (j = offd_i[i]; j < offd_i[i+1]; j++)
710          {
711             J = col_map_offd[offd_j[j]] + (HYPRE_BigInt) base_j;
712             if (offd_data)
713             {
714 #ifdef HYPRE_COMPLEX
715                hypre_fprintf(file, "%b %b %.14e , %.14e\n", I, J,
716                              hypre_creal(offd_data[j]), hypre_cimag(offd_data[j]));
717 #else
718                hypre_fprintf(file, "%b %b %.14e\n", I, J, offd_data[j]);
719 #endif
720             }
721             else
722             {
723                hypre_fprintf(file, "%b %b\n", I, J);
724             }
725          }
726       }
727    }
728 
729    fclose(file);
730 
731    return hypre_error_flag;
732 }
733 
734 /*--------------------------------------------------------------------------
735  * hypre_ParCSRMatrixReadIJ
736  *--------------------------------------------------------------------------*/
737 
738 HYPRE_Int
hypre_ParCSRMatrixReadIJ( MPI_Comm             comm,
740                           const char          *filename,
741                           HYPRE_Int           *base_i_ptr,
742                           HYPRE_Int           *base_j_ptr,
743                           hypre_ParCSRMatrix **matrix_ptr)
744 {
745    HYPRE_BigInt        global_num_rows;
746    HYPRE_BigInt        global_num_cols;
747    HYPRE_BigInt        first_row_index;
748    HYPRE_BigInt        first_col_diag;
749    HYPRE_BigInt        last_col_diag;
750    hypre_ParCSRMatrix *matrix;
751    hypre_CSRMatrix    *diag;
752    hypre_CSRMatrix    *offd;
753    HYPRE_BigInt       *col_map_offd;
754    HYPRE_BigInt        row_starts[2];
755    HYPRE_BigInt        col_starts[2];
756    HYPRE_Int           num_rows;
757    HYPRE_BigInt        big_base_i, big_base_j;
758    HYPRE_Int           base_i, base_j;
759    HYPRE_Complex      *diag_data;
760    HYPRE_Int          *diag_i;
761    HYPRE_Int          *diag_j;
762    HYPRE_Complex      *offd_data;
763    HYPRE_Int          *offd_i;
764    HYPRE_Int          *offd_j;
765    HYPRE_BigInt       *tmp_j;
766    HYPRE_BigInt       *aux_offd_j;
767    HYPRE_BigInt        I, J;
768    HYPRE_Int           myid, num_procs, i, i2, j;
769    char                new_filename[255];
770    FILE               *file;
771    HYPRE_Int           num_cols_offd, num_nonzeros_diag, num_nonzeros_offd;
772    HYPRE_Int           i_col, num_cols;
773    HYPRE_Int           diag_cnt, offd_cnt, row_cnt;
774    HYPRE_Complex       data;
775 
776    hypre_MPI_Comm_size(comm, &num_procs);
777    hypre_MPI_Comm_rank(comm, &myid);
778 
779    hypre_sprintf(new_filename,"%s.%05d", filename, myid);
780 
781    if ((file = fopen(new_filename, "r")) == NULL)
782    {
      hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: can't open input file %s\n");
784       return hypre_error_flag;
785    }
786 
787    hypre_fscanf(file, "%b %b", &global_num_rows, &global_num_cols);
788    hypre_fscanf(file, "%d %d %d", &num_rows, &num_cols, &num_cols_offd);
789    hypre_fscanf(file, "%d %d", &num_nonzeros_diag, &num_nonzeros_offd);
790    hypre_fscanf(file, "%b %b %b %b", &row_starts[0], &col_starts[0], &row_starts[1], &col_starts[1]);
791 
792    big_base_i = row_starts[0];
793    big_base_j = col_starts[0];
794    base_i = (HYPRE_Int) row_starts[0];
795    base_j = (HYPRE_Int) col_starts[0];
796 
797    matrix = hypre_ParCSRMatrixCreate(comm, global_num_rows, global_num_cols,
798                                      row_starts, col_starts, num_cols_offd,
799                                      num_nonzeros_diag, num_nonzeros_offd);
800    hypre_ParCSRMatrixInitialize(matrix);
801 
802    diag = hypre_ParCSRMatrixDiag(matrix);
803    offd = hypre_ParCSRMatrixOffd(matrix);
804 
805    diag_data = hypre_CSRMatrixData(diag);
806    diag_i    = hypre_CSRMatrixI(diag);
807    diag_j    = hypre_CSRMatrixJ(diag);
808 
809    offd_i    = hypre_CSRMatrixI(offd);
810    if (num_nonzeros_offd)
811    {
812       offd_data = hypre_CSRMatrixData(offd);
813       offd_j    = hypre_CSRMatrixJ(offd);
814       tmp_j     = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros_offd, HYPRE_MEMORY_HOST);
815    }
816 
817    first_row_index = hypre_ParCSRMatrixFirstRowIndex(matrix);
818    first_col_diag = hypre_ParCSRMatrixFirstColDiag(matrix);
819    last_col_diag = first_col_diag+(HYPRE_BigInt)num_cols-1;
820 
821    diag_cnt = 0;
822    offd_cnt = 0;
823    row_cnt = 0;
824    for (i = 0; i < num_nonzeros_diag+num_nonzeros_offd; i++)
825    {
826       /* read values */
827       hypre_fscanf(file, "%b %b %le", &I, &J, &data);
828       i2 = (HYPRE_Int)(I-big_base_i-first_row_index);
829       J -= big_base_j;
830       if (i2 > row_cnt)
831       {
832          diag_i[i2] = diag_cnt;
833          offd_i[i2] = offd_cnt;
834          row_cnt++;
835       }
836       if (J < first_col_diag || J > last_col_diag)
837       {
838          tmp_j[offd_cnt] = J;
839          offd_data[offd_cnt++] = data;
840       }
841       else
842       {
843          diag_j[diag_cnt] = (HYPRE_Int)(J - first_col_diag);
844          diag_data[diag_cnt++] = data;
845       }
846    }
847    diag_i[num_rows] = diag_cnt;
848    offd_i[num_rows] = offd_cnt;
849 
850    fclose(file);
851 
852    /*  generate col_map_offd */
853    if (num_nonzeros_offd)
854    {
855       aux_offd_j = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros_offd, HYPRE_MEMORY_HOST);
856       for (i=0; i < num_nonzeros_offd; i++)
857          aux_offd_j[i] = (HYPRE_BigInt)offd_j[i];
858       hypre_BigQsort0(aux_offd_j,0,num_nonzeros_offd-1);
859       col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix);
860       col_map_offd[0] = aux_offd_j[0];
861       offd_cnt = 0;
862       for (i=1; i < num_nonzeros_offd; i++)
863       {
864          if (aux_offd_j[i] > col_map_offd[offd_cnt])
865             col_map_offd[++offd_cnt] = aux_offd_j[i];
866       }
867       for (i=0; i < num_nonzeros_offd; i++)
868       {
869          offd_j[i] = hypre_BigBinarySearch(col_map_offd, tmp_j[i], num_cols_offd);
870       }
871       hypre_TFree(aux_offd_j, HYPRE_MEMORY_HOST);
872       hypre_TFree(tmp_j, HYPRE_MEMORY_HOST);
873    }
874 
   /* move the diagonal element to the first position in each row */
876    for (i=0; i < num_rows; i++)
877    {
878       i_col = diag_i[i];
879       for (j=i_col; j < diag_i[i+1]; j++)
880       {
881          if (diag_j[j] == i)
882          {
883             diag_j[j] = diag_j[i_col];
884             data = diag_data[j];
885             diag_data[j] = diag_data[i_col];
886             diag_data[i_col] = data;
887             diag_j[i_col] = i;
888             break;
889          }
890       }
891    }
892 
893    *base_i_ptr = base_i;
894    *base_j_ptr = base_j;
895    *matrix_ptr = matrix;
896 
897    return hypre_error_flag;
898 }
899 
900 /*--------------------------------------------------------------------------
901  * hypre_ParCSRMatrixGetLocalRange
 * Returns the global indices of the first and last rows and diagonal columns
 * owned by this processor; the "end" values are inclusive (they refer to the
 * last local row and column).
904  *--------------------------------------------------------------------------*/
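/* Usage sketch (illustrative only); the returned bounds are inclusive:

      HYPRE_BigInt row_start, row_end, col_start, col_end;
      hypre_ParCSRMatrixGetLocalRange(A, &row_start, &row_end,
                                      &col_start, &col_end);
*/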
905 
906 HYPRE_Int
hypre_ParCSRMatrixGetLocalRange( hypre_ParCSRMatrix *matrix,
908                                  HYPRE_BigInt       *row_start,
909                                  HYPRE_BigInt       *row_end,
910                                  HYPRE_BigInt       *col_start,
911                                  HYPRE_BigInt       *col_end )
912 {
913    HYPRE_Int my_id;
914 
915    if (!matrix)
916    {
917       hypre_error_in_arg(1);
918       return hypre_error_flag;
919    }
920 
921    hypre_MPI_Comm_rank( hypre_ParCSRMatrixComm(matrix), &my_id );
922 
923    *row_start = hypre_ParCSRMatrixFirstRowIndex(matrix);
924    *row_end = hypre_ParCSRMatrixLastRowIndex(matrix);
925    *col_start =  hypre_ParCSRMatrixFirstColDiag(matrix);
926    *col_end =  hypre_ParCSRMatrixLastColDiag(matrix);
927 
928    return hypre_error_flag;
929 }
930 
931 /*--------------------------------------------------------------------------
932  * hypre_ParCSRMatrixGetRow
933  * Returns global column indices and/or values for a given row in the global
934  * matrix. Global row number is used, but the row must be stored locally or
935  * an error is returned. This implementation copies from the two matrices that
936  * store the local data, storing them in the hypre_ParCSRMatrix structure.
 * Only a single row can be accessed via this function at any one time; the
 * corresponding RestoreRow function must be called to avoid leaking memory
 * and to be able to look at another row.
940  * Either one of col_ind and values can be left null, and those values will
941  * not be returned.
942  * All indices are returned in 0-based indexing, no matter what is used under
943  * the hood. EXCEPTION: currently this only works if the local CSR matrices
944  * use 0-based indexing.
945  * This code, semantics, implementation, etc., are all based on PETSc's hypre_MPI_AIJ
946  * matrix code, adjusted for our data and software structures.
947  * AJC 4/99.
948  *--------------------------------------------------------------------------*/
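/* Usage sketch (illustrative only): fetch one locally owned row, then release
   it with RestoreRow before requesting another.

      HYPRE_Int      size;
      HYPRE_BigInt  *col_ind;
      HYPRE_Complex *values;
      HYPRE_BigInt   row = hypre_ParCSRMatrixFirstRowIndex(A);

      hypre_ParCSRMatrixGetRow(A, row, &size, &col_ind, &values);
      ... read col_ind[0..size-1] and values[0..size-1] ...
      hypre_ParCSRMatrixRestoreRow(A, row, &size, &col_ind, &values);
*/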
949 
950 HYPRE_Int
hypre_ParCSRMatrixGetRowHost( hypre_ParCSRMatrix  *mat,
952                               HYPRE_BigInt         row,
953                               HYPRE_Int           *size,
954                               HYPRE_BigInt       **col_ind,
955                               HYPRE_Complex      **values )
956 {
957    HYPRE_Int my_id;
958    HYPRE_BigInt row_start, row_end;
959    hypre_CSRMatrix *Aa;
960    hypre_CSRMatrix *Ba;
961 
962    if (!mat)
963    {
964       hypre_error_in_arg(1);
965       return hypre_error_flag;
966    }
967 
968    Aa = (hypre_CSRMatrix *) hypre_ParCSRMatrixDiag(mat);
969    Ba = (hypre_CSRMatrix *) hypre_ParCSRMatrixOffd(mat);
970 
971    if (hypre_ParCSRMatrixGetrowactive(mat))
972    {
973       return(-1);
974    }
975 
976    hypre_MPI_Comm_rank( hypre_ParCSRMatrixComm(mat), &my_id );
977 
978    hypre_ParCSRMatrixGetrowactive(mat) = 1;
979    row_start = hypre_ParCSRMatrixFirstRowIndex(mat);
980    row_end = hypre_ParCSRMatrixLastRowIndex(mat) + 1;
981    if (row < row_start || row >= row_end)
982    {
983       return(-1);
984    }
985 
986    /* if buffer is not allocated and some information is requested,
987       allocate buffer */
988    if (!hypre_ParCSRMatrixRowvalues(mat) && ( col_ind || values ))
989    {
990       /*
991         allocate enough space to hold information from the longest row.
992       */
993       HYPRE_Int max = 1,tmp;
994       HYPRE_Int i;
995       HYPRE_Int m = row_end - row_start;
996 
997       for ( i = 0; i < m; i++ )
998       {
999          tmp = hypre_CSRMatrixI(Aa)[i+1] - hypre_CSRMatrixI(Aa)[i] +
1000                hypre_CSRMatrixI(Ba)[i+1] - hypre_CSRMatrixI(Ba)[i];
1001          if (max < tmp)
1002          {
1003             max = tmp;
1004          }
1005       }
1006 
1007       hypre_ParCSRMatrixRowvalues(mat)  =
1008          (HYPRE_Complex *) hypre_CTAlloc(HYPRE_Complex, max, hypre_ParCSRMatrixMemoryLocation(mat));
1009       hypre_ParCSRMatrixRowindices(mat) =
1010          (HYPRE_BigInt *)  hypre_CTAlloc(HYPRE_BigInt,  max, hypre_ParCSRMatrixMemoryLocation(mat));
1011    }
1012 
1013    /* Copy from dual sequential matrices into buffer */
1014    {
1015       HYPRE_Complex    *vworkA, *vworkB, *v_p;
1016       HYPRE_Int        i, *cworkA, *cworkB;
1017       HYPRE_BigInt     cstart = hypre_ParCSRMatrixFirstColDiag(mat);
1018       HYPRE_Int        nztot, nzA, nzB, lrow = (HYPRE_Int)(row-row_start);
1019       HYPRE_BigInt     *cmap, *idx_p;
1020 
1021       nzA = hypre_CSRMatrixI(Aa)[lrow+1] - hypre_CSRMatrixI(Aa)[lrow];
1022       cworkA = &( hypre_CSRMatrixJ(Aa)[ hypre_CSRMatrixI(Aa)[lrow] ] );
1023       vworkA = &( hypre_CSRMatrixData(Aa)[ hypre_CSRMatrixI(Aa)[lrow] ] );
1024 
1025       nzB = hypre_CSRMatrixI(Ba)[lrow+1] - hypre_CSRMatrixI(Ba)[lrow];
1026       cworkB = &( hypre_CSRMatrixJ(Ba)[ hypre_CSRMatrixI(Ba)[lrow] ] );
1027       vworkB = &( hypre_CSRMatrixData(Ba)[ hypre_CSRMatrixI(Ba)[lrow] ] );
1028 
1029       nztot = nzA + nzB;
1030 
1031       cmap = hypre_ParCSRMatrixColMapOffd(mat);
1032 
1033       if (values || col_ind)
1034       {
1035          if (nztot)
1036          {
1037             /* Sort by increasing column numbers, assuming A and B already sorted */
1038             HYPRE_Int imark = -1;
1039 
1040             if (values)
1041             {
1042                *values = v_p = hypre_ParCSRMatrixRowvalues(mat);
1043                for ( i = 0; i < nzB; i++ )
1044                {
1045                   if (cmap[cworkB[i]] < cstart)
1046                   {
1047                      v_p[i] = vworkB[i];
1048                   }
1049                   else
1050                   {
1051                      break;
1052                   }
1053                }
1054                imark = i;
1055                for ( i = 0; i < nzA; i++ )
1056                {
1057                   v_p[imark+i] = vworkA[i];
1058                }
1059                for ( i = imark; i < nzB; i++ )
1060                {
1061                   v_p[nzA+i] = vworkB[i];
1062                }
1063             }
1064 
1065             if (col_ind)
1066             {
1067                *col_ind = idx_p = hypre_ParCSRMatrixRowindices(mat);
1068                if (imark > -1)
1069                {
1070                   for ( i = 0; i < imark; i++ )
1071                   {
1072                      idx_p[i] = cmap[cworkB[i]];
1073                   }
1074                }
1075                else
1076                {
1077                   for ( i = 0; i < nzB; i++ )
1078                   {
1079                      if (cmap[cworkB[i]] < cstart)
1080                      {
1081                         idx_p[i] = cmap[cworkB[i]];
1082                      }
1083                      else
1084                      {
1085                         break;
1086                      }
1087                   }
1088                   imark = i;
1089                }
1090                for ( i = 0; i < nzA; i++ )
1091                {
1092                   idx_p[imark+i] = cstart + cworkA[i];
1093                }
1094                for ( i = imark; i < nzB; i++ )
1095                {
1096                   idx_p[nzA+i] = cmap[cworkB[i]];
1097                }
1098             }
1099          }
1100          else
1101          {
1102             if (col_ind)
1103             {
1104                *col_ind = 0;
1105             }
1106             if (values)
1107             {
1108                *values = 0;
1109             }
1110          }
1111       }
1112 
1113       *size = nztot;
1114    } /* End of copy */
1115 
1116    return hypre_error_flag;
1117 }
1118 
1119 
1120 HYPRE_Int
hypre_ParCSRMatrixGetRow( hypre_ParCSRMatrix  *mat,
1122                           HYPRE_BigInt         row,
1123                           HYPRE_Int           *size,
1124                           HYPRE_BigInt       **col_ind,
1125                           HYPRE_Complex      **values )
1126 {
1127 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1128    HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(mat) );
1129 
1130    if (exec == HYPRE_EXEC_DEVICE)
1131    {
1132       return hypre_ParCSRMatrixGetRowDevice(mat, row, size, col_ind, values);
1133    }
1134    else
1135 #endif
1136    {
1137       return hypre_ParCSRMatrixGetRowHost(mat, row, size, col_ind, values);
1138    }
1139 
1140    return hypre_error_flag;
1141 }
1142 
1143 /*--------------------------------------------------------------------------
1144  * hypre_ParCSRMatrixRestoreRow
1145  *--------------------------------------------------------------------------*/
1146 
1147 HYPRE_Int
hypre_ParCSRMatrixRestoreRow( hypre_ParCSRMatrix *matrix,
1149                               HYPRE_BigInt        row,
1150                               HYPRE_Int          *size,
1151                               HYPRE_BigInt      **col_ind,
1152                               HYPRE_Complex     **values )
1153 {
1154    if (!hypre_ParCSRMatrixGetrowactive(matrix))
1155    {
1156       hypre_error(HYPRE_ERROR_GENERIC);
1157       return hypre_error_flag;
1158    }
1159 
1160    hypre_ParCSRMatrixGetrowactive(matrix) = 0;
1161 
1162    return hypre_error_flag;
1163 }
1164 
1165 /*--------------------------------------------------------------------------
1166  * hypre_CSRMatrixToParCSRMatrix:
1167  *
1168  * Generates a ParCSRMatrix distributed across the processors in comm
1169  * from a CSRMatrix on proc 0 .
 * from a CSRMatrix on proc 0.
1171  *--------------------------------------------------------------------------*/
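/* Usage sketch (illustrative only): distribute a sequential CSR matrix A_seq
   assembled on proc 0; passing NULL for the partitionings lets hypre generate
   them. The contents of A_seq are only read on proc 0.

      hypre_ParCSRMatrix *par_A =
         hypre_CSRMatrixToParCSRMatrix(comm, A_seq, NULL, NULL);
*/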
1172 
1173 hypre_ParCSRMatrix *
hypre_CSRMatrixToParCSRMatrix( MPI_Comm         comm,
1175                                hypre_CSRMatrix *A,
1176                                HYPRE_BigInt    *global_row_starts,
1177                                HYPRE_BigInt    *global_col_starts )
1178 {
1179    hypre_ParCSRMatrix *parcsr_A;
1180 
1181    HYPRE_BigInt       *global_data;
1182    HYPRE_BigInt        global_size;
1183    HYPRE_BigInt        global_num_rows;
1184    HYPRE_BigInt        global_num_cols;
1185 
1186    HYPRE_Int           num_procs, my_id;
1187    HYPRE_Int          *num_rows_proc;
1188    HYPRE_Int          *num_nonzeros_proc;
1189    HYPRE_BigInt       *row_starts = NULL;
1190    HYPRE_BigInt       *col_starts = NULL;
1191 
1192    hypre_CSRMatrix    *local_A;
1193    HYPRE_Complex      *A_data;
1194    HYPRE_Int          *A_i;
1195    HYPRE_Int          *A_j;
1196 
1197    hypre_MPI_Request  *requests;
1198    hypre_MPI_Status   *status, status0;
1199    hypre_MPI_Datatype *csr_matrix_datatypes;
1200 
1201    HYPRE_Int           free_global_row_starts = 0;
1202    HYPRE_Int           free_global_col_starts = 0;
1203 
1204    HYPRE_Int           total_size;
1205    HYPRE_BigInt        first_col_diag;
1206    HYPRE_BigInt        last_col_diag;
1207    HYPRE_Int           num_rows;
1208    HYPRE_Int           num_nonzeros;
1209    HYPRE_Int           i, ind;
1210 
1211    hypre_MPI_Comm_rank(comm, &my_id);
1212    hypre_MPI_Comm_size(comm, &num_procs);
1213 
1214    total_size = 4;
1215    if (my_id == 0)
1216    {
1217       total_size += 2*(num_procs + 1);
1218    }
1219 
1220    global_data = hypre_CTAlloc(HYPRE_BigInt, total_size, HYPRE_MEMORY_HOST);
1221    if (my_id == 0)
1222    {
1223       global_size = 3;
1224       if (global_row_starts)
1225       {
1226          if (global_col_starts)
1227          {
1228             if (global_col_starts != global_row_starts)
1229             {
               /* global_data[3] encodes which partitionings were passed in:
                  if 0: global_row_starts = global_col_starts, only global_row_starts given
                  if 1: only global_row_starts given, global_col_starts = NULL
                  if 2: both global_row_starts and global_col_starts given
                  if 3: only global_col_starts given, global_row_starts = NULL */
1235                global_data[3] = 2;
1236                global_size += (HYPRE_BigInt) (2*(num_procs + 1) + 1);
1237                for (i = 0; i < (num_procs + 1); i++)
1238                {
1239                   global_data[i+4] = global_row_starts[i];
1240                }
1241                for (i = 0; i < (num_procs + 1); i++)
1242                {
1243                   global_data[i+num_procs+5] = global_col_starts[i];
1244                }
1245             }
1246             else
1247             {
1248                global_data[3] = 0;
1249                global_size += (HYPRE_BigInt) ((num_procs + 1) + 1);
1250                for (i = 0; i < (num_procs + 1); i++)
1251                {
1252                   global_data[i+4] = global_row_starts[i];
1253                }
1254             }
1255          }
1256          else
1257          {
1258             global_data[3] = 1;
1259             global_size += (HYPRE_BigInt) ((num_procs + 1) + 1);
1260             for (i = 0; i < (num_procs + 1); i++)
1261             {
1262                global_data[i+4] = global_row_starts[i];
1263             }
1264          }
1265       }
1266       else
1267       {
1268          if (global_col_starts)
1269          {
1270             global_data[3] = 3;
1271             global_size += (HYPRE_BigInt) ((num_procs + 1) + 1);
1272             for (i = 0; i < (num_procs + 1); i++)
1273             {
1274                global_data[i+4] = global_col_starts[i];
1275             }
1276          }
1277       }
1278 
1279       global_data[0] = (HYPRE_BigInt) hypre_CSRMatrixNumRows(A);
1280       global_data[1] = (HYPRE_BigInt) hypre_CSRMatrixNumCols(A);
1281       global_data[2] = global_size;
1282       A_data = hypre_CSRMatrixData(A);
1283       A_i = hypre_CSRMatrixI(A);
1284       A_j = hypre_CSRMatrixJ(A);
1285    }
1286    hypre_MPI_Bcast(global_data, 3, HYPRE_MPI_BIG_INT, 0, comm);
1287    global_num_rows = global_data[0];
1288    global_num_cols = global_data[1];
1289    global_size     = global_data[2];
1290 
1291    if (global_size > 3)
1292    {
1293       HYPRE_Int  send_start;
1294 
1295       if (global_data[3] == 2)
1296       {
1297          row_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1298          col_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1299 
1300          send_start = 4;
1301          hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1302                            &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm);
1303 
1304          send_start = 5;
1305          hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1306                            &row_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm);
1307 
1308          send_start = 4 + (num_procs + 1);
1309          hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1310                            &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm);
1311 
1312          send_start = 5 + (num_procs + 1);
1313          hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1314                            &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm);
1315       }
1316       else if ((global_data[3] == 0) || (global_data[3] == 1))
1317       {
1318          row_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1319 
1320          send_start = 4;
1321          hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1322                            &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm);
1323 
1324          send_start = 5;
1325          hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1326                            &row_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm);
1327 
1328          if (global_data[3] == 0)
1329          {
1330             col_starts = row_starts;
1331          }
1332       }
1333       else
1334       {
1335          col_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1336 
1337          send_start = 4;
1338          hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1339                            &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm);
1340 
1341          send_start = 5;
1342          hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1343                            &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm);
1344       }
1345    }
1346    hypre_TFree(global_data, HYPRE_MEMORY_HOST);
1347 
1348    // Create ParCSR matrix
1349    parcsr_A = hypre_ParCSRMatrixCreate(comm, global_num_rows, global_num_cols,
1350                                        row_starts, col_starts, 0, 0, 0);
1351    hypre_TFree(row_starts, HYPRE_MEMORY_HOST);
1352    hypre_TFree(col_starts, HYPRE_MEMORY_HOST);
1353 
1354    // Allocate memory for building ParCSR matrix
1355    num_rows_proc     = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST);
1356    num_nonzeros_proc = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST);
1357 
1358    if (my_id == 0)
1359    {
1360       if (!global_row_starts)
1361       {
1362          hypre_GeneratePartitioning(global_num_rows, num_procs, &global_row_starts);
1363          free_global_row_starts = 1;
1364       }
1365       if (!global_col_starts)
1366       {
1367          hypre_GeneratePartitioning(global_num_rows, num_procs, &global_col_starts);
1368          free_global_col_starts = 1;
1369       }
1370 
1371       for (i = 0; i < num_procs; i++)
1372       {
1373          num_rows_proc[i] = (HYPRE_Int) (global_row_starts[i+1] - global_row_starts[i]);
1374          num_nonzeros_proc[i] = A_i[(HYPRE_Int)global_row_starts[i+1]] -
1375                                 A_i[(HYPRE_Int)global_row_starts[i]];
1376       }
1377       //num_nonzeros_proc[num_procs-1] = A_i[(HYPRE_Int)global_num_rows] - A_i[(HYPRE_Int)row_starts[num_procs-1]];
1378    }
1379    hypre_MPI_Scatter(num_rows_proc, 1, HYPRE_MPI_INT, &num_rows, 1, HYPRE_MPI_INT, 0, comm);
1380    hypre_MPI_Scatter(num_nonzeros_proc, 1, HYPRE_MPI_INT, &num_nonzeros, 1, HYPRE_MPI_INT, 0, comm);
1381 
1382    /* RL: this is not correct: (HYPRE_Int) global_num_cols */
1383    local_A = hypre_CSRMatrixCreate(num_rows, (HYPRE_Int) global_num_cols, num_nonzeros);
1384 
1385    csr_matrix_datatypes = hypre_CTAlloc(hypre_MPI_Datatype,  num_procs, HYPRE_MEMORY_HOST);
1386    if (my_id == 0)
1387    {
1388       requests = hypre_CTAlloc(hypre_MPI_Request, num_procs-1, HYPRE_MEMORY_HOST);
1389       status = hypre_CTAlloc(hypre_MPI_Status, num_procs-1, HYPRE_MEMORY_HOST);
1390       for (i = 1; i < num_procs; i++)
1391       {
1392          ind = A_i[(HYPRE_Int) global_row_starts[i]];
1393 
1394          hypre_BuildCSRMatrixMPIDataType(num_nonzeros_proc[i],
1395                                          num_rows_proc[i],
1396                                          &A_data[ind],
1397                                          &A_i[(HYPRE_Int) global_row_starts[i]],
1398                                          &A_j[ind],
1399                                          &csr_matrix_datatypes[i]);
1400          hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, comm,
1401                          &requests[i-1]);
1402          hypre_MPI_Type_free(&csr_matrix_datatypes[i]);
1403       }
1404       hypre_CSRMatrixData(local_A) = A_data;
1405       hypre_CSRMatrixI(local_A) = A_i;
1406       hypre_CSRMatrixJ(local_A) = A_j;
1407       hypre_CSRMatrixOwnsData(local_A) = 0;
1408 
1409       hypre_MPI_Waitall(num_procs-1, requests, status);
1410 
1411       hypre_TFree(requests, HYPRE_MEMORY_HOST);
1412       hypre_TFree(status, HYPRE_MEMORY_HOST);
1413       hypre_TFree(num_rows_proc, HYPRE_MEMORY_HOST);
1414       hypre_TFree(num_nonzeros_proc, HYPRE_MEMORY_HOST);
1415 
1416       if (free_global_row_starts)
1417       {
1418          hypre_TFree(global_row_starts, HYPRE_MEMORY_HOST);
1419       }
1420       if (free_global_col_starts)
1421       {
1422          hypre_TFree(global_col_starts, HYPRE_MEMORY_HOST);
1423       }
1424    }
1425    else
1426    {
1427       hypre_CSRMatrixInitialize(local_A);
1428       hypre_BuildCSRMatrixMPIDataType(num_nonzeros,
1429                                       num_rows,
1430                                       hypre_CSRMatrixData(local_A),
1431                                       hypre_CSRMatrixI(local_A),
1432                                       hypre_CSRMatrixJ(local_A),
1433                                       &csr_matrix_datatypes[0]);
1434       hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0], 0, 0, comm, &status0);
1435       hypre_MPI_Type_free(csr_matrix_datatypes);
1436    }
1437 
1438    first_col_diag = hypre_ParCSRMatrixFirstColDiag(parcsr_A);
1439    last_col_diag  = hypre_ParCSRMatrixLastColDiag(parcsr_A);
1440 
1441    GenerateDiagAndOffd(local_A, parcsr_A, first_col_diag, last_col_diag);
1442 
1443    /* set pointers back to NULL before destroying */
1444    if (my_id == 0)
1445    {
1446       hypre_CSRMatrixData(local_A) = NULL;
1447       hypre_CSRMatrixI(local_A) = NULL;
1448       hypre_CSRMatrixJ(local_A) = NULL;
1449    }
1450    hypre_CSRMatrixDestroy(local_A);
1451    hypre_TFree(csr_matrix_datatypes, HYPRE_MEMORY_HOST);
1452 
1453    return parcsr_A;
1454 }
1455 
1456 /* RL: XXX this is not a scalable routine, see `marker' therein */
1457 HYPRE_Int
1458 GenerateDiagAndOffd(hypre_CSRMatrix    *A,
1459                     hypre_ParCSRMatrix *matrix,
1460                     HYPRE_BigInt        first_col_diag,
1461                     HYPRE_BigInt        last_col_diag)
1462 {
1463    HYPRE_Int  i, j;
1464    HYPRE_Int  jo, jd;
1465    HYPRE_Int  num_rows = hypre_CSRMatrixNumRows(A);
1466    HYPRE_Int  num_cols = hypre_CSRMatrixNumCols(A);
1467    HYPRE_Complex *a_data = hypre_CSRMatrixData(A);
1468    HYPRE_Int *a_i = hypre_CSRMatrixI(A);
1469    /*RL: XXX FIXME if A spans global column space, the following a_j should be bigJ */
1470    HYPRE_Int *a_j = hypre_CSRMatrixJ(A);
1471 
1472    hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(matrix);
1473    hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix);
1474 
1475    HYPRE_BigInt  *col_map_offd;
1476 
1477    HYPRE_Complex *diag_data, *offd_data;
1478    HYPRE_Int  *diag_i, *offd_i;
1479    HYPRE_Int  *diag_j, *offd_j;
1480    HYPRE_Int  *marker;
1481    HYPRE_Int num_cols_diag, num_cols_offd;
1482    HYPRE_Int first_elmt = a_i[0];
1483    HYPRE_Int num_nonzeros = a_i[num_rows]-first_elmt;
1484    HYPRE_Int counter;
1485 
1486    num_cols_diag = (HYPRE_Int)(last_col_diag - first_col_diag +1);
1487    num_cols_offd = 0;
1488 
1489    HYPRE_MemoryLocation memory_location = hypre_CSRMatrixMemoryLocation(A);
1490 
1491    if (num_cols - num_cols_diag)
1492    {
1493       hypre_CSRMatrixInitialize_v2(diag, 0, memory_location);
1494       diag_i = hypre_CSRMatrixI(diag);
1495 
1496       hypre_CSRMatrixInitialize_v2(offd, 0, memory_location);
1497       offd_i = hypre_CSRMatrixI(offd);
1498       marker = hypre_CTAlloc(HYPRE_Int, num_cols, HYPRE_MEMORY_HOST);
1499 
1500       for (i=0; i < num_cols; i++)
1501       {
1502          marker[i] = 0;
1503       }
1504 
1505       jo = 0;
1506       jd = 0;
1507       for (i = 0; i < num_rows; i++)
1508       {
1509          offd_i[i] = jo;
1510          diag_i[i] = jd;
1511 
1512          for (j = a_i[i]-first_elmt; j < a_i[i+1]-first_elmt; j++)
1513          {
1514             if (a_j[j] < first_col_diag || a_j[j] > last_col_diag)
1515             {
1516                if (!marker[a_j[j]])
1517                {
1518                   marker[a_j[j]] = 1;
1519                   num_cols_offd++;
1520                }
1521                jo++;
1522             }
1523             else
1524             {
1525                jd++;
1526             }
1527          }
1528       }
1529       offd_i[num_rows] = jo;
1530       diag_i[num_rows] = jd;
1531 
1532       hypre_ParCSRMatrixColMapOffd(matrix) = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd, HYPRE_MEMORY_HOST);
1533       col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix);
1534 
1535       counter = 0;
1536       for (i = 0; i < num_cols; i++)
1537       {
1538          if (marker[i])
1539          {
1540             col_map_offd[counter] = (HYPRE_BigInt) i;
1541             marker[i] = counter;
1542             counter++;
1543          }
1544       }
1545 
1546       hypre_CSRMatrixNumNonzeros(diag) = jd;
1547       hypre_CSRMatrixInitialize(diag);
1548       diag_data = hypre_CSRMatrixData(diag);
1549       diag_j = hypre_CSRMatrixJ(diag);
1550 
1551       hypre_CSRMatrixNumNonzeros(offd) = jo;
1552       hypre_CSRMatrixNumCols(offd) = num_cols_offd;
1553       hypre_CSRMatrixInitialize(offd);
1554       offd_data = hypre_CSRMatrixData(offd);
1555       offd_j = hypre_CSRMatrixJ(offd);
1556 
1557       jo = 0;
1558       jd = 0;
1559       for (i=0; i < num_rows; i++)
1560       {
1561          for (j=a_i[i]-first_elmt; j < a_i[i+1]-first_elmt; j++)
1562          {
1563             if (a_j[j] < (HYPRE_Int)first_col_diag || a_j[j] > (HYPRE_Int)last_col_diag)
1564             {
1565                offd_data[jo] = a_data[j];
1566                offd_j[jo++] = marker[a_j[j]];
1567             }
1568             else
1569             {
1570                diag_data[jd] = a_data[j];
1571                diag_j[jd++] = (HYPRE_Int)(a_j[j]-first_col_diag);
1572             }
1573          }
1574       }
1575       hypre_TFree(marker, HYPRE_MEMORY_HOST);
1576    }
1577    else
1578    {
1579       hypre_CSRMatrixNumNonzeros(diag) = num_nonzeros;
1580       hypre_CSRMatrixInitialize(diag);
1581       diag_data = hypre_CSRMatrixData(diag);
1582       diag_i = hypre_CSRMatrixI(diag);
1583       diag_j = hypre_CSRMatrixJ(diag);
1584 
1585       for (i=0; i < num_nonzeros; i++)
1586       {
1587          diag_data[i] = a_data[i];
1588          diag_j[i] = a_j[i];
1589       }
1590       offd_i = hypre_CTAlloc(HYPRE_Int,  num_rows+1, HYPRE_MEMORY_HOST);
1591 
1592       for (i=0; i < num_rows+1; i++)
1593       {
1594          diag_i[i] = a_i[i];
1595          offd_i[i] = 0;
1596       }
1597 
1598       hypre_CSRMatrixNumCols(offd) = 0;
1599       hypre_CSRMatrixI(offd) = offd_i;
1600    }
1601 
1602    return hypre_error_flag;
1603 }
1604 
1605 hypre_CSRMatrix *
1606 hypre_MergeDiagAndOffd(hypre_ParCSRMatrix *par_matrix)
1607 {
1608    hypre_CSRMatrix  *diag = hypre_ParCSRMatrixDiag(par_matrix);
1609    hypre_CSRMatrix  *offd = hypre_ParCSRMatrixOffd(par_matrix);
1610    hypre_CSRMatrix  *matrix;
1611 
1612    HYPRE_BigInt       num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix);
1613    HYPRE_BigInt       first_col_diag = hypre_ParCSRMatrixFirstColDiag(par_matrix);
1614    HYPRE_BigInt      *col_map_offd = hypre_ParCSRMatrixColMapOffd(par_matrix);
1615    HYPRE_Int          num_rows = hypre_CSRMatrixNumRows(diag);
1616 
1617    HYPRE_Int          *diag_i = hypre_CSRMatrixI(diag);
1618    HYPRE_Int          *diag_j = hypre_CSRMatrixJ(diag);
1619    HYPRE_Complex      *diag_data = hypre_CSRMatrixData(diag);
1620    HYPRE_Int          *offd_i = hypre_CSRMatrixI(offd);
1621    HYPRE_Int          *offd_j = hypre_CSRMatrixJ(offd);
1622    HYPRE_Complex      *offd_data = hypre_CSRMatrixData(offd);
1623 
1624    HYPRE_Int          *matrix_i;
1625    HYPRE_BigInt       *matrix_j;
1626    HYPRE_Complex      *matrix_data;
1627 
1628    HYPRE_Int          num_nonzeros, i, j;
1629    HYPRE_Int          count;
1630    HYPRE_Int          size, rest, num_threads, ii;
1631 
1632    HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(par_matrix);
1633 
1634    num_nonzeros = diag_i[num_rows] + offd_i[num_rows];
1635 
1636    matrix = hypre_CSRMatrixCreate(num_rows,num_cols,num_nonzeros);
1637    hypre_CSRMatrixMemoryLocation(matrix) = memory_location;
1638    hypre_CSRMatrixBigInitialize(matrix);
1639 
1640    matrix_i = hypre_CSRMatrixI(matrix);
1641    matrix_j = hypre_CSRMatrixBigJ(matrix);
1642    matrix_data = hypre_CSRMatrixData(matrix);
1643    num_threads = hypre_NumThreads();
1644    size = num_rows/num_threads;
1645    rest = num_rows - size*num_threads;
1646 
1647 #ifdef HYPRE_USING_OPENMP
1648 #pragma omp parallel for private(ii, i, j, count) HYPRE_SMP_SCHEDULE
1649 #endif
1650    for (ii = 0; ii < num_threads; ii++)
1651    {
1652       HYPRE_Int ns, ne;
1653       if (ii < rest)
1654       {
1655          ns = ii*size+ii;
1656          ne = (ii+1)*size+ii+1;
1657       }
1658       else
1659       {
1660          ns = ii*size+rest;
1661          ne = (ii+1)*size+rest;
1662       }
1663       count = diag_i[ns]+offd_i[ns];
1664       for (i = ns; i < ne; i++)
1665       {
1666          matrix_i[i] = count;
1667          for (j=diag_i[i]; j < diag_i[i+1]; j++)
1668          {
1669             matrix_data[count] = diag_data[j];
1670             matrix_j[count++] = (HYPRE_BigInt)diag_j[j]+first_col_diag;
1671          }
1672          for (j=offd_i[i]; j < offd_i[i+1]; j++)
1673          {
1674             matrix_data[count] = offd_data[j];
1675             matrix_j[count++] = col_map_offd[offd_j[j]];
1676          }
1677       }
1678    } /* end parallel region */
1679 
1680    matrix_i[num_rows] = num_nonzeros;
1681 
1682    return matrix;
1683 }
1684 
1685 /*--------------------------------------------------------------------------
1686  * hypre_ParCSRMatrixToCSRMatrixAll:
1687  * generates a sequential CSRMatrix from a ParCSRMatrix on all processors
1688  * that own parts of the ParCSRMatrix
1689  * Warning: this only works for a ParCSRMatrix whose global dimensions and number of nonzeros are smaller than 2^31-1
1690  *--------------------------------------------------------------------------*/
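/*--------------------------------------------------------------------------
 * A minimal usage sketch (illustrative only; par_A is assumed to be an
 * already assembled hypre_ParCSRMatrix):
 *
 *    hypre_CSRMatrix *full_A = hypre_ParCSRMatrixToCSRMatrixAll(par_A);
 *    if (full_A)
 *    {
 *       HYPRE_Int n   = hypre_CSRMatrixNumRows(full_A);
 *       HYPRE_Int nnz = hypre_CSRMatrixI(full_A)[n];
 *       hypre_CSRMatrixDestroy(full_A);
 *    }
 *
 * NULL is returned on any rank that owns no rows; every rank that does own
 * rows receives the complete matrix, so n and nnz above are the global row
 * count and the global number of nonzeros (hence the size warning above).
 *--------------------------------------------------------------------------*/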
1691 
1692 hypre_CSRMatrix *
1693 hypre_ParCSRMatrixToCSRMatrixAll(hypre_ParCSRMatrix *par_matrix)
1694 {
1695    MPI_Comm comm = hypre_ParCSRMatrixComm(par_matrix);
1696    hypre_CSRMatrix *matrix;
1697    hypre_CSRMatrix *local_matrix;
1698    HYPRE_Int num_rows = (HYPRE_Int)hypre_ParCSRMatrixGlobalNumRows(par_matrix);
1699    HYPRE_Int num_cols = (HYPRE_Int)hypre_ParCSRMatrixGlobalNumCols(par_matrix);
1700    HYPRE_Int *matrix_i;
1701    HYPRE_Int *matrix_j;
1702    HYPRE_Complex *matrix_data;
1703 
1704    HYPRE_Int *local_matrix_i;
1705    HYPRE_Int *local_matrix_j;
1706    HYPRE_Complex *local_matrix_data;
1707 
1708    HYPRE_Int i, j;
1709    HYPRE_Int local_num_rows;
1710    HYPRE_Int local_num_nonzeros;
1711    HYPRE_Int num_nonzeros;
1712    HYPRE_Int num_data;
1713    HYPRE_Int num_requests;
1714    HYPRE_Int vec_len, offset;
1715    HYPRE_Int start_index;
1716    HYPRE_Int proc_id;
1717    HYPRE_Int num_procs, my_id;
1718    HYPRE_Int num_types;
1719    HYPRE_Int *used_procs;
1720 
1721    hypre_MPI_Request *requests;
1722    hypre_MPI_Status *status;
1723 
1724    HYPRE_Int *new_vec_starts;
1725 
1726    HYPRE_Int num_contacts;
1727    HYPRE_Int contact_proc_list[1];
1728    HYPRE_Int contact_send_buf[1];
1729    HYPRE_Int contact_send_buf_starts[2];
1730    HYPRE_Int max_response_size;
1731    HYPRE_Int *response_recv_buf=NULL;
1732    HYPRE_Int *response_recv_buf_starts = NULL;
1733    hypre_DataExchangeResponse response_obj;
1734    hypre_ProcListElements send_proc_obj;
1735 
1736    HYPRE_Int *send_info = NULL;
1737    hypre_MPI_Status  status1;
1738    HYPRE_Int count, tag1 = 11112, tag2 = 22223, tag3 = 33334;
1739    HYPRE_Int start;
1740 
1741    hypre_MPI_Comm_size(comm, &num_procs);
1742    hypre_MPI_Comm_rank(comm, &my_id);
1743 
1744    local_num_rows = (HYPRE_Int)(hypre_ParCSRMatrixLastRowIndex(par_matrix)  -
1745       hypre_ParCSRMatrixFirstRowIndex(par_matrix) + 1);
1746 
1747 
1748    local_matrix = hypre_MergeDiagAndOffd(par_matrix); /* creates matrix */
1749    hypre_CSRMatrixBigJtoJ(local_matrix); /* copies big_j to j */
1750    local_matrix_i = hypre_CSRMatrixI(local_matrix);
1751    local_matrix_j = hypre_CSRMatrixJ(local_matrix);
1752    local_matrix_data = hypre_CSRMatrixData(local_matrix);
1753 
1754 
1755    /* determine procs that have vector data and store their ids in used_procs */
1756    /* we need to do a data exchange for this.  If I own rows then I will contact
1757       processor 0 with the endpoint of my local range */
1758 
1759    if (local_num_rows > 0)
1760    {
1761       num_contacts = 1;
1762       contact_proc_list[0] = 0;
1763       contact_send_buf[0] =  (HYPRE_Int)hypre_ParCSRMatrixLastRowIndex(par_matrix);
1764       contact_send_buf_starts[0] = 0;
1765       contact_send_buf_starts[1] = 1;
1766 
1767    }
1768    else
1769    {
1770       num_contacts = 0;
1771       contact_send_buf_starts[0] = 0;
1772       contact_send_buf_starts[1] = 0;
1773    }
1774    /*build the response object*/
1775    /*send_proc_obj will  be for saving info from contacts */
1776    send_proc_obj.length = 0;
1777    send_proc_obj.storage_length = 10;
1778    send_proc_obj.id = hypre_CTAlloc(HYPRE_Int,  send_proc_obj.storage_length, HYPRE_MEMORY_HOST);
1779    send_proc_obj.vec_starts =
1780       hypre_CTAlloc(HYPRE_Int,  send_proc_obj.storage_length + 1, HYPRE_MEMORY_HOST);
1781    send_proc_obj.vec_starts[0] = 0;
1782    send_proc_obj.element_storage_length = 10;
1783    send_proc_obj.elements =
1784       hypre_CTAlloc(HYPRE_BigInt,  send_proc_obj.element_storage_length, HYPRE_MEMORY_HOST);
1785 
1786    max_response_size = 0; /* each response is null */
1787    response_obj.fill_response = hypre_FillResponseParToCSRMatrix;
1788    response_obj.data1 = NULL;
1789    response_obj.data2 = &send_proc_obj; /*this is where we keep info from contacts*/
1790 
1791 
1792    hypre_DataExchangeList(num_contacts,
1793                           contact_proc_list, contact_send_buf,
1794                           contact_send_buf_starts, sizeof(HYPRE_Int),
1795                           sizeof(HYPRE_Int), &response_obj,
1796                           max_response_size, 1,
1797                           comm, (void**) &response_recv_buf,
1798                           &response_recv_buf_starts);
1799 
1800    /* now processor 0 should have a list of ranges for the processors that have rows -
1801       these are in send_proc_obj - it needs to create the new list of processors
1802       and an array of vec starts, and send these to the processors that own rows */
1803    if (my_id)
1804    {
1805       if (local_num_rows)
1806       {
1807          /* look for a message from processor 0 */
1808          hypre_MPI_Probe(0, tag1, comm, &status1);
1809          hypre_MPI_Get_count(&status1, HYPRE_MPI_INT, &count);
1810 
1811          send_info = hypre_CTAlloc(HYPRE_Int,  count, HYPRE_MEMORY_HOST);
1812          hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, comm, &status1);
1813 
1814          /* now unpack */
1815          num_types = send_info[0];
1816          used_procs =  hypre_CTAlloc(HYPRE_Int,  num_types, HYPRE_MEMORY_HOST);
1817          new_vec_starts = hypre_CTAlloc(HYPRE_Int,  num_types+1, HYPRE_MEMORY_HOST);
1818 
1819          for (i=1; i<= num_types; i++)
1820          {
1821             used_procs[i-1] = send_info[i];
1822          }
1823          for (i=num_types+1; i< count; i++)
1824          {
1825             new_vec_starts[i-num_types-1] = send_info[i];
1826          }
1827       }
1828       else /* clean up and exit */
1829       {
1830          hypre_TFree(send_proc_obj.vec_starts, HYPRE_MEMORY_HOST);
1831          hypre_TFree(send_proc_obj.id, HYPRE_MEMORY_HOST);
1832          hypre_TFree(send_proc_obj.elements, HYPRE_MEMORY_HOST);
1833          if(response_recv_buf)        hypre_TFree(response_recv_buf, HYPRE_MEMORY_HOST);
1834          if(response_recv_buf_starts) hypre_TFree(response_recv_buf_starts, HYPRE_MEMORY_HOST);
1835 
1836 
1837          if (hypre_CSRMatrixOwnsData(local_matrix))
1838             hypre_CSRMatrixDestroy(local_matrix);
1839          else
1840             hypre_TFree(local_matrix, HYPRE_MEMORY_HOST);
1841 
1842          return NULL;
1843       }
1844    }
1845    else /* my_id ==0 */
1846    {
1847       num_types = send_proc_obj.length;
1848       used_procs =  hypre_CTAlloc(HYPRE_Int,  num_types, HYPRE_MEMORY_HOST);
1849       new_vec_starts = hypre_CTAlloc(HYPRE_Int,  num_types+1, HYPRE_MEMORY_HOST);
1850 
1851       new_vec_starts[0] = 0;
1852       for (i=0; i< num_types; i++)
1853       {
1854          used_procs[i] = send_proc_obj.id[i];
1855          new_vec_starts[i+1] = send_proc_obj.elements[i]+1;
1856       }
1857       hypre_qsort0(used_procs, 0, num_types-1);
1858       hypre_qsort0(new_vec_starts, 0, num_types);
1859       /*now we need to put into an array to send */
1860       count =  2*num_types+2;
1861       send_info = hypre_CTAlloc(HYPRE_Int,  count, HYPRE_MEMORY_HOST);
1862       send_info[0] = num_types;
1863       for (i=1; i<= num_types; i++)
1864       {
1865          send_info[i] = used_procs[i-1];
1866       }
1867       for (i=num_types+1; i< count; i++)
1868       {
1869          send_info[i] = new_vec_starts[i-num_types-1];
1870       }
1871       requests = hypre_CTAlloc(hypre_MPI_Request,  num_types, HYPRE_MEMORY_HOST);
1872       status =  hypre_CTAlloc(hypre_MPI_Status,  num_types, HYPRE_MEMORY_HOST);
1873 
1874       /* don't send to myself  - these are sorted so my id would be first*/
1875       start = 0;
1876       if (num_types && used_procs[0] == 0)
1877       {
1878          start = 1;
1879       }
1880 
1881       for (i=start; i < num_types; i++)
1882       {
1883          hypre_MPI_Isend(send_info, count, HYPRE_MPI_INT, used_procs[i], tag1,
1884                          comm, &requests[i-start]);
1885       }
1886       hypre_MPI_Waitall(num_types-start, requests, status);
1887 
1888       hypre_TFree(status, HYPRE_MEMORY_HOST);
1889       hypre_TFree(requests, HYPRE_MEMORY_HOST);
1890    }
1891    /* clean up */
1892    hypre_TFree(send_proc_obj.vec_starts, HYPRE_MEMORY_HOST);
1893    hypre_TFree(send_proc_obj.id, HYPRE_MEMORY_HOST);
1894    hypre_TFree(send_proc_obj.elements, HYPRE_MEMORY_HOST);
1895    hypre_TFree(send_info, HYPRE_MEMORY_HOST);
1896    if(response_recv_buf)        hypre_TFree(response_recv_buf, HYPRE_MEMORY_HOST);
1897    if(response_recv_buf_starts) hypre_TFree(response_recv_buf_starts, HYPRE_MEMORY_HOST);
1898 
1899    /* now proc 0 can exit if it has no rows */
1900    if (!local_num_rows)
1901    {
1902       if (hypre_CSRMatrixOwnsData(local_matrix))
1903          hypre_CSRMatrixDestroy(local_matrix);
1904       else
1905          hypre_TFree(local_matrix, HYPRE_MEMORY_HOST);
1906 
1907       hypre_TFree(new_vec_starts, HYPRE_MEMORY_HOST);
1908       hypre_TFree(used_procs, HYPRE_MEMORY_HOST);
1909 
1910       return NULL;
1911    }
1912 
1913    /* everyone left has rows and knows: new_vec_starts, num_types, and used_procs */
1914 
1915    /* this matrix should be rather small */
1916    matrix_i = hypre_CTAlloc(HYPRE_Int,  num_rows+1, HYPRE_MEMORY_HOST);
1917 
1918    num_requests = 4*num_types;
1919    requests = hypre_CTAlloc(hypre_MPI_Request,  num_requests, HYPRE_MEMORY_HOST);
1920    status = hypre_CTAlloc(hypre_MPI_Status,  num_requests, HYPRE_MEMORY_HOST);
1921 
1922    /* exchange contents of local_matrix_i - here we are sending to ourselves also */
1923 
1924    j = 0;
1925    for (i = 0; i < num_types; i++)
1926    {
1927       proc_id = used_procs[i];
1928       vec_len = (HYPRE_Int)(new_vec_starts[i+1] - new_vec_starts[i]);
1929       hypre_MPI_Irecv(&matrix_i[new_vec_starts[i]+1], vec_len, HYPRE_MPI_INT,
1930                       proc_id, tag2, comm, &requests[j++]);
1931    }
1932    for (i = 0; i < num_types; i++)
1933    {
1934       proc_id = used_procs[i];
1935       hypre_MPI_Isend(&local_matrix_i[1], local_num_rows, HYPRE_MPI_INT,
1936                       proc_id, tag2, comm, &requests[j++]);
1937    }
1938 
1939    hypre_MPI_Waitall(j, requests, status);
1940 
1941    /* generate matrix_i from received data */
1942    /* global numbering?*/
1943    offset = matrix_i[new_vec_starts[1]];
1944    for (i=1; i < num_types; i++)
1945    {
1946       for (j = new_vec_starts[i]; j < new_vec_starts[i+1]; j++)
1947          matrix_i[j+1] += offset;
1948       offset = matrix_i[new_vec_starts[i+1]];
1949    }
1950 
1951    num_nonzeros = matrix_i[num_rows];
1952 
1953    matrix = hypre_CSRMatrixCreate(num_rows, num_cols, num_nonzeros);
1954 
1955    hypre_CSRMatrixMemoryLocation(matrix) = HYPRE_MEMORY_HOST;
1956 
1957    hypre_CSRMatrixI(matrix) = matrix_i;
1958    hypre_CSRMatrixInitialize(matrix);
1959    matrix_j = hypre_CSRMatrixJ(matrix);
1960    matrix_data = hypre_CSRMatrixData(matrix);
1961 
1962    /* generate datatypes for further data exchange and exchange remaining
1963       data, i.e. column info and actual data */
1964 
1965    j = 0;
1966    for (i = 0; i < num_types; i++)
1967    {
1968       proc_id = used_procs[i];
1969       start_index = matrix_i[(HYPRE_Int)new_vec_starts[i]];
1970       num_data = matrix_i[(HYPRE_Int)new_vec_starts[i+1]] - start_index;
1971       hypre_MPI_Irecv(&matrix_data[start_index], num_data, HYPRE_MPI_COMPLEX,
1972                       used_procs[i], tag1, comm, &requests[j++]);
1973       hypre_MPI_Irecv(&matrix_j[start_index], num_data, HYPRE_MPI_INT,
1974                       used_procs[i], tag3, comm, &requests[j++]);
1975    }
1976    local_num_nonzeros = local_matrix_i[local_num_rows];
1977    for (i=0; i < num_types; i++)
1978    {
1979       hypre_MPI_Isend(local_matrix_data, local_num_nonzeros, HYPRE_MPI_COMPLEX,
1980                       used_procs[i], tag1, comm, &requests[j++]);
1981       hypre_MPI_Isend(local_matrix_j, local_num_nonzeros, HYPRE_MPI_INT,
1982                       used_procs[i], tag3, comm, &requests[j++]);
1983    }
1984 
1985 
1986    hypre_MPI_Waitall(num_requests, requests, status);
1987 
1988    hypre_TFree(new_vec_starts, HYPRE_MEMORY_HOST);
1989 
1990    if (hypre_CSRMatrixOwnsData(local_matrix))
1991       hypre_CSRMatrixDestroy(local_matrix);
1992    else
1993       hypre_TFree(local_matrix, HYPRE_MEMORY_HOST);
1994 
1995    if (num_requests)
1996    {
1997       hypre_TFree(requests, HYPRE_MEMORY_HOST);
1998       hypre_TFree(status, HYPRE_MEMORY_HOST);
1999       hypre_TFree(used_procs, HYPRE_MEMORY_HOST);
2000    }
2001 
2002    return matrix;
2003 }
2004 
2005 /*--------------------------------------------------------------------------
2006  * hypre_ParCSRMatrixCopy,
2007  * copies A to B,
2008  * if copy_data = 0, only the structure of A is copied to B
2009  * the routine does not check whether the dimensions of A and B are compatible
2010  *--------------------------------------------------------------------------*/
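/*--------------------------------------------------------------------------
 * Typical calls (illustrative only; B is assumed to have been created and
 * initialized with the same partitioning and sparsity sizes as A):
 *
 *    hypre_ParCSRMatrixCopy(A, B, 1);   copy structure and values
 *    hypre_ParCSRMatrixCopy(A, B, 0);   copy structure and col_map_offd only
 *--------------------------------------------------------------------------*/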
2011 
2012 HYPRE_Int
2013 hypre_ParCSRMatrixCopy( hypre_ParCSRMatrix *A,
2014                         hypre_ParCSRMatrix *B,
2015                         HYPRE_Int copy_data )
2016 {
2017    hypre_CSRMatrix *A_diag;
2018    hypre_CSRMatrix *A_offd;
2019    HYPRE_BigInt *col_map_offd_A;
2020    hypre_CSRMatrix *B_diag;
2021    hypre_CSRMatrix *B_offd;
2022    HYPRE_BigInt *col_map_offd_B;
2023    HYPRE_Int num_cols_offd_A;
2024    HYPRE_Int num_cols_offd_B;
2025 
2026    if (!A)
2027    {
2028       hypre_error_in_arg(1);
2029       return hypre_error_flag;
2030    }
2031    if (!B)
2032    {
2033       hypre_error_in_arg(2);
2034       return hypre_error_flag;
2035    }
2036 
2037    A_diag = hypre_ParCSRMatrixDiag(A);
2038    A_offd = hypre_ParCSRMatrixOffd(A);
2039    B_diag = hypre_ParCSRMatrixDiag(B);
2040    B_offd = hypre_ParCSRMatrixOffd(B);
2041 
2042    num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
2043    num_cols_offd_B = hypre_CSRMatrixNumCols(B_offd);
2044 
2045    hypre_assert(num_cols_offd_A == num_cols_offd_B);
2046 
2047    col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A);
2048    col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);
2049 
2050    hypre_CSRMatrixCopy(A_diag, B_diag, copy_data);
2051    hypre_CSRMatrixCopy(A_offd, B_offd, copy_data);
2052 
2053    /* should not happen if B has been initialized */
2054    if (num_cols_offd_B && col_map_offd_B == NULL)
2055    {
2056       col_map_offd_B = hypre_TAlloc(HYPRE_BigInt, num_cols_offd_B, HYPRE_MEMORY_HOST);
2057       hypre_ParCSRMatrixColMapOffd(B) = col_map_offd_B;
2058    }
2059 
2060    hypre_TMemcpy(col_map_offd_B, col_map_offd_A, HYPRE_BigInt, num_cols_offd_B,
2061                  HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
2062 
2063    return hypre_error_flag;
2064 }
2065 
2066 /*--------------------------------------------------------------------
2067  * hypre_FillResponseParToCSRMatrix
2068  * Fill response function for determining the send processors
2069  * data exchange
2070  *--------------------------------------------------------------------*/
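/*--------------------------------------------------------------------
 * This routine is not called directly; it is installed as the
 * fill_response callback of a hypre_DataExchangeResponse, with a
 * hypre_ProcListElements object in data2 that accumulates the contact
 * data.  A wiring sketch (illustrative only), mirroring the setup used
 * in hypre_ParCSRMatrixToCSRMatrixAll above:
 *
 *    hypre_ProcListElements     send_proc_obj;
 *    hypre_DataExchangeResponse response_obj;
 *
 *    send_proc_obj.length                 = 0;
 *    send_proc_obj.storage_length         = 10;
 *    send_proc_obj.id = hypre_CTAlloc(HYPRE_Int,
 *                          send_proc_obj.storage_length, HYPRE_MEMORY_HOST);
 *    send_proc_obj.vec_starts = hypre_CTAlloc(HYPRE_Int,
 *                          send_proc_obj.storage_length + 1, HYPRE_MEMORY_HOST);
 *    send_proc_obj.vec_starts[0]          = 0;
 *    send_proc_obj.element_storage_length = 10;
 *    send_proc_obj.elements = hypre_CTAlloc(HYPRE_BigInt,
 *                          send_proc_obj.element_storage_length, HYPRE_MEMORY_HOST);
 *
 *    response_obj.fill_response = hypre_FillResponseParToCSRMatrix;
 *    response_obj.data1         = NULL;
 *    response_obj.data2         = &send_proc_obj;
 *
 * and &response_obj is then passed to hypre_DataExchangeList().
 *--------------------------------------------------------------------*/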
2071 
2072 HYPRE_Int
2073 hypre_FillResponseParToCSRMatrix( void       *p_recv_contact_buf,
2074                                   HYPRE_Int   contact_size,
2075                                   HYPRE_Int   contact_proc,
2076                                   void       *ro,
2077                                   MPI_Comm    comm,
2078                                   void      **p_send_response_buf,
2079                                   HYPRE_Int *response_message_size )
2080 {
2081    HYPRE_Int    myid;
2082    HYPRE_Int    i, index, count, elength;
2083 
2084    HYPRE_BigInt    *recv_contact_buf = (HYPRE_BigInt * ) p_recv_contact_buf;
2085 
2086    hypre_DataExchangeResponse  *response_obj = (hypre_DataExchangeResponse*)ro;
2087 
2088    hypre_ProcListElements      *send_proc_obj = (hypre_ProcListElements*)response_obj->data2;
2089 
2090    hypre_MPI_Comm_rank(comm, &myid );
2091 
2092    /*check to see if we need to allocate more space in send_proc_obj for ids*/
2093    if (send_proc_obj->length == send_proc_obj->storage_length)
2094    {
2095       send_proc_obj->storage_length +=10; /*add space for 10 more processors*/
2096       send_proc_obj->id = hypre_TReAlloc(send_proc_obj->id, HYPRE_Int,
2097                                          send_proc_obj->storage_length, HYPRE_MEMORY_HOST);
2098       send_proc_obj->vec_starts =
2099          hypre_TReAlloc(send_proc_obj->vec_starts, HYPRE_Int,
2100                         send_proc_obj->storage_length + 1, HYPRE_MEMORY_HOST);
2101    }
2102 
2103    /*initialize*/
2104    count = send_proc_obj->length;
2105    index = send_proc_obj->vec_starts[count]; /*this is the number of elements*/
2106 
2107    /*send proc*/
2108    send_proc_obj->id[count] = contact_proc;
2109 
2110    /*do we need more storage for the elements?*/
2111    if (send_proc_obj->element_storage_length < index + contact_size)
2112    {
2113       elength = hypre_max(contact_size, 10);
2114       elength += index;
2115       send_proc_obj->elements = hypre_TReAlloc(send_proc_obj->elements,
2116                                                HYPRE_BigInt,  elength, HYPRE_MEMORY_HOST);
2117       send_proc_obj->element_storage_length = elength;
2118    }
2119    /*populate send_proc_obj*/
2120    for (i=0; i< contact_size; i++)
2121    {
2122       send_proc_obj->elements[index++] = recv_contact_buf[i];
2123    }
2124    send_proc_obj->vec_starts[count+1] = index;
2125    send_proc_obj->length++;
2126 
2127    /*output - no message to return (confirmation) */
2128    *response_message_size = 0;
2129 
2130    return hypre_error_flag;
2131 }
2132 
2133 /*--------------------------------------------------------------------------
2134  * hypre_ParCSRMatrixUnion
2135  * Creates and returns a new matrix whose elements are the union of A and B.
2136  * Data is not copied, only structural information is created.
2137  * A and B must have the same communicator, numbers and distributions of rows
2138  * and columns (they can differ in which row-column pairs are nonzero, thus
2139  * in which columns are in an offd block)
2140  *--------------------------------------------------------------------------*/
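/*--------------------------------------------------------------------------
 * A usage sketch (illustrative only; A and B are assumed to live on the
 * same communicator with identical row and column partitionings):
 *
 *    hypre_ParCSRMatrix *C = hypre_ParCSRMatrixUnion(A, B);
 *
 * C has a structural nonzero wherever A or B has one; since no values are
 * copied, the caller typically fills in the data afterwards and eventually
 * frees C with hypre_ParCSRMatrixDestroy(C).
 *--------------------------------------------------------------------------*/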
2141 
2142 hypre_ParCSRMatrix * hypre_ParCSRMatrixUnion( hypre_ParCSRMatrix * A,
2143                                               hypre_ParCSRMatrix * B )
2144 {
2145    hypre_ParCSRMatrix *C;
2146    HYPRE_BigInt       *col_map_offd_C = NULL;
2147    HYPRE_Int           my_id, p;
2148    MPI_Comm            comm = hypre_ParCSRMatrixComm( A );
2149 
2150    hypre_MPI_Comm_rank(comm, &my_id);
2151 
2152    C = hypre_CTAlloc( hypre_ParCSRMatrix,  1 , HYPRE_MEMORY_HOST);
2153    hypre_ParCSRMatrixComm( C ) = hypre_ParCSRMatrixComm( A );
2154    hypre_ParCSRMatrixGlobalNumRows( C ) = hypre_ParCSRMatrixGlobalNumRows( A );
2155    hypre_ParCSRMatrixGlobalNumCols( C ) = hypre_ParCSRMatrixGlobalNumCols( A );
2156    hypre_ParCSRMatrixFirstRowIndex( C ) = hypre_ParCSRMatrixFirstRowIndex( A );
2157    hypre_assert( hypre_ParCSRMatrixFirstRowIndex( B )
2158                  == hypre_ParCSRMatrixFirstRowIndex( A ) );
2159    hypre_TMemcpy(hypre_ParCSRMatrixRowStarts(C), hypre_ParCSRMatrixRowStarts(A),
2160                  HYPRE_BigInt, 2, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
2161    hypre_TMemcpy(hypre_ParCSRMatrixColStarts(C), hypre_ParCSRMatrixColStarts(A),
2162                  HYPRE_BigInt, 2, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
2163    for (p = 0; p < 2; ++p)
2164       hypre_assert( hypre_ParCSRMatrixColStarts(A)[p]
2165                     == hypre_ParCSRMatrixColStarts(B)[p] );
2166    hypre_ParCSRMatrixFirstColDiag( C ) = hypre_ParCSRMatrixFirstColDiag( A );
2167    hypre_ParCSRMatrixLastRowIndex( C ) = hypre_ParCSRMatrixLastRowIndex( A );
2168    hypre_ParCSRMatrixLastColDiag( C ) = hypre_ParCSRMatrixLastColDiag( A );
2169 
2170    hypre_ParCSRMatrixDiag( C ) =
2171       hypre_CSRMatrixUnion( hypre_ParCSRMatrixDiag(A), hypre_ParCSRMatrixDiag(B),
2172                             0, 0, 0 );
2173    hypre_ParCSRMatrixOffd( C ) =
2174       hypre_CSRMatrixUnion( hypre_ParCSRMatrixOffd(A), hypre_ParCSRMatrixOffd(B),
2175                             hypre_ParCSRMatrixColMapOffd(A),
2176                             hypre_ParCSRMatrixColMapOffd(B), &col_map_offd_C );
2177    hypre_ParCSRMatrixColMapOffd( C ) = col_map_offd_C;
2178    hypre_ParCSRMatrixCommPkg( C ) = NULL;
2179    hypre_ParCSRMatrixCommPkgT( C ) = NULL;
2180    hypre_ParCSRMatrixOwnsData( C ) = 1;
2181    /*  SetNumNonzeros, SetDNumNonzeros are global, need hypre_MPI_Allreduce.
2182        I suspect, but don't know, that other parts of hypre do not assume that
2183        the correct values have been set.
2184        hypre_ParCSRMatrixSetNumNonzeros( C );
2185        hypre_ParCSRMatrixSetDNumNonzeros( C );*/
2186    hypre_ParCSRMatrixNumNonzeros( C ) = 0;
2187    hypre_ParCSRMatrixDNumNonzeros( C ) = 0.0;
2188    hypre_ParCSRMatrixRowindices( C ) = NULL;
2189    hypre_ParCSRMatrixRowvalues( C ) = NULL;
2190    hypre_ParCSRMatrixGetrowactive( C ) = 0;
2191 
2192    return C;
2193 }
2194 
2195 /* Perform dual truncation of ParCSR matrix.
2196  * This code is adapted from original BoomerAMGInterpTruncate()
2197  * A: parCSR matrix to be modified
2198  * tol: relative tolerance or truncation factor for dropping small terms
2199  * max_row_elmts: maximum number of (largest) nonzero elements to keep.
2200  * rescale: Boolean on whether or not to scale the resulting matrix. The scale
2201  * factor for each row is sum(nonzero values before dropping) / sum(nonzero values after dropping);
2202  * this way, applying the truncated matrix to a constant vector gives the same
2203  * result as applying the original matrix.
2204  * nrm_type: type of norm used for dropping with tol.
2205  * -- 0 = infinity-norm
2206  * -- 1 = 1-norm
2207  * -- 2 = 2-norm
2208 */
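/* A usage sketch (illustrative only): drop entries smaller than 10% of the
 * row's infinity norm, keep at most 5 of the largest entries per row, and
 * rescale the survivors so that row sums are preserved; A is truncated in
 * place.
 *
 *    hypre_ParCSRMatrixTruncate(A, 0.1, 5, 1, 0);
 */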
2209 HYPRE_Int
2210 hypre_ParCSRMatrixTruncate(hypre_ParCSRMatrix *A,
2211                            HYPRE_Real          tol,
2212                            HYPRE_Int           max_row_elmts,
2213                            HYPRE_Int           rescale,
2214                            HYPRE_Int           nrm_type)
2215 {
2216 #ifdef HYPRE_PROFILE
2217    hypre_profile_times[HYPRE_TIMER_ID_INTERP_TRUNC] -= hypre_MPI_Wtime();
2218 #endif
2219 
2220    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2221    HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
2222    HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
2223    HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
2224    HYPRE_Int *A_diag_j_new;
2225    HYPRE_Real *A_diag_data_new;
2226 
2227    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
2228    HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
2229    HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
2230    HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
2231    HYPRE_Int *A_offd_j_new;
2232    HYPRE_Real *A_offd_data_new;
2233 
2234    HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
2235    HYPRE_Int num_cols = hypre_CSRMatrixNumCols(A_diag);
2236    HYPRE_Int i, j, start_j;
2237    HYPRE_Int ierr = 0;
2238    HYPRE_Int next_open;
2239    HYPRE_Int now_checking;
2240    HYPRE_Int num_lost;
2241    HYPRE_Int num_lost_global=0;
2242    HYPRE_Int next_open_offd;
2243    HYPRE_Int now_checking_offd;
2244    HYPRE_Int num_lost_offd;
2245    HYPRE_Int num_lost_global_offd;
2246    HYPRE_Int A_diag_size;
2247    HYPRE_Int A_offd_size;
2248    HYPRE_Int num_elmts;
2249    HYPRE_Int cnt, cnt_diag, cnt_offd;
2250    HYPRE_Real row_nrm;
2251    HYPRE_Real drop_coeff;
2252    HYPRE_Real row_sum;
2253    HYPRE_Real scale;
2254 
2255    HYPRE_MemoryLocation memory_location_diag = hypre_CSRMatrixMemoryLocation(A_diag);
2256    HYPRE_MemoryLocation memory_location_offd = hypre_CSRMatrixMemoryLocation(A_offd);
2257 
2258    /* Threading variables.  Entry i of num_lost_(offd_)per_thread  holds the
2259     * number of dropped entries over thread i's row range. Cum_lost_per_thread
2260     * will temporarily store the cumulative number of dropped entries up to
2261     * each thread. */
2262    HYPRE_Int my_thread_num, num_threads, start, stop;
2263    HYPRE_Int * max_num_threads = hypre_CTAlloc(HYPRE_Int,  1, HYPRE_MEMORY_HOST);
2264    HYPRE_Int * cum_lost_per_thread;
2265    HYPRE_Int * num_lost_per_thread;
2266    HYPRE_Int * num_lost_offd_per_thread;
2267 
2268    /* Initialize threading variables */
2269    max_num_threads[0] = hypre_NumThreads();
2270    cum_lost_per_thread = hypre_CTAlloc(HYPRE_Int,  max_num_threads[0], HYPRE_MEMORY_HOST);
2271    num_lost_per_thread = hypre_CTAlloc(HYPRE_Int,  max_num_threads[0], HYPRE_MEMORY_HOST);
2272    num_lost_offd_per_thread = hypre_CTAlloc(HYPRE_Int,  max_num_threads[0], HYPRE_MEMORY_HOST);
2273    for (i = 0; i < max_num_threads[0]; i++)
2274    {
2275       num_lost_per_thread[i] = 0;
2276       num_lost_offd_per_thread[i] = 0;
2277    }
2278 
2279 #ifdef HYPRE_USING_OPENMP
2280 #pragma omp parallel private(i,my_thread_num,num_threads,row_nrm, drop_coeff,j,start_j,row_sum,scale,num_lost,now_checking,next_open,num_lost_offd,now_checking_offd,next_open_offd,start,stop,cnt_diag,cnt_offd,num_elmts,cnt)
2281 #endif
2282    {
2283       my_thread_num = hypre_GetThreadNum();
2284       num_threads = hypre_NumActiveThreads();
2285 
2286       /* Compute each thread's range of rows to truncate and compress.  Note,
2287        * that i, j and data are all compressed as entries are dropped, but
2288        * that the compression only occurs locally over each thread's row
2289        * range.  A_diag_i is only made globally consistent at the end of this
2290        * routine.  During the dropping phases, A_diag_i[stop] will point to
2291        * the start of the next thread's row range.  */
2292 
2293       /* my row range */
2294       start = (n_fine / num_threads) * my_thread_num;
2295       if (my_thread_num == num_threads-1)
2296       {
2297          stop = n_fine;
2298       }
2299       else
2300       {
2301          stop = (n_fine / num_threads) * (my_thread_num + 1);
2302       }
2303 
2304       /*
2305        * Truncate based on truncation tolerance
2306        */
2307       if (tol > 0)
2308       {
2309          num_lost = 0;
2310          num_lost_offd = 0;
2311 
2312          next_open = A_diag_i[start];
2313          now_checking = A_diag_i[start];
2314          next_open_offd = A_offd_i[start];
2315          now_checking_offd = A_offd_i[start];
2316 
2317          for (i = start; i < stop; i++)
2318          {
2319             row_nrm = 0;
2320             /* compute norm for dropping small terms */
2321             if (nrm_type == 0)
2322             {
2323                /* infty-norm */
2324                for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
2325                {
2326                   row_nrm = (row_nrm < fabs(A_diag_data[j])) ?
2327                      fabs(A_diag_data[j]) : row_nrm;
2328                }
2329                for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
2330                {
2331                   row_nrm = (row_nrm < fabs(A_offd_data[j])) ?
2332                      fabs(A_offd_data[j]) : row_nrm;
2333                }
2334             }
2335             if (nrm_type == 1)
2336             {
2337                /* 1-norm */
2338                for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
2339                {
2340                   row_nrm += fabs(A_diag_data[j]);
2341                }
2342                for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
2343                {
2344                   row_nrm += fabs(A_offd_data[j]);
2345                }
2346             }
2347             if (nrm_type == 2)
2348             {
2349                /* 2-norm */
2350                for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
2351                {
2352                   HYPRE_Complex v = A_diag_data[j];
2353                   row_nrm += v*v;
2354                }
2355                for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
2356                {
2357                   HYPRE_Complex v = A_offd_data[j];
2358                   row_nrm += v*v;
2359                }
2360                row_nrm  = sqrt(row_nrm);
2361             }
2362             drop_coeff = tol * row_nrm;
2363 
2364             start_j = A_diag_i[i];
2365             if (num_lost)
2366             {
2367                A_diag_i[i] -= num_lost;
2368             }
2369             row_sum = 0;
2370             scale = 0;
2371             for (j = start_j; j < A_diag_i[i+1]; j++)
2372             {
2373                row_sum += A_diag_data[now_checking];
2374                if (fabs(A_diag_data[now_checking]) < drop_coeff)
2375                {
2376                   num_lost++;
2377                   now_checking++;
2378                }
2379                else
2380                {
2381                   scale += A_diag_data[now_checking];
2382                   A_diag_data[next_open] = A_diag_data[now_checking];
2383                   A_diag_j[next_open] = A_diag_j[now_checking];
2384                   now_checking++;
2385                   next_open++;
2386                }
2387             }
2388 
2389             start_j = A_offd_i[i];
2390             if (num_lost_offd)
2391             {
2392                A_offd_i[i] -= num_lost_offd;
2393             }
2394 
2395             for (j = start_j; j < A_offd_i[i+1]; j++)
2396             {
2397                row_sum += A_offd_data[now_checking_offd];
2398                if (fabs(A_offd_data[now_checking_offd]) < drop_coeff)
2399                {
2400                   num_lost_offd++;
2401                   now_checking_offd++;
2402                }
2403                else
2404                {
2405                   scale += A_offd_data[now_checking_offd];
2406                   A_offd_data[next_open_offd] = A_offd_data[now_checking_offd];
2407                   A_offd_j[next_open_offd] = A_offd_j[now_checking_offd];
2408                   now_checking_offd++;
2409                   next_open_offd++;
2410                }
2411             }
2412 
2413             /* scale row of A */
2414             if (rescale && scale != 0.)
2415             {
2416                if (scale != row_sum)
2417                {
2418                   scale = row_sum/scale;
2419                   for (j = A_diag_i[i]; j < (A_diag_i[i+1]-num_lost); j++)
2420                   {
2421                      A_diag_data[j] *= scale;
2422                   }
2423                   for (j = A_offd_i[i]; j < (A_offd_i[i+1]-num_lost_offd); j++)
2424                   {
2425                      A_offd_data[j] *= scale;
2426                   }
2427                }
2428             }
2429          } /* end loop for (i = start; i < stop; i++) */
2430 
2431          /* store number of dropped elements and number of threads */
2432          if (my_thread_num == 0)
2433          {
2434             max_num_threads[0] = num_threads;
2435          }
2436          num_lost_per_thread[my_thread_num] = num_lost;
2437          num_lost_offd_per_thread[my_thread_num] = num_lost_offd;
2438 
2439       } /* end if (tol > 0) */
2440 
2441       /*
2442        * Truncate based on capping the nnz per row
2443        *
2444        */
2445       if (max_row_elmts > 0)
2446       {
2447          HYPRE_Int A_mxnum, cnt1, last_index, last_index_offd;
2448          HYPRE_Int *A_aux_j;
2449          HYPRE_Real *A_aux_data;
2450 
2451          /* find maximum row length locally over this row range */
2452          A_mxnum = 0;
2453          for (i=start; i<stop; i++)
2454          {
2455             /* Note A_diag_i[stop] is the starting point for the next thread
2456              * in j and data, not the stop point for this thread */
2457             last_index = A_diag_i[i+1];
2458             last_index_offd = A_offd_i[i+1];
2459             if (i == stop-1)
2460             {
2461                last_index -= num_lost_per_thread[my_thread_num];
2462                last_index_offd -= num_lost_offd_per_thread[my_thread_num];
2463             }
2464             cnt1 = last_index-A_diag_i[i] + last_index_offd-A_offd_i[i];
2465             if (cnt1 > A_mxnum)
2466             {
2467                A_mxnum = cnt1;
2468             }
2469          }
2470 
2471          /* Some rows exceed max_row_elmts, and require truncation.  Essentially,
2472           * each thread truncates and compresses its range of rows locally. */
2473          if (A_mxnum > max_row_elmts)
2474          {
2475             num_lost = 0;
2476             num_lost_offd = 0;
2477 
2478             /* two temporary arrays to hold row i for temporary operations */
2479             A_aux_j = hypre_CTAlloc(HYPRE_Int,  A_mxnum, HYPRE_MEMORY_HOST);
2480             A_aux_data = hypre_CTAlloc(HYPRE_Real,  A_mxnum, HYPRE_MEMORY_HOST);
2481             cnt_diag = A_diag_i[start];
2482             cnt_offd = A_offd_i[start];
2483 
2484             for (i = start; i < stop; i++)
2485             {
2486                /* Note A_diag_i[stop] is the starting point for the next thread
2487                 * in j and data, not the stop point for this thread */
2488                last_index = A_diag_i[i+1];
2489                last_index_offd = A_offd_i[i+1];
2490                if (i == stop-1)
2491                {
2492                   last_index -= num_lost_per_thread[my_thread_num];
2493                   last_index_offd -= num_lost_offd_per_thread[my_thread_num];
2494                }
2495 
2496                row_sum = 0;
2497                num_elmts = last_index-A_diag_i[i] + last_index_offd-A_offd_i[i];
2498                if (max_row_elmts < num_elmts)
2499                {
2500                   /* copy both diagonal and off-diag parts of row i to _aux_ arrays */
2501                   cnt = 0;
2502                   for (j = A_diag_i[i]; j < last_index; j++)
2503                   {
2504                      A_aux_j[cnt] = A_diag_j[j];
2505                      A_aux_data[cnt++] = A_diag_data[j];
2506                      row_sum += A_diag_data[j];
2507                   }
2508                   num_lost += cnt;
2509                   cnt1 = cnt;
2510                   for (j = A_offd_i[i]; j < last_index_offd; j++)
2511                   {
2512                      A_aux_j[cnt] = A_offd_j[j]+num_cols;
2513                      A_aux_data[cnt++] = A_offd_data[j];
2514                      row_sum += A_offd_data[j];
2515                   }
2516                   num_lost_offd += cnt-cnt1;
2517 
2518                   /* sort data */
2519                   hypre_qsort2_abs(A_aux_j,A_aux_data,0,cnt-1);
2520                   scale = 0;
2521                   if (i > start)
2522                   {
2523                      A_diag_i[i] = cnt_diag;
2524                      A_offd_i[i] = cnt_offd;
2525                   }
2526                   for (j = 0; j < max_row_elmts; j++)
2527                   {
2528                      scale += A_aux_data[j];
2529                      if (A_aux_j[j] < num_cols)
2530                      {
2531                         A_diag_j[cnt_diag] = A_aux_j[j];
2532                         A_diag_data[cnt_diag++] = A_aux_data[j];
2533                      }
2534                      else
2535                      {
2536                         A_offd_j[cnt_offd] = A_aux_j[j]-num_cols;
2537                         A_offd_data[cnt_offd++] = A_aux_data[j];
2538                      }
2539                   }
2540                   num_lost -= cnt_diag-A_diag_i[i];
2541                   num_lost_offd -= cnt_offd-A_offd_i[i];
2542 
2543                   /* scale row of A */
2544                   if (rescale && (scale != 0.))
2545                   {
2546                      if (scale != row_sum)
2547                      {
2548                         scale = row_sum/scale;
2549                         for (j = A_diag_i[i]; j < cnt_diag; j++)
2550                         {
2551                            A_diag_data[j] *= scale;
2552                         }
2553                         for (j = A_offd_i[i]; j < cnt_offd; j++)
2554                         {
2555                            A_offd_data[j] *= scale;
2556                         }
2557                      }
2558                   }
2559                }  /* end if (max_row_elmts < num_elmts) */
2560                else
2561                {
2562                   /* nothing dropped from this row, but still have to shift entries back
2563                    * by the number dropped so far */
2564                   if (A_diag_i[i] != cnt_diag)
2565                   {
2566                      start_j = A_diag_i[i];
2567                      A_diag_i[i] = cnt_diag;
2568                      for (j = start_j; j < last_index; j++)
2569                      {
2570                         A_diag_j[cnt_diag] = A_diag_j[j];
2571                         A_diag_data[cnt_diag++] = A_diag_data[j];
2572                      }
2573                   }
2574                   else
2575                   {
2576                      cnt_diag += last_index-A_diag_i[i];
2577                   }
2578 
2579                   if (A_offd_i[i] != cnt_offd)
2580                   {
2581                      start_j = A_offd_i[i];
2582                      A_offd_i[i] = cnt_offd;
2583                      for (j = start_j; j < last_index_offd; j++)
2584                      {
2585                         A_offd_j[cnt_offd] = A_offd_j[j];
2586                         A_offd_data[cnt_offd++] = A_offd_data[j];
2587                      }
2588                   }
2589                   else
2590                   {
2591                      cnt_offd += last_index_offd-A_offd_i[i];
2592                   }
2593                }
2594             } /* end for (i = start; i < stop; i++) */
2595 
2596             num_lost_per_thread[my_thread_num] += num_lost;
2597             num_lost_offd_per_thread[my_thread_num] += num_lost_offd;
2598             hypre_TFree(A_aux_j, HYPRE_MEMORY_HOST);
2599             hypre_TFree(A_aux_data, HYPRE_MEMORY_HOST);
2600 
2601          } /* end if (A_mxnum > max_row_elmts) */
2602       } /* end if (max_row_elmts > 0) */
2603 
2604 
2605       /* Sum up num_lost_global */
2606 #ifdef HYPRE_USING_OPENMP
2607 #pragma omp barrier
2608 #endif
2609       if (my_thread_num == 0)
2610       {
2611          num_lost_global = 0;
2612          num_lost_global_offd = 0;
2613          for (i = 0; i < max_num_threads[0]; i++)
2614          {
2615             num_lost_global += num_lost_per_thread[i];
2616             num_lost_global_offd += num_lost_offd_per_thread[i];
2617          }
2618       }
2619 #ifdef HYPRE_USING_OPENMP
2620 #pragma omp barrier
2621 #endif
2622 
2623       /*
2624        * Synchronize and create new diag data structures
2625        */
2626       if (num_lost_global)
2627       {
2628          /* Each thread has its own locally compressed CSR matrix from rows start
2629           * to stop.  Now, we have to copy each thread's chunk into the new
2630           * process-wide CSR data structures
2631           *
2632           * First, we compute the new process-wide number of nonzeros (i.e.,
2633           * A_diag_size), and compute cum_lost_per_thread[k] so that this
2634           * entry holds the cumulative sum of entries dropped up to and
2635           * including thread k. */
2636          if (my_thread_num == 0)
2637          {
2638             A_diag_size = A_diag_i[n_fine];
2639 
2640             for (i = 0; i < max_num_threads[0]; i++)
2641             {
2642                A_diag_size -= num_lost_per_thread[i];
2643                if (i > 0)
2644                {
2645                   cum_lost_per_thread[i] = num_lost_per_thread[i] + cum_lost_per_thread[i-1];
2646                }
2647                else
2648                {
2649                   cum_lost_per_thread[i] = num_lost_per_thread[i];
2650                }
2651             }
2652 
2653             A_diag_j_new = hypre_CTAlloc(HYPRE_Int, A_diag_size, memory_location_diag);
2654             A_diag_data_new = hypre_CTAlloc(HYPRE_Real, A_diag_size, memory_location_diag);
2655          }
2656 #ifdef HYPRE_USING_OPENMP
2657 #pragma omp barrier
2658 #endif
2659 
2660          /* points to next open spot in new data structures for this thread */
2661          if (my_thread_num == 0)
2662          {
2663             next_open = 0;
2664          }
2665          else
2666          {
2667             /* remember, cum_lost_per_thread[k] stores the num dropped up to and
2668              * including thread k */
2669             next_open = A_diag_i[start] - cum_lost_per_thread[my_thread_num-1];
2670          }
2671 
2672          /* copy the j and data arrays over */
2673          for (i = A_diag_i[start]; i < A_diag_i[stop] - num_lost_per_thread[my_thread_num]; i++)
2674          {
2675             A_diag_j_new[next_open] = A_diag_j[i];
2676             A_diag_data_new[next_open] = A_diag_data[i];
2677             next_open += 1;
2678          }
2679 
2680 #ifdef HYPRE_USING_OPENMP
2681 #pragma omp barrier
2682 #endif
2683          /* update A_diag_i with number of dropped entries by all lower ranked
2684           * threads */
2685          if (my_thread_num > 0)
2686          {
2687             for (i=start; i<stop; i++)
2688             {
2689                A_diag_i[i] -= cum_lost_per_thread[my_thread_num-1];
2690             }
2691          }
2692 
2693          if (my_thread_num == 0)
2694          {
2695             /* Set last entry */
2696             A_diag_i[n_fine] = A_diag_size;
2697 
2698             hypre_TFree(A_diag_j, memory_location_diag);
2699             hypre_TFree(A_diag_data, memory_location_diag);
2700             hypre_CSRMatrixJ(A_diag) = A_diag_j_new;
2701             hypre_CSRMatrixData(A_diag) = A_diag_data_new;
2702             hypre_CSRMatrixNumNonzeros(A_diag) = A_diag_size;
2703          }
2704       }
2705 
2706       /*
2707        * Synchronize and create new offd data structures
2708        */
2709 #ifdef HYPRE_USING_OPENMP
2710 #pragma omp barrier
2711 #endif
2712       if (num_lost_global_offd)
2713       {
2714          /* Repeat process for off-diagonal */
2715          if (my_thread_num == 0)
2716          {
2717             A_offd_size = A_offd_i[n_fine];
2718             for (i = 0; i < max_num_threads[0]; i++)
2719             {
2720                A_offd_size -= num_lost_offd_per_thread[i];
2721                if (i > 0)
2722                {
2723                   cum_lost_per_thread[i] = num_lost_offd_per_thread[i] + cum_lost_per_thread[i-1];
2724                }
2725                else
2726                {
2727                   cum_lost_per_thread[i] = num_lost_offd_per_thread[i];
2728                }
2729             }
2730 
2731             A_offd_j_new = hypre_CTAlloc(HYPRE_Int, A_offd_size, memory_location_offd);
2732             A_offd_data_new = hypre_CTAlloc(HYPRE_Real, A_offd_size, memory_location_offd);
2733          }
2734 #ifdef HYPRE_USING_OPENMP
2735 #pragma omp barrier
2736 #endif
2737 
2738          /* points to next open spot in new data structures for this thread */
2739          if (my_thread_num == 0)
2740          {
2741             next_open = 0;
2742          }
2743          else
2744          {
2745             /* remember, cum_lost_per_thread[k] stores the num dropped up to and
2746              * including thread k */
2747             next_open = A_offd_i[start] - cum_lost_per_thread[my_thread_num-1];
2748          }
2749 
2750          /* copy the j and data arrays over */
2751          for (i = A_offd_i[start]; i < A_offd_i[stop] - num_lost_offd_per_thread[my_thread_num]; i++)
2752          {
2753             A_offd_j_new[next_open] = A_offd_j[i];
2754             A_offd_data_new[next_open] = A_offd_data[i];
2755             next_open += 1;
2756          }
2757 
2758 #ifdef HYPRE_USING_OPENMP
2759 #pragma omp barrier
2760 #endif
2761          /* update A_offd_i with number of dropped entries by all lower ranked
2762           * threads */
2763          if (my_thread_num > 0)
2764          {
2765             for (i=start; i<stop; i++)
2766             {
2767                A_offd_i[i] -= cum_lost_per_thread[my_thread_num-1];
2768             }
2769          }
2770 
2771          if (my_thread_num == 0)
2772          {
2773             /* Set last entry */
2774             A_offd_i[n_fine] = A_offd_size;
2775 
2776             hypre_TFree(A_offd_j, memory_location_offd);
2777             hypre_TFree(A_offd_data, memory_location_offd);
2778             hypre_CSRMatrixJ(A_offd) = A_offd_j_new;
2779             hypre_CSRMatrixData(A_offd) = A_offd_data_new;
2780             hypre_CSRMatrixNumNonzeros(A_offd) = A_offd_size;
2781          }
2782       }
2783 
2784    } /* end parallel region */
2785 
2786    hypre_TFree(max_num_threads, HYPRE_MEMORY_HOST);
2787    hypre_TFree(cum_lost_per_thread, HYPRE_MEMORY_HOST);
2788    hypre_TFree(num_lost_per_thread, HYPRE_MEMORY_HOST);
2789    hypre_TFree(num_lost_offd_per_thread, HYPRE_MEMORY_HOST);
2790 
2791 #ifdef HYPRE_PROFILE
2792    hypre_profile_times[HYPRE_TIMER_ID_INTERP_TRUNC] += hypre_MPI_Wtime();
2793 #endif
2794 
2795    return ierr;
2796 }
2797 
2798 HYPRE_Int
2799 hypre_ParCSRMatrixSetConstantValues( hypre_ParCSRMatrix *A,
2800                                      HYPRE_Complex       value )
2801 {
2802    hypre_CSRMatrixSetConstantValues(hypre_ParCSRMatrixDiag(A), value);
2803    hypre_CSRMatrixSetConstantValues(hypre_ParCSRMatrixOffd(A), value);
2804 
2805    return hypre_error_flag;
2806 }
2807 
2808 void
2809 hypre_ParCSRMatrixCopyColMapOffdToDevice(hypre_ParCSRMatrix *A)
2810 {
2811 #if defined(HYPRE_USING_GPU)
2812    if (hypre_ParCSRMatrixDeviceColMapOffd(A) == NULL)
2813    {
2814       const HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
2815       hypre_ParCSRMatrixDeviceColMapOffd(A) = hypre_TAlloc(HYPRE_BigInt, num_cols_A_offd, HYPRE_MEMORY_DEVICE);
2816       hypre_TMemcpy(hypre_ParCSRMatrixDeviceColMapOffd(A), hypre_ParCSRMatrixColMapOffd(A), HYPRE_BigInt, num_cols_A_offd,
2817                     HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST);
2818    }
2819 #endif
2820 }
2821