1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 /******************************************************************************
9 *
10 * Member functions for hypre_ParCSRMatrix class.
11 *
12 *****************************************************************************/
13
14 #include "_hypre_parcsr_mv.h"
15
16 #include "../seq_mv/HYPRE_seq_mv.h"
17 #include "../seq_mv/csr_matrix.h"
18
/* In addition to the publicly accessible interface in HYPRE_mv.h, the
   implementation in this file uses accessor macros into the sequential matrix
   structure, and so includes the .h that defines that structure. Should those
   accessor functions become proper functions at some later date, this will not
   be necessary. AJC 4/99 */
24
25 HYPRE_Int hypre_FillResponseParToCSRMatrix(void*, HYPRE_Int, HYPRE_Int, void*, MPI_Comm, void**, HYPRE_Int*);
26
27 /*--------------------------------------------------------------------------
28 * hypre_ParCSRMatrixCreate
29 *--------------------------------------------------------------------------*/
30
31 /* If create is called and row_starts and col_starts are NOT null, then it is
32 assumed that they are of length 2 containing the start row of the calling
33 processor followed by the start row of the next processor - AHB 6/05 */
34
35 hypre_ParCSRMatrix*
hypre_ParCSRMatrixCreate(MPI_Comm comm,HYPRE_BigInt global_num_rows,HYPRE_BigInt global_num_cols,HYPRE_BigInt * row_starts_in,HYPRE_BigInt * col_starts_in,HYPRE_Int num_cols_offd,HYPRE_Int num_nonzeros_diag,HYPRE_Int num_nonzeros_offd)36 hypre_ParCSRMatrixCreate( MPI_Comm comm,
37 HYPRE_BigInt global_num_rows,
38 HYPRE_BigInt global_num_cols,
39 HYPRE_BigInt *row_starts_in,
40 HYPRE_BigInt *col_starts_in,
41 HYPRE_Int num_cols_offd,
42 HYPRE_Int num_nonzeros_diag,
43 HYPRE_Int num_nonzeros_offd )
44 {
45 hypre_ParCSRMatrix *matrix;
46 HYPRE_Int num_procs, my_id;
47 HYPRE_Int local_num_rows;
48 HYPRE_Int local_num_cols;
49 HYPRE_BigInt row_starts[2];
50 HYPRE_BigInt col_starts[2];
51 HYPRE_BigInt first_row_index, first_col_diag;
52
53 matrix = hypre_CTAlloc(hypre_ParCSRMatrix, 1, HYPRE_MEMORY_HOST);
54
55 hypre_MPI_Comm_rank(comm,&my_id);
56 hypre_MPI_Comm_size(comm,&num_procs);
57
58 if (!row_starts_in)
59 {
60 hypre_GenerateLocalPartitioning(global_num_rows, num_procs, my_id,
61 row_starts);
62 }
63 else
64 {
65 row_starts[0] = row_starts_in[0];
66 row_starts[1] = row_starts_in[1];
67 }
68
69 if (!col_starts_in)
70 {
71 hypre_GenerateLocalPartitioning(global_num_cols, num_procs, my_id,
72 col_starts);
73 }
74 else
75 {
76 col_starts[0] = col_starts_in[0];
77 col_starts[1] = col_starts_in[1];
78 }
79
80 /* row_starts[0] is start of local rows.
81 row_starts[1] is start of next processor's rows */
82 first_row_index = row_starts[0];
83 local_num_rows = row_starts[1] - first_row_index;
84 first_col_diag = col_starts[0];
85 local_num_cols = col_starts[1] - first_col_diag;
86
87 hypre_ParCSRMatrixComm(matrix) = comm;
88 hypre_ParCSRMatrixDiag(matrix) =
89 hypre_CSRMatrixCreate(local_num_rows, local_num_cols, num_nonzeros_diag);
90 hypre_ParCSRMatrixOffd(matrix) =
91 hypre_CSRMatrixCreate(local_num_rows, num_cols_offd, num_nonzeros_offd);
92 hypre_ParCSRMatrixDiagT(matrix) = NULL;
93 hypre_ParCSRMatrixOffdT(matrix) = NULL; // JSP: transposed matrices are optional
94 hypre_ParCSRMatrixGlobalNumRows(matrix) = global_num_rows;
95 hypre_ParCSRMatrixGlobalNumCols(matrix) = global_num_cols;
96 hypre_ParCSRMatrixGlobalNumRownnz(matrix) = global_num_rows;
97 hypre_ParCSRMatrixFirstRowIndex(matrix) = first_row_index;
98 hypre_ParCSRMatrixFirstColDiag(matrix) = first_col_diag;
99 hypre_ParCSRMatrixLastRowIndex(matrix) = first_row_index + local_num_rows - 1;
100 hypre_ParCSRMatrixLastColDiag(matrix) = first_col_diag + local_num_cols - 1;
101
102 hypre_ParCSRMatrixRowStarts(matrix)[0] = row_starts[0];
103 hypre_ParCSRMatrixRowStarts(matrix)[1] = row_starts[1];
104 hypre_ParCSRMatrixColStarts(matrix)[0] = col_starts[0];
105 hypre_ParCSRMatrixColStarts(matrix)[1] = col_starts[1];
106
107 hypre_ParCSRMatrixColMapOffd(matrix) = NULL;
108 hypre_ParCSRMatrixDeviceColMapOffd(matrix) = NULL;
109 hypre_ParCSRMatrixProcOrdering(matrix) = NULL;
110
111 hypre_ParCSRMatrixAssumedPartition(matrix) = NULL;
112 hypre_ParCSRMatrixOwnsAssumedPartition(matrix) = 1;
113
114 hypre_ParCSRMatrixCommPkg(matrix) = NULL;
115 hypre_ParCSRMatrixCommPkgT(matrix) = NULL;
116
117 /* set defaults */
118 hypre_ParCSRMatrixOwnsData(matrix) = 1;
119 hypre_ParCSRMatrixRowindices(matrix) = NULL;
120 hypre_ParCSRMatrixRowvalues(matrix) = NULL;
121 hypre_ParCSRMatrixGetrowactive(matrix) = 0;
122
123 matrix->bdiaginv = NULL;
124 matrix->bdiaginv_comm_pkg = NULL;
125 matrix->bdiag_size = -1;
126
127 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
128 hypre_ParCSRMatrixSocDiagJ(matrix) = NULL;
129 hypre_ParCSRMatrixSocOffdJ(matrix) = NULL;
130 #endif
131
132 return matrix;
133 }
134
135 /*--------------------------------------------------------------------------
136 * hypre_ParCSRMatrixDestroy
137 *--------------------------------------------------------------------------*/
138
139 HYPRE_Int
hypre_ParCSRMatrixDestroy(hypre_ParCSRMatrix * matrix)140 hypre_ParCSRMatrixDestroy( hypre_ParCSRMatrix *matrix )
141 {
142 if (matrix)
143 {
144 HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(matrix);
145
146 if ( hypre_ParCSRMatrixOwnsData(matrix) )
147 {
148 hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix));
149 hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix));
150
151 if ( hypre_ParCSRMatrixDiagT(matrix) )
152 {
153 hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiagT(matrix));
154 }
155
156 if ( hypre_ParCSRMatrixOffdT(matrix) )
157 {
158 hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffdT(matrix));
159 }
160
161 if (hypre_ParCSRMatrixColMapOffd(matrix))
162 {
163 hypre_TFree(hypre_ParCSRMatrixColMapOffd(matrix), HYPRE_MEMORY_HOST);
164 }
165
166 if (hypre_ParCSRMatrixDeviceColMapOffd(matrix))
167 {
168 hypre_TFree(hypre_ParCSRMatrixDeviceColMapOffd(matrix), HYPRE_MEMORY_DEVICE);
169 }
170
171 if (hypre_ParCSRMatrixCommPkg(matrix))
172 {
173 hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(matrix));
174 }
175
176 if (hypre_ParCSRMatrixCommPkgT(matrix))
177 {
178 hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkgT(matrix));
179 }
180 }
181
182 /* RL: this is actually not correct since the memory_location may have been changed after allocation
183 * put them in containers TODO */
184 hypre_TFree(hypre_ParCSRMatrixRowindices(matrix), memory_location);
185 hypre_TFree(hypre_ParCSRMatrixRowvalues(matrix), memory_location);
186
187 if ( hypre_ParCSRMatrixAssumedPartition(matrix) && hypre_ParCSRMatrixOwnsAssumedPartition(matrix) )
188 {
189 hypre_AssumedPartitionDestroy(hypre_ParCSRMatrixAssumedPartition(matrix));
190 }
191
192 if ( hypre_ParCSRMatrixProcOrdering(matrix) )
193 {
194 hypre_TFree(hypre_ParCSRMatrixProcOrdering(matrix), HYPRE_MEMORY_HOST);
195 }
196
197 hypre_TFree(matrix->bdiaginv, HYPRE_MEMORY_HOST);
198 if (matrix->bdiaginv_comm_pkg)
199 {
200 hypre_MatvecCommPkgDestroy(matrix->bdiaginv_comm_pkg);
201 }
202
203 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
204 hypre_TFree(hypre_ParCSRMatrixSocDiagJ(matrix), HYPRE_MEMORY_DEVICE);
205 hypre_TFree(hypre_ParCSRMatrixSocOffdJ(matrix), HYPRE_MEMORY_DEVICE);
206 #endif
207
208 hypre_TFree(matrix, HYPRE_MEMORY_HOST);
209 }
210
211 return hypre_error_flag;
212 }
213
214 /*--------------------------------------------------------------------------
215 * hypre_ParCSRMatrixInitialize
216 *--------------------------------------------------------------------------*/
217
218 HYPRE_Int
hypre_ParCSRMatrixInitialize_v2(hypre_ParCSRMatrix * matrix,HYPRE_MemoryLocation memory_location)219 hypre_ParCSRMatrixInitialize_v2( hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location )
220 {
221 if (!matrix)
222 {
223 hypre_error_in_arg(1);
224 return hypre_error_flag;
225 }
226
227 hypre_CSRMatrixInitialize_v2(hypre_ParCSRMatrixDiag(matrix), 0, memory_location);
228 hypre_CSRMatrixInitialize_v2(hypre_ParCSRMatrixOffd(matrix), 0, memory_location);
229
230 hypre_ParCSRMatrixColMapOffd(matrix) =
231 hypre_CTAlloc(HYPRE_BigInt, hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(matrix)),
232 HYPRE_MEMORY_HOST);
233
234 return hypre_error_flag;
235 }
236
237 HYPRE_Int
hypre_ParCSRMatrixInitialize(hypre_ParCSRMatrix * matrix)238 hypre_ParCSRMatrixInitialize( hypre_ParCSRMatrix *matrix )
239 {
240 return hypre_ParCSRMatrixInitialize_v2(matrix, hypre_ParCSRMatrixMemoryLocation(matrix));
241 }
242
243 /*--------------------------------------------------------------------------
244 * hypre_ParCSRMatrixClone
245 * Creates and returns a new copy S of the argument A
246 * The following variables are not copied because they will be constructed
247 * later if needed: CommPkg, CommPkgT, rowindices, rowvalues
248 *--------------------------------------------------------------------------*/
249
250 hypre_ParCSRMatrix*
hypre_ParCSRMatrixClone_v2(hypre_ParCSRMatrix * A,HYPRE_Int copy_data,HYPRE_MemoryLocation memory_location)251 hypre_ParCSRMatrixClone_v2(hypre_ParCSRMatrix *A, HYPRE_Int copy_data, HYPRE_MemoryLocation memory_location)
252 {
253 hypre_ParCSRMatrix *S;
254
255 S = hypre_ParCSRMatrixCreate( hypre_ParCSRMatrixComm(A),
256 hypre_ParCSRMatrixGlobalNumRows(A),
257 hypre_ParCSRMatrixGlobalNumCols(A),
258 hypre_ParCSRMatrixRowStarts(A),
259 hypre_ParCSRMatrixColStarts(A),
260 hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A)),
261 hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(A)),
262 hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(A)) );
263
264 hypre_ParCSRMatrixNumNonzeros(S) = hypre_ParCSRMatrixNumNonzeros(A);
265 hypre_ParCSRMatrixDNumNonzeros(S) = hypre_ParCSRMatrixNumNonzeros(A);
266
267 hypre_ParCSRMatrixInitialize_v2(S, memory_location);
268
269 hypre_ParCSRMatrixCopy(A, S, copy_data);
270
271 return S;
272 }
273
274 hypre_ParCSRMatrix*
hypre_ParCSRMatrixClone(hypre_ParCSRMatrix * A,HYPRE_Int copy_data)275 hypre_ParCSRMatrixClone(hypre_ParCSRMatrix *A, HYPRE_Int copy_data)
276 {
277 return hypre_ParCSRMatrixClone_v2(A, copy_data, hypre_ParCSRMatrixMemoryLocation(A));
278 }
279
280 HYPRE_Int
hypre_ParCSRMatrixMigrate(hypre_ParCSRMatrix * A,HYPRE_MemoryLocation memory_location)281 hypre_ParCSRMatrixMigrate(hypre_ParCSRMatrix *A, HYPRE_MemoryLocation memory_location)
282 {
283 if (!A)
284 {
285 return hypre_error_flag;
286 }
287
288 HYPRE_MemoryLocation old_memory_location = hypre_ParCSRMatrixMemoryLocation(A);
289
290 if ( hypre_GetActualMemLocation(memory_location) != hypre_GetActualMemLocation(old_memory_location) )
291 {
292 hypre_CSRMatrix *A_diag = hypre_CSRMatrixClone_v2(hypre_ParCSRMatrixDiag(A), 1, memory_location);
293 hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(A));
294 hypre_ParCSRMatrixDiag(A) = A_diag;
295
296 hypre_CSRMatrix *A_offd = hypre_CSRMatrixClone_v2(hypre_ParCSRMatrixOffd(A), 1, memory_location);
297 hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(A));
298 hypre_ParCSRMatrixOffd(A) = A_offd;
299
300 hypre_TFree(hypre_ParCSRMatrixRowindices(A), old_memory_location);
301 hypre_TFree(hypre_ParCSRMatrixRowvalues(A), old_memory_location);
302 }
303 else
304 {
305 hypre_CSRMatrixMemoryLocation(hypre_ParCSRMatrixDiag(A)) = memory_location;
306 hypre_CSRMatrixMemoryLocation(hypre_ParCSRMatrixOffd(A)) = memory_location;
307 }
308
309 return hypre_error_flag;
310 }
311
312 HYPRE_Int
hypre_ParCSRMatrixSetNumNonzeros_core(hypre_ParCSRMatrix * matrix,const char * format)313 hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, const char* format )
314 {
315 MPI_Comm comm;
316 hypre_CSRMatrix *diag;
317 hypre_CSRMatrix *offd;
318
319 if (!matrix)
320 {
321 hypre_error_in_arg(1);
322 return hypre_error_flag;
323 }
324
325 comm = hypre_ParCSRMatrixComm(matrix);
326 diag = hypre_ParCSRMatrixDiag(matrix);
327 offd = hypre_ParCSRMatrixOffd(matrix);
328
329 /* TODO in HYPRE_DEBUG ? */
330 hypre_CSRMatrixCheckSetNumNonzeros(diag);
331 hypre_CSRMatrixCheckSetNumNonzeros(offd);
332
333 if (format[0] == 'I')
334 {
335 HYPRE_BigInt total_num_nonzeros;
336 HYPRE_BigInt local_num_nonzeros;
337 local_num_nonzeros = (HYPRE_BigInt) ( hypre_CSRMatrixNumNonzeros(diag) +
338 hypre_CSRMatrixNumNonzeros(offd) );
339
340 hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, HYPRE_MPI_BIG_INT,
341 hypre_MPI_SUM, comm);
342
343 hypre_ParCSRMatrixNumNonzeros(matrix) = total_num_nonzeros;
344 }
345 else if (format[0] == 'D')
346 {
347 HYPRE_Real total_num_nonzeros;
348 HYPRE_Real local_num_nonzeros;
349 local_num_nonzeros = (HYPRE_Real) ( hypre_CSRMatrixNumNonzeros(diag) +
350 hypre_CSRMatrixNumNonzeros(offd) );
351
352 hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1,
353 HYPRE_MPI_REAL, hypre_MPI_SUM, comm);
354
355 hypre_ParCSRMatrixDNumNonzeros(matrix) = total_num_nonzeros;
356 }
357 else
358 {
359 hypre_error_in_arg(1);
360 return hypre_error_flag;
361 }
362
363 return hypre_error_flag;
364 }
365
366 /*--------------------------------------------------------------------------
367 * hypre_ParCSRMatrixSetNumNonzeros
368 *--------------------------------------------------------------------------*/
369 HYPRE_Int
hypre_ParCSRMatrixSetNumNonzeros(hypre_ParCSRMatrix * matrix)370 hypre_ParCSRMatrixSetNumNonzeros( hypre_ParCSRMatrix *matrix )
371 {
372 return hypre_ParCSRMatrixSetNumNonzeros_core(matrix, "Int");
373 }
374
375 /*--------------------------------------------------------------------------
376 * hypre_ParCSRMatrixSetDNumNonzeros
377 *--------------------------------------------------------------------------*/
378
379 HYPRE_Int
hypre_ParCSRMatrixSetDNumNonzeros(hypre_ParCSRMatrix * matrix)380 hypre_ParCSRMatrixSetDNumNonzeros( hypre_ParCSRMatrix *matrix )
381 {
382 return hypre_ParCSRMatrixSetNumNonzeros_core(matrix, "Double");
383 }
384
385 /*--------------------------------------------------------------------------
386 * hypre_ParCSRMatrixSetNumRownnz
387 *--------------------------------------------------------------------------*/
388
389 HYPRE_Int
hypre_ParCSRMatrixSetNumRownnz(hypre_ParCSRMatrix * matrix)390 hypre_ParCSRMatrixSetNumRownnz( hypre_ParCSRMatrix *matrix )
391 {
392 MPI_Comm comm = hypre_ParCSRMatrixComm(matrix);
393 hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(matrix);
394 hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix);
395 HYPRE_Int *rownnz_diag = hypre_CSRMatrixRownnz(diag);
396 HYPRE_Int *rownnz_offd = hypre_CSRMatrixRownnz(offd);
397 HYPRE_Int num_rownnz_diag = hypre_CSRMatrixNumRownnz(diag);
398 HYPRE_Int num_rownnz_offd = hypre_CSRMatrixNumRownnz(offd);
399
400 HYPRE_BigInt local_num_rownnz;
401 HYPRE_BigInt global_num_rownnz;
402 HYPRE_Int i, j;
403
404 if (!matrix)
405 {
406 hypre_error_in_arg(1);
407 return hypre_error_flag;
408 }
409
410 local_num_rownnz = i = j = 0;
411 while (i < num_rownnz_diag && j < num_rownnz_offd)
412 {
413 local_num_rownnz++;
414 if (rownnz_diag[i] < rownnz_offd[j])
415 {
416 i++;
417 }
418 else
419 {
420 j++;
421 }
422 }
423
424 local_num_rownnz += (HYPRE_BigInt) ((num_rownnz_diag - i) + (num_rownnz_offd - j));
425
426 hypre_MPI_Allreduce(&local_num_rownnz, &global_num_rownnz, 1,
427 HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
428
429 hypre_ParCSRMatrixGlobalNumRownnz(matrix) = global_num_rownnz;
430
431 return hypre_error_flag;
432 }
433
434 /*--------------------------------------------------------------------------
435 * hypre_ParCSRMatrixSetDataOwner
436 *--------------------------------------------------------------------------*/
437
438 HYPRE_Int
hypre_ParCSRMatrixSetDataOwner(hypre_ParCSRMatrix * matrix,HYPRE_Int owns_data)439 hypre_ParCSRMatrixSetDataOwner( hypre_ParCSRMatrix *matrix,
440 HYPRE_Int owns_data )
441 {
442 if (!matrix)
443 {
444 hypre_error_in_arg(1);
445 return hypre_error_flag;
446 }
447
448 hypre_ParCSRMatrixOwnsData(matrix) = owns_data;
449
450 return hypre_error_flag;
451 }
452
453 /*--------------------------------------------------------------------------
454 * hypre_ParCSRMatrixRead
455 *--------------------------------------------------------------------------*/
456
457 hypre_ParCSRMatrix *
hypre_ParCSRMatrixRead(MPI_Comm comm,const char * file_name)458 hypre_ParCSRMatrixRead( MPI_Comm comm,
459 const char *file_name )
460 {
461 hypre_ParCSRMatrix *matrix;
462 hypre_CSRMatrix *diag;
463 hypre_CSRMatrix *offd;
464
465 HYPRE_Int my_id, num_procs;
466 HYPRE_Int num_cols_offd;
467 HYPRE_Int i, local_num_rows;
468
469 HYPRE_BigInt row_starts[2];
470 HYPRE_BigInt col_starts[2];
471 HYPRE_BigInt *col_map_offd;
472 HYPRE_BigInt row_s, row_e, col_s, col_e;
473 HYPRE_BigInt global_num_rows, global_num_cols;
474
475 FILE *fp;
476 char new_file_d[80], new_file_o[80], new_file_info[80];
477
478 hypre_MPI_Comm_rank(comm,&my_id);
479 hypre_MPI_Comm_size(comm,&num_procs);
480
481 hypre_sprintf(new_file_d,"%s.D.%d",file_name,my_id);
482 hypre_sprintf(new_file_o,"%s.O.%d",file_name,my_id);
483 hypre_sprintf(new_file_info,"%s.INFO.%d",file_name,my_id);
484 fp = fopen(new_file_info, "r");
485 hypre_fscanf(fp, "%b", &global_num_rows);
486 hypre_fscanf(fp, "%b", &global_num_cols);
487 hypre_fscanf(fp, "%d", &num_cols_offd);
488 /* the bgl input file should only contain the EXACT range for local processor */
489 hypre_fscanf(fp, "%d %d %d %d", &row_s, &row_e, &col_s, &col_e);
490 row_starts[0] = row_s;
491 row_starts[1] = row_e;
492 col_starts[0] = col_s;
493 col_starts[1] = col_e;
494
495 col_map_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd, HYPRE_MEMORY_HOST);
496
497 for (i = 0; i < num_cols_offd; i++)
498 {
499 hypre_fscanf(fp, "%b", &col_map_offd[i]);
500 }
501
502 fclose(fp);
503
504 diag = hypre_CSRMatrixRead(new_file_d);
505 local_num_rows = hypre_CSRMatrixNumRows(diag);
506
507 if (num_cols_offd)
508 {
509 offd = hypre_CSRMatrixRead(new_file_o);
510 }
511 else
512 {
513 offd = hypre_CSRMatrixCreate(local_num_rows,0,0);
514 hypre_CSRMatrixInitialize(offd);
515 }
516
517 matrix = hypre_CTAlloc(hypre_ParCSRMatrix, 1, HYPRE_MEMORY_HOST);
518
519 hypre_ParCSRMatrixComm(matrix) = comm;
520 hypre_ParCSRMatrixGlobalNumRows(matrix) = global_num_rows;
521 hypre_ParCSRMatrixGlobalNumCols(matrix) = global_num_cols;
522 hypre_ParCSRMatrixFirstRowIndex(matrix) = row_s;
523 hypre_ParCSRMatrixFirstColDiag(matrix) = col_s;
524 hypre_ParCSRMatrixLastRowIndex(matrix) = row_e - 1;
525 hypre_ParCSRMatrixLastColDiag(matrix) = col_e - 1;
526
527 hypre_ParCSRMatrixRowStarts(matrix)[0] = row_starts[0];
528 hypre_ParCSRMatrixRowStarts(matrix)[1] = row_starts[1];
529 hypre_ParCSRMatrixColStarts(matrix)[0] = col_starts[0];
530 hypre_ParCSRMatrixColStarts(matrix)[1] = col_starts[1];
531
532 hypre_ParCSRMatrixCommPkg(matrix) = NULL;
533
534 /* set defaults */
535 hypre_ParCSRMatrixOwnsData(matrix) = 1;
536 hypre_ParCSRMatrixDiag(matrix) = diag;
537 hypre_ParCSRMatrixOffd(matrix) = offd;
538 if (num_cols_offd)
539 {
540 hypre_ParCSRMatrixColMapOffd(matrix) = col_map_offd;
541 }
542 else
543 {
544 hypre_ParCSRMatrixColMapOffd(matrix) = NULL;
545 }
546
547 return matrix;
548 }
549
550 /*--------------------------------------------------------------------------
551 * hypre_ParCSRMatrixPrint
552 *--------------------------------------------------------------------------*/
553
554 HYPRE_Int
hypre_ParCSRMatrixPrint(hypre_ParCSRMatrix * matrix,const char * file_name)555 hypre_ParCSRMatrixPrint( hypre_ParCSRMatrix *matrix,
556 const char *file_name )
557 {
558 MPI_Comm comm;
559 HYPRE_BigInt global_num_rows;
560 HYPRE_BigInt global_num_cols;
561 HYPRE_BigInt *col_map_offd;
562 HYPRE_Int my_id, i, num_procs;
563 char new_file_d[80], new_file_o[80], new_file_info[80];
564 FILE *fp;
565 HYPRE_Int num_cols_offd = 0;
566 HYPRE_BigInt row_s, row_e, col_s, col_e;
567 if (!matrix)
568 {
569 hypre_error_in_arg(1);
570 return hypre_error_flag;
571 }
572 comm = hypre_ParCSRMatrixComm(matrix);
573 global_num_rows = hypre_ParCSRMatrixGlobalNumRows(matrix);
574 global_num_cols = hypre_ParCSRMatrixGlobalNumCols(matrix);
575 col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix);
576 if (hypre_ParCSRMatrixOffd(matrix))
577 num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(matrix));
578
579 hypre_MPI_Comm_rank(comm, &my_id);
580 hypre_MPI_Comm_size(comm, &num_procs);
581
582 hypre_sprintf(new_file_d,"%s.D.%d",file_name,my_id);
583 hypre_sprintf(new_file_o,"%s.O.%d",file_name,my_id);
584 hypre_sprintf(new_file_info,"%s.INFO.%d",file_name,my_id);
585 hypre_CSRMatrixPrint(hypre_ParCSRMatrixDiag(matrix),new_file_d);
586 if (num_cols_offd != 0)
587 hypre_CSRMatrixPrint(hypre_ParCSRMatrixOffd(matrix),new_file_o);
588
589 fp = fopen(new_file_info, "w");
590 hypre_fprintf(fp, "%b\n", global_num_rows);
591 hypre_fprintf(fp, "%b\n", global_num_cols);
592 hypre_fprintf(fp, "%d\n", num_cols_offd);
593 row_s = hypre_ParCSRMatrixFirstRowIndex(matrix);
594 row_e = hypre_ParCSRMatrixLastRowIndex(matrix);
595 col_s = hypre_ParCSRMatrixFirstColDiag(matrix);
596 col_e = hypre_ParCSRMatrixLastColDiag(matrix);
597 /* add 1 to the ends because this is a starts partition */
598 hypre_fprintf(fp, "%b %b %b %b\n", row_s, row_e + 1, col_s, col_e + 1);
599 for (i=0; i < num_cols_offd; i++)
600 hypre_fprintf(fp, "%b\n", col_map_offd[i]);
601 fclose(fp);
602
603 return hypre_error_flag;
604 }
605
606 /*--------------------------------------------------------------------------
607 * hypre_ParCSRMatrixPrintIJ
608 *--------------------------------------------------------------------------*/
609
610 HYPRE_Int
hypre_ParCSRMatrixPrintIJ(const hypre_ParCSRMatrix * matrix,const HYPRE_Int base_i,const HYPRE_Int base_j,const char * filename)611 hypre_ParCSRMatrixPrintIJ( const hypre_ParCSRMatrix *matrix,
612 const HYPRE_Int base_i,
613 const HYPRE_Int base_j,
614 const char *filename )
615 {
616 MPI_Comm comm;
617 HYPRE_BigInt first_row_index;
618 HYPRE_BigInt first_col_diag;
619 hypre_CSRMatrix *diag;
620 hypre_CSRMatrix *offd;
621 HYPRE_BigInt *col_map_offd;
622 HYPRE_Int num_rows;
623 const HYPRE_BigInt *row_starts;
624 const HYPRE_BigInt *col_starts;
625 HYPRE_Complex *diag_data;
626 HYPRE_Int *diag_i;
627 HYPRE_Int *diag_j;
628 HYPRE_Complex *offd_data;
629 HYPRE_Int *offd_i;
630 HYPRE_Int *offd_j;
631 HYPRE_Int myid, num_procs, i, j;
632 HYPRE_BigInt I, J;
633 char new_filename[255];
634 FILE *file;
635 HYPRE_Int num_nonzeros_offd;
636 HYPRE_BigInt ilower, iupper, jlower, jupper;
637
638 if (!matrix)
639 {
640 hypre_error_in_arg(1);
641 return hypre_error_flag;
642 }
643
644 comm = hypre_ParCSRMatrixComm(matrix);
645 first_row_index = hypre_ParCSRMatrixFirstRowIndex(matrix);
646 first_col_diag = hypre_ParCSRMatrixFirstColDiag(matrix);
647 diag = hypre_ParCSRMatrixDiag(matrix);
648 offd = hypre_ParCSRMatrixOffd(matrix);
649 col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix);
650 num_rows = hypre_ParCSRMatrixNumRows(matrix);
651 row_starts = hypre_ParCSRMatrixRowStarts(matrix);
652 col_starts = hypre_ParCSRMatrixColStarts(matrix);
653 hypre_MPI_Comm_rank(comm, &myid);
654 hypre_MPI_Comm_size(comm, &num_procs);
655
656 hypre_sprintf(new_filename,"%s.%05d", filename, myid);
657
658 if ((file = fopen(new_filename, "w")) == NULL)
659 {
660 hypre_error_w_msg(HYPRE_ERROR_GENERIC,"Error: can't open output file %s\n");
661 return hypre_error_flag;
662 }
663
664 diag_data = hypre_CSRMatrixData(diag);
665 diag_i = hypre_CSRMatrixI(diag);
666 diag_j = hypre_CSRMatrixJ(diag);
667
668 num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(offd);
669 if (num_nonzeros_offd)
670 {
671 offd_data = hypre_CSRMatrixData(offd);
672 offd_i = hypre_CSRMatrixI(offd);
673 offd_j = hypre_CSRMatrixJ(offd);
674 }
675
676 ilower = row_starts[0] + (HYPRE_BigInt) base_i;
677 iupper = row_starts[1] + (HYPRE_BigInt) base_i - 1;
678 jlower = col_starts[0] + (HYPRE_BigInt) base_j;
679 jupper = col_starts[1] + (HYPRE_BigInt) base_j - 1;
680
681 hypre_fprintf(file, "%b %b %b %b\n", ilower, iupper, jlower, jupper);
682
683 for (i = 0; i < num_rows; i++)
684 {
685 I = first_row_index + (HYPRE_BigInt)(i + base_i);
686
687 /* print diag columns */
688 for (j = diag_i[i]; j < diag_i[i+1]; j++)
689 {
690 J = first_col_diag + (HYPRE_BigInt)(diag_j[j] + base_j);
691 if (diag_data)
692 {
693 #ifdef HYPRE_COMPLEX
694 hypre_fprintf(file, "%b %b %.14e , %.14e\n", I, J,
695 hypre_creal(diag_data[j]), hypre_cimag(diag_data[j]));
696 #else
697 hypre_fprintf(file, "%b %b %.14e\n", I, J, diag_data[j]);
698 #endif
699 }
700 else
701 {
702 hypre_fprintf(file, "%b %b\n", I, J);
703 }
704 }
705
706 /* print offd columns */
707 if (num_nonzeros_offd)
708 {
709 for (j = offd_i[i]; j < offd_i[i+1]; j++)
710 {
711 J = col_map_offd[offd_j[j]] + (HYPRE_BigInt) base_j;
712 if (offd_data)
713 {
714 #ifdef HYPRE_COMPLEX
715 hypre_fprintf(file, "%b %b %.14e , %.14e\n", I, J,
716 hypre_creal(offd_data[j]), hypre_cimag(offd_data[j]));
717 #else
718 hypre_fprintf(file, "%b %b %.14e\n", I, J, offd_data[j]);
719 #endif
720 }
721 else
722 {
723 hypre_fprintf(file, "%b %b\n", I, J);
724 }
725 }
726 }
727 }
728
729 fclose(file);
730
731 return hypre_error_flag;
732 }
733
734 /*--------------------------------------------------------------------------
735 * hypre_ParCSRMatrixReadIJ
736 *--------------------------------------------------------------------------*/
737
HYPRE_Int
hypre_ParCSRMatrixReadIJ( MPI_Comm comm,
                          const char *filename,
                          HYPRE_Int *base_i_ptr,
                          HYPRE_Int *base_j_ptr,
                          hypre_ParCSRMatrix **matrix_ptr)
{
   /* Read a parallel matrix in IJ (triplet) format from <filename>.<myid>,
      as written by hypre_ParCSRMatrixPrintIJ, splitting entries into the
      diag and offd blocks and rebuilding col_map_offd.  Outputs the index
      bases used by the file and the new matrix (ownership transfers to the
      caller). */
   HYPRE_BigInt        global_num_rows;
   HYPRE_BigInt        global_num_cols;
   HYPRE_BigInt        first_row_index;
   HYPRE_BigInt        first_col_diag;
   HYPRE_BigInt        last_col_diag;
   hypre_ParCSRMatrix *matrix;
   hypre_CSRMatrix    *diag;
   hypre_CSRMatrix    *offd;
   HYPRE_BigInt       *col_map_offd;
   HYPRE_BigInt        row_starts[2];
   HYPRE_BigInt        col_starts[2];
   HYPRE_Int           num_rows;
   HYPRE_BigInt        big_base_i, big_base_j;
   HYPRE_Int           base_i, base_j;
   HYPRE_Complex      *diag_data;
   HYPRE_Int          *diag_i;
   HYPRE_Int          *diag_j;
   HYPRE_Complex      *offd_data;
   HYPRE_Int          *offd_i;
   HYPRE_Int          *offd_j;
   HYPRE_BigInt       *tmp_j;          /* global offd column of each entry, in read order */
   HYPRE_BigInt       *aux_offd_j;     /* sorted copy used to build col_map_offd */
   HYPRE_BigInt        I, J;
   HYPRE_Int           myid, num_procs, i, i2, j;
   char                new_filename[255];
   FILE               *file;
   HYPRE_Int           num_cols_offd, num_nonzeros_diag, num_nonzeros_offd;
   HYPRE_Int           i_col, num_cols;
   HYPRE_Int           diag_cnt, offd_cnt, row_cnt;
   HYPRE_Complex       data;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &myid);

   hypre_sprintf(new_filename,"%s.%05d", filename, myid);

   if ((file = fopen(new_filename, "r")) == NULL)
   {
      /* NOTE(review): this is a read path but the message says "output
         file"; the "%s" is also never substituted — verify against
         hypre_error_w_msg's contract */
      hypre_error_w_msg(HYPRE_ERROR_GENERIC,"Error: can't open output file %s\n");
      return hypre_error_flag;
   }

   /* header: global sizes, local sizes, nnz counts, and the local
      [start, next_start) row/column ranges */
   hypre_fscanf(file, "%b %b", &global_num_rows, &global_num_cols);
   hypre_fscanf(file, "%d %d %d", &num_rows, &num_cols, &num_cols_offd);
   hypre_fscanf(file, "%d %d", &num_nonzeros_diag, &num_nonzeros_offd);
   hypre_fscanf(file, "%b %b %b %b", &row_starts[0], &col_starts[0], &row_starts[1], &col_starts[1]);

   /* the file's index bases are the first local row/column indices */
   big_base_i = row_starts[0];
   big_base_j = col_starts[0];
   base_i = (HYPRE_Int) row_starts[0];
   base_j = (HYPRE_Int) col_starts[0];

   matrix = hypre_ParCSRMatrixCreate(comm, global_num_rows, global_num_cols,
                                     row_starts, col_starts, num_cols_offd,
                                     num_nonzeros_diag, num_nonzeros_offd);
   hypre_ParCSRMatrixInitialize(matrix);

   diag = hypre_ParCSRMatrixDiag(matrix);
   offd = hypre_ParCSRMatrixOffd(matrix);

   diag_data = hypre_CSRMatrixData(diag);
   diag_i = hypre_CSRMatrixI(diag);
   diag_j = hypre_CSRMatrixJ(diag);

   offd_i = hypre_CSRMatrixI(offd);
   if (num_nonzeros_offd)
   {
      offd_data = hypre_CSRMatrixData(offd);
      offd_j = hypre_CSRMatrixJ(offd);
      tmp_j = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros_offd, HYPRE_MEMORY_HOST);
   }

   first_row_index = hypre_ParCSRMatrixFirstRowIndex(matrix);
   first_col_diag = hypre_ParCSRMatrixFirstColDiag(matrix);
   /* columns in [first_col_diag, last_col_diag] belong to the diag block */
   last_col_diag = first_col_diag+(HYPRE_BigInt)num_cols-1;

   diag_cnt = 0;
   offd_cnt = 0;
   row_cnt = 0;
   for (i = 0; i < num_nonzeros_diag+num_nonzeros_offd; i++)
   {
      /* read values */
      /* NOTE(review): "%le" reads a double; confirm this matches
         HYPRE_Complex in complex builds */
      hypre_fscanf(file, "%b %b %le", &I, &J, &data);
      /* i2 = local row of this entry; entries are assumed to appear in
         row order */
      i2 = (HYPRE_Int)(I-big_base_i-first_row_index);
      J -= big_base_j;
      if (i2 > row_cnt)
      {
         /* first entry of a new row: record the row-pointer offsets.
            NOTE(review): row_cnt advances by only 1 here, so rows with no
            entries would leave their diag_i/offd_i slots unset — verify
            the input format never contains empty rows */
         diag_i[i2] = diag_cnt;
         offd_i[i2] = offd_cnt;
         row_cnt++;
      }
      if (J < first_col_diag || J > last_col_diag)
      {
         /* off-processor column: keep the global index for later mapping */
         tmp_j[offd_cnt] = J;
         offd_data[offd_cnt++] = data;
      }
      else
      {
         /* local column: store relative to first_col_diag */
         diag_j[diag_cnt] = (HYPRE_Int)(J - first_col_diag);
         diag_data[diag_cnt++] = data;
      }
   }
   /* close the row pointers */
   diag_i[num_rows] = diag_cnt;
   offd_i[num_rows] = offd_cnt;

   fclose(file);

   /* generate col_map_offd: sorted distinct global offd columns, then map
      each stored entry's global column to its position in that list */
   if (num_nonzeros_offd)
   {
      aux_offd_j = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros_offd, HYPRE_MEMORY_HOST);
      for (i=0; i < num_nonzeros_offd; i++)
         aux_offd_j[i] = (HYPRE_BigInt)offd_j[i];
      hypre_BigQsort0(aux_offd_j,0,num_nonzeros_offd-1);
      col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix);
      col_map_offd[0] = aux_offd_j[0];
      offd_cnt = 0;
      /* compress the sorted list, dropping duplicates */
      for (i=1; i < num_nonzeros_offd; i++)
      {
         if (aux_offd_j[i] > col_map_offd[offd_cnt])
            col_map_offd[++offd_cnt] = aux_offd_j[i];
      }
      for (i=0; i < num_nonzeros_offd; i++)
      {
         offd_j[i] = hypre_BigBinarySearch(col_map_offd, tmp_j[i], num_cols_offd);
      }
      hypre_TFree(aux_offd_j, HYPRE_MEMORY_HOST);
      hypre_TFree(tmp_j, HYPRE_MEMORY_HOST);
   }

   /* move diagonal element in first position in each row */
   for (i=0; i < num_rows; i++)
   {
      i_col = diag_i[i];
      for (j=i_col; j < diag_i[i+1]; j++)
      {
         if (diag_j[j] == i)
         {
            /* swap entry j with the first entry of the row */
            diag_j[j] = diag_j[i_col];
            data = diag_data[j];
            diag_data[j] = diag_data[i_col];
            diag_data[i_col] = data;
            diag_j[i_col] = i;
            break;
         }
      }
   }

   *base_i_ptr = base_i;
   *base_j_ptr = base_j;
   *matrix_ptr = matrix;

   return hypre_error_flag;
}
899
900 /*--------------------------------------------------------------------------
901 * hypre_ParCSRMatrixGetLocalRange
902 * returns the row numbers of the rows stored on this processor.
903 * "End" is actually the row number of the last row on this processor.
904 *--------------------------------------------------------------------------*/
905
906 HYPRE_Int
hypre_ParCSRMatrixGetLocalRange(hypre_ParCSRMatrix * matrix,HYPRE_BigInt * row_start,HYPRE_BigInt * row_end,HYPRE_BigInt * col_start,HYPRE_BigInt * col_end)907 hypre_ParCSRMatrixGetLocalRange( hypre_ParCSRMatrix *matrix,
908 HYPRE_BigInt *row_start,
909 HYPRE_BigInt *row_end,
910 HYPRE_BigInt *col_start,
911 HYPRE_BigInt *col_end )
912 {
913 HYPRE_Int my_id;
914
915 if (!matrix)
916 {
917 hypre_error_in_arg(1);
918 return hypre_error_flag;
919 }
920
921 hypre_MPI_Comm_rank( hypre_ParCSRMatrixComm(matrix), &my_id );
922
923 *row_start = hypre_ParCSRMatrixFirstRowIndex(matrix);
924 *row_end = hypre_ParCSRMatrixLastRowIndex(matrix);
925 *col_start = hypre_ParCSRMatrixFirstColDiag(matrix);
926 *col_end = hypre_ParCSRMatrixLastColDiag(matrix);
927
928 return hypre_error_flag;
929 }
930
931 /*--------------------------------------------------------------------------
932 * hypre_ParCSRMatrixGetRow
933 * Returns global column indices and/or values for a given row in the global
934 * matrix. Global row number is used, but the row must be stored locally or
935 * an error is returned. This implementation copies from the two matrices that
936 * store the local data, storing them in the hypre_ParCSRMatrix structure.
937 * Only a single row can be accessed via this function at any one time; the
938 * corresponding RestoreRow function must be called, to avoid bleeding memory,
939 * and to be able to look at another row.
940 * Either one of col_ind and values can be left null, and those values will
941 * not be returned.
942 * All indices are returned in 0-based indexing, no matter what is used under
943 * the hood. EXCEPTION: currently this only works if the local CSR matrices
944 * use 0-based indexing.
 * This code, semantics, implementation, etc., are all based on PETSc's MPIAIJ
 * matrix code, adjusted for our data and software structures.
947 * AJC 4/99.
948 *--------------------------------------------------------------------------*/
949
950 HYPRE_Int
hypre_ParCSRMatrixGetRowHost(hypre_ParCSRMatrix * mat,HYPRE_BigInt row,HYPRE_Int * size,HYPRE_BigInt ** col_ind,HYPRE_Complex ** values)951 hypre_ParCSRMatrixGetRowHost( hypre_ParCSRMatrix *mat,
952 HYPRE_BigInt row,
953 HYPRE_Int *size,
954 HYPRE_BigInt **col_ind,
955 HYPRE_Complex **values )
956 {
957 HYPRE_Int my_id;
958 HYPRE_BigInt row_start, row_end;
959 hypre_CSRMatrix *Aa;
960 hypre_CSRMatrix *Ba;
961
962 if (!mat)
963 {
964 hypre_error_in_arg(1);
965 return hypre_error_flag;
966 }
967
968 Aa = (hypre_CSRMatrix *) hypre_ParCSRMatrixDiag(mat);
969 Ba = (hypre_CSRMatrix *) hypre_ParCSRMatrixOffd(mat);
970
971 if (hypre_ParCSRMatrixGetrowactive(mat))
972 {
973 return(-1);
974 }
975
976 hypre_MPI_Comm_rank( hypre_ParCSRMatrixComm(mat), &my_id );
977
978 hypre_ParCSRMatrixGetrowactive(mat) = 1;
979 row_start = hypre_ParCSRMatrixFirstRowIndex(mat);
980 row_end = hypre_ParCSRMatrixLastRowIndex(mat) + 1;
981 if (row < row_start || row >= row_end)
982 {
983 return(-1);
984 }
985
986 /* if buffer is not allocated and some information is requested,
987 allocate buffer */
988 if (!hypre_ParCSRMatrixRowvalues(mat) && ( col_ind || values ))
989 {
990 /*
991 allocate enough space to hold information from the longest row.
992 */
993 HYPRE_Int max = 1,tmp;
994 HYPRE_Int i;
995 HYPRE_Int m = row_end - row_start;
996
997 for ( i = 0; i < m; i++ )
998 {
999 tmp = hypre_CSRMatrixI(Aa)[i+1] - hypre_CSRMatrixI(Aa)[i] +
1000 hypre_CSRMatrixI(Ba)[i+1] - hypre_CSRMatrixI(Ba)[i];
1001 if (max < tmp)
1002 {
1003 max = tmp;
1004 }
1005 }
1006
1007 hypre_ParCSRMatrixRowvalues(mat) =
1008 (HYPRE_Complex *) hypre_CTAlloc(HYPRE_Complex, max, hypre_ParCSRMatrixMemoryLocation(mat));
1009 hypre_ParCSRMatrixRowindices(mat) =
1010 (HYPRE_BigInt *) hypre_CTAlloc(HYPRE_BigInt, max, hypre_ParCSRMatrixMemoryLocation(mat));
1011 }
1012
1013 /* Copy from dual sequential matrices into buffer */
1014 {
1015 HYPRE_Complex *vworkA, *vworkB, *v_p;
1016 HYPRE_Int i, *cworkA, *cworkB;
1017 HYPRE_BigInt cstart = hypre_ParCSRMatrixFirstColDiag(mat);
1018 HYPRE_Int nztot, nzA, nzB, lrow = (HYPRE_Int)(row-row_start);
1019 HYPRE_BigInt *cmap, *idx_p;
1020
1021 nzA = hypre_CSRMatrixI(Aa)[lrow+1] - hypre_CSRMatrixI(Aa)[lrow];
1022 cworkA = &( hypre_CSRMatrixJ(Aa)[ hypre_CSRMatrixI(Aa)[lrow] ] );
1023 vworkA = &( hypre_CSRMatrixData(Aa)[ hypre_CSRMatrixI(Aa)[lrow] ] );
1024
1025 nzB = hypre_CSRMatrixI(Ba)[lrow+1] - hypre_CSRMatrixI(Ba)[lrow];
1026 cworkB = &( hypre_CSRMatrixJ(Ba)[ hypre_CSRMatrixI(Ba)[lrow] ] );
1027 vworkB = &( hypre_CSRMatrixData(Ba)[ hypre_CSRMatrixI(Ba)[lrow] ] );
1028
1029 nztot = nzA + nzB;
1030
1031 cmap = hypre_ParCSRMatrixColMapOffd(mat);
1032
1033 if (values || col_ind)
1034 {
1035 if (nztot)
1036 {
1037 /* Sort by increasing column numbers, assuming A and B already sorted */
1038 HYPRE_Int imark = -1;
1039
1040 if (values)
1041 {
1042 *values = v_p = hypre_ParCSRMatrixRowvalues(mat);
1043 for ( i = 0; i < nzB; i++ )
1044 {
1045 if (cmap[cworkB[i]] < cstart)
1046 {
1047 v_p[i] = vworkB[i];
1048 }
1049 else
1050 {
1051 break;
1052 }
1053 }
1054 imark = i;
1055 for ( i = 0; i < nzA; i++ )
1056 {
1057 v_p[imark+i] = vworkA[i];
1058 }
1059 for ( i = imark; i < nzB; i++ )
1060 {
1061 v_p[nzA+i] = vworkB[i];
1062 }
1063 }
1064
1065 if (col_ind)
1066 {
1067 *col_ind = idx_p = hypre_ParCSRMatrixRowindices(mat);
1068 if (imark > -1)
1069 {
1070 for ( i = 0; i < imark; i++ )
1071 {
1072 idx_p[i] = cmap[cworkB[i]];
1073 }
1074 }
1075 else
1076 {
1077 for ( i = 0; i < nzB; i++ )
1078 {
1079 if (cmap[cworkB[i]] < cstart)
1080 {
1081 idx_p[i] = cmap[cworkB[i]];
1082 }
1083 else
1084 {
1085 break;
1086 }
1087 }
1088 imark = i;
1089 }
1090 for ( i = 0; i < nzA; i++ )
1091 {
1092 idx_p[imark+i] = cstart + cworkA[i];
1093 }
1094 for ( i = imark; i < nzB; i++ )
1095 {
1096 idx_p[nzA+i] = cmap[cworkB[i]];
1097 }
1098 }
1099 }
1100 else
1101 {
1102 if (col_ind)
1103 {
1104 *col_ind = 0;
1105 }
1106 if (values)
1107 {
1108 *values = 0;
1109 }
1110 }
1111 }
1112
1113 *size = nztot;
1114 } /* End of copy */
1115
1116 return hypre_error_flag;
1117 }
1118
1119
1120 HYPRE_Int
hypre_ParCSRMatrixGetRow(hypre_ParCSRMatrix * mat,HYPRE_BigInt row,HYPRE_Int * size,HYPRE_BigInt ** col_ind,HYPRE_Complex ** values)1121 hypre_ParCSRMatrixGetRow( hypre_ParCSRMatrix *mat,
1122 HYPRE_BigInt row,
1123 HYPRE_Int *size,
1124 HYPRE_BigInt **col_ind,
1125 HYPRE_Complex **values )
1126 {
1127 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
1128 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(mat) );
1129
1130 if (exec == HYPRE_EXEC_DEVICE)
1131 {
1132 return hypre_ParCSRMatrixGetRowDevice(mat, row, size, col_ind, values);
1133 }
1134 else
1135 #endif
1136 {
1137 return hypre_ParCSRMatrixGetRowHost(mat, row, size, col_ind, values);
1138 }
1139
1140 return hypre_error_flag;
1141 }
1142
1143 /*--------------------------------------------------------------------------
1144 * hypre_ParCSRMatrixRestoreRow
1145 *--------------------------------------------------------------------------*/
1146
1147 HYPRE_Int
hypre_ParCSRMatrixRestoreRow(hypre_ParCSRMatrix * matrix,HYPRE_BigInt row,HYPRE_Int * size,HYPRE_BigInt ** col_ind,HYPRE_Complex ** values)1148 hypre_ParCSRMatrixRestoreRow( hypre_ParCSRMatrix *matrix,
1149 HYPRE_BigInt row,
1150 HYPRE_Int *size,
1151 HYPRE_BigInt **col_ind,
1152 HYPRE_Complex **values )
1153 {
1154 if (!hypre_ParCSRMatrixGetrowactive(matrix))
1155 {
1156 hypre_error(HYPRE_ERROR_GENERIC);
1157 return hypre_error_flag;
1158 }
1159
1160 hypre_ParCSRMatrixGetrowactive(matrix) = 0;
1161
1162 return hypre_error_flag;
1163 }
1164
1165 /*--------------------------------------------------------------------------
1166 * hypre_CSRMatrixToParCSRMatrix:
1167 *
1168 * Generates a ParCSRMatrix distributed across the processors in comm
1169 * from a CSRMatrix on proc 0 .
1170 *
1171 *--------------------------------------------------------------------------*/
1172
1173 hypre_ParCSRMatrix *
hypre_CSRMatrixToParCSRMatrix(MPI_Comm comm,hypre_CSRMatrix * A,HYPRE_BigInt * global_row_starts,HYPRE_BigInt * global_col_starts)1174 hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm,
1175 hypre_CSRMatrix *A,
1176 HYPRE_BigInt *global_row_starts,
1177 HYPRE_BigInt *global_col_starts )
1178 {
1179 hypre_ParCSRMatrix *parcsr_A;
1180
1181 HYPRE_BigInt *global_data;
1182 HYPRE_BigInt global_size;
1183 HYPRE_BigInt global_num_rows;
1184 HYPRE_BigInt global_num_cols;
1185
1186 HYPRE_Int num_procs, my_id;
1187 HYPRE_Int *num_rows_proc;
1188 HYPRE_Int *num_nonzeros_proc;
1189 HYPRE_BigInt *row_starts = NULL;
1190 HYPRE_BigInt *col_starts = NULL;
1191
1192 hypre_CSRMatrix *local_A;
1193 HYPRE_Complex *A_data;
1194 HYPRE_Int *A_i;
1195 HYPRE_Int *A_j;
1196
1197 hypre_MPI_Request *requests;
1198 hypre_MPI_Status *status, status0;
1199 hypre_MPI_Datatype *csr_matrix_datatypes;
1200
1201 HYPRE_Int free_global_row_starts = 0;
1202 HYPRE_Int free_global_col_starts = 0;
1203
1204 HYPRE_Int total_size;
1205 HYPRE_BigInt first_col_diag;
1206 HYPRE_BigInt last_col_diag;
1207 HYPRE_Int num_rows;
1208 HYPRE_Int num_nonzeros;
1209 HYPRE_Int i, ind;
1210
1211 hypre_MPI_Comm_rank(comm, &my_id);
1212 hypre_MPI_Comm_size(comm, &num_procs);
1213
1214 total_size = 4;
1215 if (my_id == 0)
1216 {
1217 total_size += 2*(num_procs + 1);
1218 }
1219
1220 global_data = hypre_CTAlloc(HYPRE_BigInt, total_size, HYPRE_MEMORY_HOST);
1221 if (my_id == 0)
1222 {
1223 global_size = 3;
1224 if (global_row_starts)
1225 {
1226 if (global_col_starts)
1227 {
1228 if (global_col_starts != global_row_starts)
1229 {
1230 /* contains code for what to expect,
1231 if 0: global_row_starts = global_col_starts, only global_row_starts given
1232 if 1: only global_row_starts given, global_col_starts = NULL
1233 if 2: both global_row_starts and global_col_starts given
1234 if 3: only global_col_starts given, global_row_starts = NULL */
1235 global_data[3] = 2;
1236 global_size += (HYPRE_BigInt) (2*(num_procs + 1) + 1);
1237 for (i = 0; i < (num_procs + 1); i++)
1238 {
1239 global_data[i+4] = global_row_starts[i];
1240 }
1241 for (i = 0; i < (num_procs + 1); i++)
1242 {
1243 global_data[i+num_procs+5] = global_col_starts[i];
1244 }
1245 }
1246 else
1247 {
1248 global_data[3] = 0;
1249 global_size += (HYPRE_BigInt) ((num_procs + 1) + 1);
1250 for (i = 0; i < (num_procs + 1); i++)
1251 {
1252 global_data[i+4] = global_row_starts[i];
1253 }
1254 }
1255 }
1256 else
1257 {
1258 global_data[3] = 1;
1259 global_size += (HYPRE_BigInt) ((num_procs + 1) + 1);
1260 for (i = 0; i < (num_procs + 1); i++)
1261 {
1262 global_data[i+4] = global_row_starts[i];
1263 }
1264 }
1265 }
1266 else
1267 {
1268 if (global_col_starts)
1269 {
1270 global_data[3] = 3;
1271 global_size += (HYPRE_BigInt) ((num_procs + 1) + 1);
1272 for (i = 0; i < (num_procs + 1); i++)
1273 {
1274 global_data[i+4] = global_col_starts[i];
1275 }
1276 }
1277 }
1278
1279 global_data[0] = (HYPRE_BigInt) hypre_CSRMatrixNumRows(A);
1280 global_data[1] = (HYPRE_BigInt) hypre_CSRMatrixNumCols(A);
1281 global_data[2] = global_size;
1282 A_data = hypre_CSRMatrixData(A);
1283 A_i = hypre_CSRMatrixI(A);
1284 A_j = hypre_CSRMatrixJ(A);
1285 }
1286 hypre_MPI_Bcast(global_data, 3, HYPRE_MPI_BIG_INT, 0, comm);
1287 global_num_rows = global_data[0];
1288 global_num_cols = global_data[1];
1289 global_size = global_data[2];
1290
1291 if (global_size > 3)
1292 {
1293 HYPRE_Int send_start;
1294
1295 if (global_data[3] == 2)
1296 {
1297 row_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1298 col_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1299
1300 send_start = 4;
1301 hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1302 &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm);
1303
1304 send_start = 5;
1305 hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1306 &row_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm);
1307
1308 send_start = 4 + (num_procs + 1);
1309 hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1310 &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm);
1311
1312 send_start = 5 + (num_procs + 1);
1313 hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1314 &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm);
1315 }
1316 else if ((global_data[3] == 0) || (global_data[3] == 1))
1317 {
1318 row_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1319
1320 send_start = 4;
1321 hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1322 &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm);
1323
1324 send_start = 5;
1325 hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1326 &row_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm);
1327
1328 if (global_data[3] == 0)
1329 {
1330 col_starts = row_starts;
1331 }
1332 }
1333 else
1334 {
1335 col_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
1336
1337 send_start = 4;
1338 hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1339 &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm);
1340
1341 send_start = 5;
1342 hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT,
1343 &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm);
1344 }
1345 }
1346 hypre_TFree(global_data, HYPRE_MEMORY_HOST);
1347
1348 // Create ParCSR matrix
1349 parcsr_A = hypre_ParCSRMatrixCreate(comm, global_num_rows, global_num_cols,
1350 row_starts, col_starts, 0, 0, 0);
1351 hypre_TFree(row_starts, HYPRE_MEMORY_HOST);
1352 hypre_TFree(col_starts, HYPRE_MEMORY_HOST);
1353
1354 // Allocate memory for building ParCSR matrix
1355 num_rows_proc = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST);
1356 num_nonzeros_proc = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST);
1357
1358 if (my_id == 0)
1359 {
1360 if (!global_row_starts)
1361 {
1362 hypre_GeneratePartitioning(global_num_rows, num_procs, &global_row_starts);
1363 free_global_row_starts = 1;
1364 }
1365 if (!global_col_starts)
1366 {
1367 hypre_GeneratePartitioning(global_num_rows, num_procs, &global_col_starts);
1368 free_global_col_starts = 1;
1369 }
1370
1371 for (i = 0; i < num_procs; i++)
1372 {
1373 num_rows_proc[i] = (HYPRE_Int) (global_row_starts[i+1] - global_row_starts[i]);
1374 num_nonzeros_proc[i] = A_i[(HYPRE_Int)global_row_starts[i+1]] -
1375 A_i[(HYPRE_Int)global_row_starts[i]];
1376 }
1377 //num_nonzeros_proc[num_procs-1] = A_i[(HYPRE_Int)global_num_rows] - A_i[(HYPRE_Int)row_starts[num_procs-1]];
1378 }
1379 hypre_MPI_Scatter(num_rows_proc, 1, HYPRE_MPI_INT, &num_rows, 1, HYPRE_MPI_INT, 0, comm);
1380 hypre_MPI_Scatter(num_nonzeros_proc, 1, HYPRE_MPI_INT, &num_nonzeros, 1, HYPRE_MPI_INT, 0, comm);
1381
1382 /* RL: this is not correct: (HYPRE_Int) global_num_cols */
1383 local_A = hypre_CSRMatrixCreate(num_rows, (HYPRE_Int) global_num_cols, num_nonzeros);
1384
1385 csr_matrix_datatypes = hypre_CTAlloc(hypre_MPI_Datatype, num_procs, HYPRE_MEMORY_HOST);
1386 if (my_id == 0)
1387 {
1388 requests = hypre_CTAlloc(hypre_MPI_Request, num_procs-1, HYPRE_MEMORY_HOST);
1389 status = hypre_CTAlloc(hypre_MPI_Status, num_procs-1, HYPRE_MEMORY_HOST);
1390 for (i = 1; i < num_procs; i++)
1391 {
1392 ind = A_i[(HYPRE_Int) global_row_starts[i]];
1393
1394 hypre_BuildCSRMatrixMPIDataType(num_nonzeros_proc[i],
1395 num_rows_proc[i],
1396 &A_data[ind],
1397 &A_i[(HYPRE_Int) global_row_starts[i]],
1398 &A_j[ind],
1399 &csr_matrix_datatypes[i]);
1400 hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, comm,
1401 &requests[i-1]);
1402 hypre_MPI_Type_free(&csr_matrix_datatypes[i]);
1403 }
1404 hypre_CSRMatrixData(local_A) = A_data;
1405 hypre_CSRMatrixI(local_A) = A_i;
1406 hypre_CSRMatrixJ(local_A) = A_j;
1407 hypre_CSRMatrixOwnsData(local_A) = 0;
1408
1409 hypre_MPI_Waitall(num_procs-1, requests, status);
1410
1411 hypre_TFree(requests, HYPRE_MEMORY_HOST);
1412 hypre_TFree(status, HYPRE_MEMORY_HOST);
1413 hypre_TFree(num_rows_proc, HYPRE_MEMORY_HOST);
1414 hypre_TFree(num_nonzeros_proc, HYPRE_MEMORY_HOST);
1415
1416 if (free_global_row_starts)
1417 {
1418 hypre_TFree(global_row_starts, HYPRE_MEMORY_HOST);
1419 }
1420 if (free_global_col_starts)
1421 {
1422 hypre_TFree(global_col_starts, HYPRE_MEMORY_HOST);
1423 }
1424 }
1425 else
1426 {
1427 hypre_CSRMatrixInitialize(local_A);
1428 hypre_BuildCSRMatrixMPIDataType(num_nonzeros,
1429 num_rows,
1430 hypre_CSRMatrixData(local_A),
1431 hypre_CSRMatrixI(local_A),
1432 hypre_CSRMatrixJ(local_A),
1433 &csr_matrix_datatypes[0]);
1434 hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0], 0, 0, comm, &status0);
1435 hypre_MPI_Type_free(csr_matrix_datatypes);
1436 }
1437
1438 first_col_diag = hypre_ParCSRMatrixFirstColDiag(parcsr_A);
1439 last_col_diag = hypre_ParCSRMatrixLastColDiag(parcsr_A);
1440
1441 GenerateDiagAndOffd(local_A, parcsr_A, first_col_diag, last_col_diag);
1442
1443 /* set pointers back to NULL before destroying */
1444 if (my_id == 0)
1445 {
1446 hypre_CSRMatrixData(local_A) = NULL;
1447 hypre_CSRMatrixI(local_A) = NULL;
1448 hypre_CSRMatrixJ(local_A) = NULL;
1449 }
1450 hypre_CSRMatrixDestroy(local_A);
1451 hypre_TFree(csr_matrix_datatypes, HYPRE_MEMORY_HOST);
1452
1453 return parcsr_A;
1454 }
1455
1456 /* RL: XXX this is not a scalable routine, see `marker' therein */
/* Split the local CSR matrix A into the diag and offd blocks of `matrix`:
   columns in [first_col_diag, last_col_diag] go to diag (with column
   indices shifted to be local), all others go to offd, whose columns are
   compressed and recorded in col_map_offd.  Two passes over A: the first
   counts entries per block and marks offd columns, the second copies data.
   RL: XXX this is not a scalable routine because `marker` has one entry
   per GLOBAL column. */
HYPRE_Int
GenerateDiagAndOffd(hypre_CSRMatrix *A,
                    hypre_ParCSRMatrix *matrix,
                    HYPRE_BigInt first_col_diag,
                    HYPRE_BigInt last_col_diag)
{
   HYPRE_Int i, j;
   HYPRE_Int jo, jd;
   HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A);
   HYPRE_Int num_cols = hypre_CSRMatrixNumCols(A);
   HYPRE_Complex *a_data = hypre_CSRMatrixData(A);
   HYPRE_Int *a_i = hypre_CSRMatrixI(A);
   /*RL: XXX FIXME if A spans global column space, the following a_j should be bigJ */
   HYPRE_Int *a_j = hypre_CSRMatrixJ(A);

   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(matrix);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix);

   HYPRE_BigInt *col_map_offd;

   HYPRE_Complex *diag_data, *offd_data;
   HYPRE_Int *diag_i, *offd_i;
   HYPRE_Int *diag_j, *offd_j;
   HYPRE_Int *marker;                    /* per-global-column scratch; see below */
   HYPRE_Int num_cols_diag, num_cols_offd;
   HYPRE_Int first_elmt = a_i[0];        /* a_i may not start at 0 */
   HYPRE_Int num_nonzeros = a_i[num_rows]-first_elmt;
   HYPRE_Int counter;

   num_cols_diag = (HYPRE_Int)(last_col_diag - first_col_diag +1);
   num_cols_offd = 0;

   HYPRE_MemoryLocation memory_location = hypre_CSRMatrixMemoryLocation(A);

   /* General case: A has columns outside the diag range, so both blocks
      must be built.  (Else-branch below: everything is diagonal.) */
   if (num_cols - num_cols_diag)
   {
      /* allocate only the row-pointer arrays for now (0 nonzeros) */
      hypre_CSRMatrixInitialize_v2(diag, 0, memory_location);
      diag_i = hypre_CSRMatrixI(diag);

      hypre_CSRMatrixInitialize_v2(offd, 0, memory_location);
      offd_i = hypre_CSRMatrixI(offd);
      marker = hypre_CTAlloc(HYPRE_Int, num_cols, HYPRE_MEMORY_HOST);

      for (i=0; i < num_cols; i++)
      {
         marker[i] = 0;
      }

      /* Pass 1: count diag/offd entries per row and flag offd columns */
      jo = 0;
      jd = 0;
      for (i = 0; i < num_rows; i++)
      {
         offd_i[i] = jo;
         diag_i[i] = jd;

         for (j = a_i[i]-first_elmt; j < a_i[i+1]-first_elmt; j++)
         {
            if (a_j[j] < first_col_diag || a_j[j] > last_col_diag)
            {
               if (!marker[a_j[j]])
               {
                  marker[a_j[j]] = 1;
                  num_cols_offd++;
               }
               jo++;
            }
            else
            {
               jd++;
            }
         }
      }
      offd_i[num_rows] = jo;
      diag_i[num_rows] = jd;

      hypre_ParCSRMatrixColMapOffd(matrix) = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd, HYPRE_MEMORY_HOST);
      col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix);

      /* Build col_map_offd and REUSE marker: each flagged column's marker
         value is overwritten with its compressed offd index (which may be
         0, so order of passes matters — pass 2 only reads markers of
         columns flagged in pass 1). */
      counter = 0;
      for (i = 0; i < num_cols; i++)
      {
         if (marker[i])
         {
            col_map_offd[counter] = (HYPRE_BigInt) i;
            marker[i] = counter;
            counter++;
         }
      }

      /* now that sizes are known, allocate data/j arrays */
      hypre_CSRMatrixNumNonzeros(diag) = jd;
      hypre_CSRMatrixInitialize(diag);
      diag_data = hypre_CSRMatrixData(diag);
      diag_j = hypre_CSRMatrixJ(diag);

      hypre_CSRMatrixNumNonzeros(offd) = jo;
      hypre_CSRMatrixNumCols(offd) = num_cols_offd;
      hypre_CSRMatrixInitialize(offd);
      offd_data = hypre_CSRMatrixData(offd);
      offd_j = hypre_CSRMatrixJ(offd);

      /* Pass 2: copy values; diag columns become local, offd columns become
         compressed indices via marker */
      jo = 0;
      jd = 0;
      for (i=0; i < num_rows; i++)
      {
         for (j=a_i[i]-first_elmt; j < a_i[i+1]-first_elmt; j++)
         {
            if (a_j[j] < (HYPRE_Int)first_col_diag || a_j[j] > (HYPRE_Int)last_col_diag)
            {
               offd_data[jo] = a_data[j];
               offd_j[jo++] = marker[a_j[j]];
            }
            else
            {
               diag_data[jd] = a_data[j];
               diag_j[jd++] = (HYPRE_Int)(a_j[j]-first_col_diag);
            }
         }
      }
      hypre_TFree(marker, HYPRE_MEMORY_HOST);
   }
   else
   {
      /* All columns fall in the diag range: copy A wholesale into diag and
         give offd an all-zero row pointer with 0 columns. */
      hypre_CSRMatrixNumNonzeros(diag) = num_nonzeros;
      hypre_CSRMatrixInitialize(diag);
      diag_data = hypre_CSRMatrixData(diag);
      diag_i = hypre_CSRMatrixI(diag);
      diag_j = hypre_CSRMatrixJ(diag);

      for (i=0; i < num_nonzeros; i++)
      {
         diag_data[i] = a_data[i];
         diag_j[i] = a_j[i];
      }
      offd_i = hypre_CTAlloc(HYPRE_Int, num_rows+1, HYPRE_MEMORY_HOST);

      for (i=0; i < num_rows+1; i++)
      {
         diag_i[i] = a_i[i];
         offd_i[i] = 0;
      }

      hypre_CSRMatrixNumCols(offd) = 0;
      hypre_CSRMatrixI(offd) = offd_i;
   }

   return hypre_error_flag;
}
1604
1605 hypre_CSRMatrix *
hypre_MergeDiagAndOffd(hypre_ParCSRMatrix * par_matrix)1606 hypre_MergeDiagAndOffd(hypre_ParCSRMatrix *par_matrix)
1607 {
1608 hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(par_matrix);
1609 hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(par_matrix);
1610 hypre_CSRMatrix *matrix;
1611
1612 HYPRE_BigInt num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix);
1613 HYPRE_BigInt first_col_diag = hypre_ParCSRMatrixFirstColDiag(par_matrix);
1614 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(par_matrix);
1615 HYPRE_Int num_rows = hypre_CSRMatrixNumRows(diag);
1616
1617 HYPRE_Int *diag_i = hypre_CSRMatrixI(diag);
1618 HYPRE_Int *diag_j = hypre_CSRMatrixJ(diag);
1619 HYPRE_Complex *diag_data = hypre_CSRMatrixData(diag);
1620 HYPRE_Int *offd_i = hypre_CSRMatrixI(offd);
1621 HYPRE_Int *offd_j = hypre_CSRMatrixJ(offd);
1622 HYPRE_Complex *offd_data = hypre_CSRMatrixData(offd);
1623
1624 HYPRE_Int *matrix_i;
1625 HYPRE_BigInt *matrix_j;
1626 HYPRE_Complex *matrix_data;
1627
1628 HYPRE_Int num_nonzeros, i, j;
1629 HYPRE_Int count;
1630 HYPRE_Int size, rest, num_threads, ii;
1631
1632 HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(par_matrix);
1633
1634 num_nonzeros = diag_i[num_rows] + offd_i[num_rows];
1635
1636 matrix = hypre_CSRMatrixCreate(num_rows,num_cols,num_nonzeros);
1637 hypre_CSRMatrixMemoryLocation(matrix) = memory_location;
1638 hypre_CSRMatrixBigInitialize(matrix);
1639
1640 matrix_i = hypre_CSRMatrixI(matrix);
1641 matrix_j = hypre_CSRMatrixBigJ(matrix);
1642 matrix_data = hypre_CSRMatrixData(matrix);
1643 num_threads = hypre_NumThreads();
1644 size = num_rows/num_threads;
1645 rest = num_rows - size*num_threads;
1646
1647 #ifdef HYPRE_USING_OPENMP
1648 #pragma omp parallel for private(ii, i, j, count) HYPRE_SMP_SCHEDULE
1649 #endif
1650 for (ii = 0; ii < num_threads; ii++)
1651 {
1652 HYPRE_Int ns, ne;
1653 if (ii < rest)
1654 {
1655 ns = ii*size+ii;
1656 ne = (ii+1)*size+ii+1;
1657 }
1658 else
1659 {
1660 ns = ii*size+rest;
1661 ne = (ii+1)*size+rest;
1662 }
1663 count = diag_i[ns]+offd_i[ns];;
1664 for (i = ns; i < ne; i++)
1665 {
1666 matrix_i[i] = count;
1667 for (j=diag_i[i]; j < diag_i[i+1]; j++)
1668 {
1669 matrix_data[count] = diag_data[j];
1670 matrix_j[count++] = (HYPRE_BigInt)diag_j[j]+first_col_diag;
1671 }
1672 for (j=offd_i[i]; j < offd_i[i+1]; j++)
1673 {
1674 matrix_data[count] = offd_data[j];
1675 matrix_j[count++] = col_map_offd[offd_j[j]];
1676 }
1677 }
1678 } /* end parallel region */
1679
1680 matrix_i[num_rows] = num_nonzeros;
1681
1682 return matrix;
1683 }
1684
1685 /*--------------------------------------------------------------------------
1686 * hypre_ParCSRMatrixToCSRMatrixAll:
1687 * generates a CSRMatrix from a ParCSRMatrix on all processors that have
1688 * parts of the ParCSRMatrix
1689 * Warning: this only works for a ParCSRMatrix that is smaller than 2^31-1
1690 *--------------------------------------------------------------------------*/
1691
hypre_CSRMatrix *
hypre_ParCSRMatrixToCSRMatrixAll(hypre_ParCSRMatrix *par_matrix)
{
   /* Gather the whole ParCSRMatrix onto EVERY processor that owns at least
      one of its rows, returning a full (host) CSR matrix there; returns
      NULL on processors that own no rows.  Protocol:
        1) every rank with rows tells rank 0 its last global row index
           (hypre_DataExchangeList),
        2) rank 0 assembles the list of participating ranks (used_procs)
           and their row offsets (new_vec_starts) and sends it to them
           (tag1),
        3) all participants exchange row pointers (tag2), then column
           indices (tag3) and values (tag1) all-to-all among used_procs.
      Warning (see header comment): only works while the global matrix is
      smaller than 2^31-1. */
   MPI_Comm comm = hypre_ParCSRMatrixComm(par_matrix);
   hypre_CSRMatrix *matrix;
   hypre_CSRMatrix *local_matrix;
   HYPRE_Int num_rows = (HYPRE_Int)hypre_ParCSRMatrixGlobalNumRows(par_matrix);
   HYPRE_Int num_cols = (HYPRE_Int)hypre_ParCSRMatrixGlobalNumCols(par_matrix);
   HYPRE_Int *matrix_i;
   HYPRE_Int *matrix_j;
   HYPRE_Complex *matrix_data;

   HYPRE_Int *local_matrix_i;
   HYPRE_Int *local_matrix_j;
   HYPRE_Complex *local_matrix_data;

   HYPRE_Int i, j;
   HYPRE_Int local_num_rows;
   HYPRE_Int local_num_nonzeros;
   HYPRE_Int num_nonzeros;
   HYPRE_Int num_data;
   HYPRE_Int num_requests;
   HYPRE_Int vec_len, offset;
   HYPRE_Int start_index;
   HYPRE_Int proc_id;
   HYPRE_Int num_procs, my_id;
   HYPRE_Int num_types;             /* number of ranks that own rows */
   HYPRE_Int *used_procs;           /* sorted ids of those ranks */

   hypre_MPI_Request *requests;
   hypre_MPI_Status *status;

   HYPRE_Int *new_vec_starts;       /* row offsets of the used_procs */

   HYPRE_Int num_contacts;
   HYPRE_Int contact_proc_list[1];
   HYPRE_Int contact_send_buf[1];
   HYPRE_Int contact_send_buf_starts[2];
   HYPRE_Int max_response_size;
   HYPRE_Int *response_recv_buf=NULL;
   HYPRE_Int *response_recv_buf_starts = NULL;
   hypre_DataExchangeResponse response_obj;
   hypre_ProcListElements send_proc_obj;

   HYPRE_Int *send_info = NULL;
   hypre_MPI_Status status1;
   /* arbitrary distinct tags to keep the three exchanges apart */
   HYPRE_Int count, tag1 = 11112, tag2 = 22223, tag3 = 33334;
   HYPRE_Int start;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   local_num_rows = (HYPRE_Int)(hypre_ParCSRMatrixLastRowIndex(par_matrix) -
      hypre_ParCSRMatrixFirstRowIndex(par_matrix) + 1);


   local_matrix = hypre_MergeDiagAndOffd(par_matrix); /* creates matrix */
   hypre_CSRMatrixBigJtoJ(local_matrix); /* copies big_j to j */
   local_matrix_i = hypre_CSRMatrixI(local_matrix);
   local_matrix_j = hypre_CSRMatrixJ(local_matrix);
   local_matrix_data = hypre_CSRMatrixData(local_matrix);


   /* determine procs that have vector data and store their ids in used_procs */
   /* we need to do an exchange data for this. If I own row then I will contact
      processor 0 with the endpoint of my local range */

   if (local_num_rows > 0)
   {
      num_contacts = 1;
      contact_proc_list[0] = 0;
      contact_send_buf[0] = (HYPRE_Int)hypre_ParCSRMatrixLastRowIndex(par_matrix);
      contact_send_buf_starts[0] = 0;
      contact_send_buf_starts[1] = 1;

   }
   else
   {
      /* no rows: contact nobody, but still participate in the exchange */
      num_contacts = 0;
      contact_send_buf_starts[0] = 0;
      contact_send_buf_starts[1] = 0;
   }
   /*build the response object*/
   /*send_proc_obj will be for saving info from contacts */
   send_proc_obj.length = 0;
   send_proc_obj.storage_length = 10;    /* grows on demand inside the fill callback */
   send_proc_obj.id = hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length, HYPRE_MEMORY_HOST);
   send_proc_obj.vec_starts =
      hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length + 1, HYPRE_MEMORY_HOST);
   send_proc_obj.vec_starts[0] = 0;
   send_proc_obj.element_storage_length = 10;
   send_proc_obj.elements =
      hypre_CTAlloc(HYPRE_BigInt, send_proc_obj.element_storage_length, HYPRE_MEMORY_HOST);

   max_response_size = 0; /* each response is null */
   response_obj.fill_response = hypre_FillResponseParToCSRMatrix;
   response_obj.data1 = NULL;
   response_obj.data2 = &send_proc_obj; /*this is where we keep info from contacts*/


   hypre_DataExchangeList(num_contacts,
                          contact_proc_list, contact_send_buf,
                          contact_send_buf_starts, sizeof(HYPRE_Int),
                          sizeof(HYPRE_Int), &response_obj,
                          max_response_size, 1,
                          comm, (void**) &response_recv_buf,
                          &response_recv_buf_starts);

   /* now processor 0 should have a list of ranges for processors that have rows -
      these are in send_proc_obj - it needs to create the new list of processors
      and also an array of vec starts - and send to those who own row*/
   if (my_id)
   {
      if (local_num_rows)
      {
         /* look for a message from processor 0 */
         hypre_MPI_Probe(0, tag1, comm, &status1);
         hypre_MPI_Get_count(&status1, HYPRE_MPI_INT, &count);

         send_info = hypre_CTAlloc(HYPRE_Int, count, HYPRE_MEMORY_HOST);
         hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, comm, &status1);

         /* now unpack: send_info = [num_types, used_procs..., new_vec_starts...] */
         num_types = send_info[0];
         used_procs = hypre_CTAlloc(HYPRE_Int, num_types, HYPRE_MEMORY_HOST);
         new_vec_starts = hypre_CTAlloc(HYPRE_Int, num_types+1, HYPRE_MEMORY_HOST);

         for (i=1; i<= num_types; i++)
         {
            used_procs[i-1] = send_info[i];
         }
         for (i=num_types+1; i< count; i++)
         {
            new_vec_starts[i-num_types-1] = send_info[i] ;
         }
      }
      else /* clean up and exit */
      {
         hypre_TFree(send_proc_obj.vec_starts, HYPRE_MEMORY_HOST);
         hypre_TFree(send_proc_obj.id, HYPRE_MEMORY_HOST);
         hypre_TFree(send_proc_obj.elements, HYPRE_MEMORY_HOST);
         if(response_recv_buf) hypre_TFree(response_recv_buf, HYPRE_MEMORY_HOST);
         if(response_recv_buf_starts) hypre_TFree(response_recv_buf_starts, HYPRE_MEMORY_HOST);


         if (hypre_CSRMatrixOwnsData(local_matrix))
            hypre_CSRMatrixDestroy(local_matrix);
         else
            hypre_TFree(local_matrix, HYPRE_MEMORY_HOST);

         return NULL;
      }
   }
   else /* my_id ==0 */
   {
      /* rank 0: build used_procs / new_vec_starts from the contact info */
      num_types = send_proc_obj.length;
      used_procs = hypre_CTAlloc(HYPRE_Int, num_types, HYPRE_MEMORY_HOST);
      new_vec_starts = hypre_CTAlloc(HYPRE_Int, num_types+1, HYPRE_MEMORY_HOST);

      new_vec_starts[0] = 0;
      for (i=0; i< num_types; i++)
      {
         used_procs[i] = send_proc_obj.id[i];
         /* each contact sent its LAST row index; +1 makes it an exclusive end */
         new_vec_starts[i+1] = send_proc_obj.elements[i]+1;
      }
      /* contacts may arrive in any order: sort both arrays (their entries
         correspond after sorting because ranks own contiguous row blocks) */
      hypre_qsort0(used_procs, 0, num_types-1);
      hypre_qsort0(new_vec_starts, 0, num_types);
      /*now we need to put into an array to send */
      count = 2*num_types+2;
      send_info = hypre_CTAlloc(HYPRE_Int, count, HYPRE_MEMORY_HOST);
      send_info[0] = num_types;
      for (i=1; i<= num_types; i++)
      {
         /* NOTE(review): cast to HYPRE_BigInt stored into a HYPRE_Int
            array looks like a leftover; value is a rank id so it fits */
         send_info[i] = (HYPRE_BigInt)used_procs[i-1];
      }
      for (i=num_types+1; i< count; i++)
      {
         send_info[i] = new_vec_starts[i-num_types-1];
      }
      requests = hypre_CTAlloc(hypre_MPI_Request, num_types, HYPRE_MEMORY_HOST);
      status = hypre_CTAlloc(hypre_MPI_Status, num_types, HYPRE_MEMORY_HOST);

      /* don't send to myself - these are sorted so my id would be first*/
      start = 0;
      if (num_types && used_procs[0] == 0)
      {
         start = 1;
      }

      for (i=start; i < num_types; i++)
      {
         hypre_MPI_Isend(send_info, count, HYPRE_MPI_INT, used_procs[i], tag1,
                         comm, &requests[i-start]);
      }
      hypre_MPI_Waitall(num_types-start, requests, status);

      hypre_TFree(status, HYPRE_MEMORY_HOST);
      hypre_TFree(requests, HYPRE_MEMORY_HOST);
   }
   /* clean up */
   hypre_TFree(send_proc_obj.vec_starts, HYPRE_MEMORY_HOST);
   hypre_TFree(send_proc_obj.id, HYPRE_MEMORY_HOST);
   hypre_TFree(send_proc_obj.elements, HYPRE_MEMORY_HOST);
   hypre_TFree(send_info, HYPRE_MEMORY_HOST);
   if(response_recv_buf) hypre_TFree(response_recv_buf, HYPRE_MEMORY_HOST);
   if(response_recv_buf_starts) hypre_TFree(response_recv_buf_starts, HYPRE_MEMORY_HOST);

   /* now proc 0 can exit if it has no rows */
   if (!local_num_rows)
   {
      if (hypre_CSRMatrixOwnsData(local_matrix))
         hypre_CSRMatrixDestroy(local_matrix);
      else
         hypre_TFree(local_matrix, HYPRE_MEMORY_HOST);

      hypre_TFree(new_vec_starts, HYPRE_MEMORY_HOST);
      hypre_TFree(used_procs, HYPRE_MEMORY_HOST);

      return NULL;
   }

   /* everyone left has rows and knows: new_vec_starts, num_types, and used_procs */

   /* this matrix should be rather small */
   matrix_i = hypre_CTAlloc(HYPRE_Int, num_rows+1, HYPRE_MEMORY_HOST);

   num_requests = 4*num_types;
   requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST);
   status = hypre_CTAlloc(hypre_MPI_Status, num_requests, HYPRE_MEMORY_HOST);

   /* exchange contents of local_matrix_i - here we are sending to ourself also*/

   j = 0;
   for (i = 0; i < num_types; i++)
   {
      proc_id = used_procs[i];
      vec_len = (HYPRE_Int)(new_vec_starts[i+1] - new_vec_starts[i]);
      /* receive into slot 1..vec_len of each block; slot 0 stays 0 / is
         fixed up by the offset pass below */
      hypre_MPI_Irecv(&matrix_i[new_vec_starts[i]+1], vec_len, HYPRE_MPI_INT,
                      proc_id, tag2, comm, &requests[j++]);
   }
   for (i = 0; i < num_types; i++)
   {
      proc_id = used_procs[i];
      hypre_MPI_Isend(&local_matrix_i[1], local_num_rows, HYPRE_MPI_INT,
                      proc_id, tag2, comm, &requests[j++]);
   }

   hypre_MPI_Waitall(j, requests, status);

   /* generate matrix_i from received data: each block arrived with
      block-local prefix sums, so shift every block by the running total */
   /* global numbering?*/
   offset = matrix_i[new_vec_starts[1]];
   for (i=1; i < num_types; i++)
   {
      for (j = new_vec_starts[i]; j < new_vec_starts[i+1]; j++)
         matrix_i[j+1] += offset;
      offset = matrix_i[new_vec_starts[i+1]];
   }

   num_nonzeros = matrix_i[num_rows];

   matrix = hypre_CSRMatrixCreate(num_rows, num_cols, num_nonzeros);

   hypre_CSRMatrixMemoryLocation(matrix) = HYPRE_MEMORY_HOST;

   /* hand our row-pointer array to the matrix, then let Initialize
      allocate j/data to match */
   hypre_CSRMatrixI(matrix) = matrix_i;
   hypre_CSRMatrixInitialize(matrix);
   matrix_j = hypre_CSRMatrixJ(matrix);
   matrix_data = hypre_CSRMatrixData(matrix);

   /* generate datatypes for further data exchange and exchange remaining
      data, i.e. column info and actual data */

   j = 0;
   for (i = 0; i < num_types; i++)
   {
      proc_id = used_procs[i];
      start_index = matrix_i[(HYPRE_Int)new_vec_starts[i]];
      num_data = matrix_i[(HYPRE_Int)new_vec_starts[i+1]] - start_index;
      hypre_MPI_Irecv(&matrix_data[start_index], num_data, HYPRE_MPI_COMPLEX,
                      used_procs[i], tag1, comm, &requests[j++]);
      hypre_MPI_Irecv(&matrix_j[start_index], num_data, HYPRE_MPI_INT,
                      used_procs[i], tag3, comm, &requests[j++]);
   }
   local_num_nonzeros = local_matrix_i[local_num_rows];
   for (i=0; i < num_types; i++)
   {
      hypre_MPI_Isend(local_matrix_data, local_num_nonzeros, HYPRE_MPI_COMPLEX,
                      used_procs[i], tag1, comm, &requests[j++]);
      hypre_MPI_Isend(local_matrix_j, local_num_nonzeros, HYPRE_MPI_INT,
                      used_procs[i], tag3, comm, &requests[j++]);
   }


   hypre_MPI_Waitall(num_requests, requests, status);

   hypre_TFree(new_vec_starts, HYPRE_MEMORY_HOST);

   if (hypre_CSRMatrixOwnsData(local_matrix))
      hypre_CSRMatrixDestroy(local_matrix);
   else
      hypre_TFree(local_matrix, HYPRE_MEMORY_HOST);

   /* num_requests > 0 always holds here (we returned earlier if rowless) */
   if (num_requests)
   {
      hypre_TFree(requests, HYPRE_MEMORY_HOST);
      hypre_TFree(status, HYPRE_MEMORY_HOST);
      hypre_TFree(used_procs, HYPRE_MEMORY_HOST);
   }

   return matrix;
}
2004
2005 /*--------------------------------------------------------------------------
2006  * hypre_ParCSRMatrixCopy,
2007  * copies A to B,
2008  * if copy_data = 0, only the structure of A is copied to B
2009 * the routine does not check whether the dimensions of A and B are compatible
2010 *--------------------------------------------------------------------------*/
2011
2012 HYPRE_Int
hypre_ParCSRMatrixCopy(hypre_ParCSRMatrix * A,hypre_ParCSRMatrix * B,HYPRE_Int copy_data)2013 hypre_ParCSRMatrixCopy( hypre_ParCSRMatrix *A,
2014 hypre_ParCSRMatrix *B,
2015 HYPRE_Int copy_data )
2016 {
2017 hypre_CSRMatrix *A_diag;
2018 hypre_CSRMatrix *A_offd;
2019 HYPRE_BigInt *col_map_offd_A;
2020 hypre_CSRMatrix *B_diag;
2021 hypre_CSRMatrix *B_offd;
2022 HYPRE_BigInt *col_map_offd_B;
2023 HYPRE_Int num_cols_offd_A;
2024 HYPRE_Int num_cols_offd_B;
2025
2026 if (!A)
2027 {
2028 hypre_error_in_arg(1);
2029 return hypre_error_flag;
2030 }
2031 if (!B)
2032 {
2033 hypre_error_in_arg(1);
2034 return hypre_error_flag;
2035 }
2036
2037 A_diag = hypre_ParCSRMatrixDiag(A);
2038 A_offd = hypre_ParCSRMatrixOffd(A);
2039 B_diag = hypre_ParCSRMatrixDiag(B);
2040 B_offd = hypre_ParCSRMatrixOffd(B);
2041
2042 num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
2043 num_cols_offd_B = hypre_CSRMatrixNumCols(B_offd);
2044
2045 hypre_assert(num_cols_offd_A == num_cols_offd_B);
2046
2047 col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A);
2048 col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);
2049
2050 hypre_CSRMatrixCopy(A_diag, B_diag, copy_data);
2051 hypre_CSRMatrixCopy(A_offd, B_offd, copy_data);
2052
2053 /* should not happen if B has been initialized */
2054 if (num_cols_offd_B && col_map_offd_B == NULL)
2055 {
2056 col_map_offd_B = hypre_TAlloc(HYPRE_BigInt, num_cols_offd_B, HYPRE_MEMORY_HOST);
2057 hypre_ParCSRMatrixColMapOffd(B) = col_map_offd_B;
2058 }
2059
2060 hypre_TMemcpy(col_map_offd_B, col_map_offd_A, HYPRE_BigInt, num_cols_offd_B,
2061 HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
2062
2063 return hypre_error_flag;
2064 }
2065
2066 /*--------------------------------------------------------------------
2067 * hypre_FillResponseParToCSRMatrix
2068 * Fill response function for determining the send processors
2069 * data exchange
2070 *--------------------------------------------------------------------*/
2071
2072 HYPRE_Int
hypre_FillResponseParToCSRMatrix(void * p_recv_contact_buf,HYPRE_Int contact_size,HYPRE_Int contact_proc,void * ro,MPI_Comm comm,void ** p_send_response_buf,HYPRE_Int * response_message_size)2073 hypre_FillResponseParToCSRMatrix( void *p_recv_contact_buf,
2074 HYPRE_Int contact_size,
2075 HYPRE_Int contact_proc,
2076 void *ro,
2077 MPI_Comm comm,
2078 void **p_send_response_buf,
2079 HYPRE_Int *response_message_size )
2080 {
2081 HYPRE_Int myid;
2082 HYPRE_Int i, index, count, elength;
2083
2084 HYPRE_BigInt *recv_contact_buf = (HYPRE_BigInt * ) p_recv_contact_buf;
2085
2086 hypre_DataExchangeResponse *response_obj = (hypre_DataExchangeResponse*)ro;
2087
2088 hypre_ProcListElements *send_proc_obj = (hypre_ProcListElements*)response_obj->data2;
2089
2090 hypre_MPI_Comm_rank(comm, &myid );
2091
2092 /*check to see if we need to allocate more space in send_proc_obj for ids*/
2093 if (send_proc_obj->length == send_proc_obj->storage_length)
2094 {
2095 send_proc_obj->storage_length +=10; /*add space for 10 more processors*/
2096 send_proc_obj->id = hypre_TReAlloc(send_proc_obj->id, HYPRE_Int,
2097 send_proc_obj->storage_length, HYPRE_MEMORY_HOST);
2098 send_proc_obj->vec_starts =
2099 hypre_TReAlloc(send_proc_obj->vec_starts, HYPRE_Int,
2100 send_proc_obj->storage_length + 1, HYPRE_MEMORY_HOST);
2101 }
2102
2103 /*initialize*/
2104 count = send_proc_obj->length;
2105 index = send_proc_obj->vec_starts[count]; /*this is the number of elements*/
2106
2107 /*send proc*/
2108 send_proc_obj->id[count] = contact_proc;
2109
2110 /*do we need more storage for the elements?*/
2111 if (send_proc_obj->element_storage_length < index + contact_size)
2112 {
2113 elength = hypre_max(contact_size, 10);
2114 elength += index;
2115 send_proc_obj->elements = hypre_TReAlloc(send_proc_obj->elements,
2116 HYPRE_BigInt, elength, HYPRE_MEMORY_HOST);
2117 send_proc_obj->element_storage_length = elength;
2118 }
2119 /*populate send_proc_obj*/
2120 for (i=0; i< contact_size; i++)
2121 {
2122 send_proc_obj->elements[index++] = recv_contact_buf[i];
2123 }
2124 send_proc_obj->vec_starts[count+1] = index;
2125 send_proc_obj->length++;
2126
2127 /*output - no message to return (confirmation) */
2128 *response_message_size = 0;
2129
2130 return hypre_error_flag;
2131 }
2132
2133 /*--------------------------------------------------------------------------
2134 * hypre_ParCSRMatrixUnion
2135 * Creates and returns a new matrix whose elements are the union of A and B.
2136 * Data is not copied, only structural information is created.
2137 * A and B must have the same communicator, numbers and distributions of rows
2138 * and columns (they can differ in which row-column pairs are nonzero, thus
2139 * in which columns are in a offd block)
2140 *--------------------------------------------------------------------------*/
2141
hypre_ParCSRMatrix *
hypre_ParCSRMatrixUnion( hypre_ParCSRMatrix * A,
                         hypre_ParCSRMatrix * B )
{
   /* Build a new matrix whose sparsity pattern is the union of A and B.
    * Only structure is created (no data is copied); A and B must share
    * communicator and row/column distributions. */
   hypre_ParCSRMatrix *C;
   HYPRE_BigInt       *col_map_offd_C = NULL;
   HYPRE_Int           my_id, p;
   MPI_Comm            comm = hypre_ParCSRMatrixComm( A );

   hypre_MPI_Comm_rank(comm, &my_id);

   /* allocate the shell and mirror A's global sizes and distribution */
   C = hypre_CTAlloc(hypre_ParCSRMatrix, 1, HYPRE_MEMORY_HOST);
   hypre_ParCSRMatrixComm(C)          = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRMatrixGlobalNumRows(C) = hypre_ParCSRMatrixGlobalNumRows(A);
   hypre_ParCSRMatrixGlobalNumCols(C) = hypre_ParCSRMatrixGlobalNumCols(A);
   hypre_ParCSRMatrixFirstRowIndex(C) = hypre_ParCSRMatrixFirstRowIndex(A);
   hypre_assert(hypre_ParCSRMatrixFirstRowIndex(B) ==
                hypre_ParCSRMatrixFirstRowIndex(A));
   hypre_TMemcpy(hypre_ParCSRMatrixRowStarts(C), hypre_ParCSRMatrixRowStarts(A),
                 HYPRE_BigInt, 2, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
   hypre_TMemcpy(hypre_ParCSRMatrixColStarts(C), hypre_ParCSRMatrixColStarts(A),
                 HYPRE_BigInt, 2, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
   for (p = 0; p < 2; ++p)
   {
      hypre_assert(hypre_ParCSRMatrixColStarts(A)[p] ==
                   hypre_ParCSRMatrixColStarts(B)[p]);
   }
   hypre_ParCSRMatrixFirstColDiag(C) = hypre_ParCSRMatrixFirstColDiag(A);
   hypre_ParCSRMatrixLastRowIndex(C) = hypre_ParCSRMatrixLastRowIndex(A);
   hypre_ParCSRMatrixLastColDiag(C)  = hypre_ParCSRMatrixLastColDiag(A);

   /* structural union of the diag blocks (no column maps involved) */
   hypre_ParCSRMatrixDiag(C) =
      hypre_CSRMatrixUnion(hypre_ParCSRMatrixDiag(A), hypre_ParCSRMatrixDiag(B),
                           0, 0, 0);

   /* union of the offd blocks also merges the two column maps */
   hypre_ParCSRMatrixOffd(C) =
      hypre_CSRMatrixUnion(hypre_ParCSRMatrixOffd(A), hypre_ParCSRMatrixOffd(B),
                           hypre_ParCSRMatrixColMapOffd(A),
                           hypre_ParCSRMatrixColMapOffd(B), &col_map_offd_C);
   hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

   hypre_ParCSRMatrixCommPkg(C)  = NULL;
   hypre_ParCSRMatrixCommPkgT(C) = NULL;
   hypre_ParCSRMatrixOwnsData(C) = 1;
   /* SetNumNonzeros, SetDNumNonzeros are global, need hypre_MPI_Allreduce.
      I suspect, but don't know, that other parts of hypre do not assume that
      the correct values have been set.
      hypre_ParCSRMatrixSetNumNonzeros( C );
      hypre_ParCSRMatrixSetDNumNonzeros( C );*/
   hypre_ParCSRMatrixNumNonzeros(C)  = 0;
   hypre_ParCSRMatrixDNumNonzeros(C) = 0.0;
   hypre_ParCSRMatrixRowindices(C)   = NULL;
   hypre_ParCSRMatrixRowvalues(C)    = NULL;
   hypre_ParCSRMatrixGetrowactive(C) = 0;

   return C;
}
2194
2195 /* Perform dual truncation of ParCSR matrix.
2196 * This code is adapted from original BoomerAMGInterpTruncate()
2197 * A: parCSR matrix to be modified
2198 * tol: relative tolerance or truncation factor for dropping small terms
2199 * max_row_elmts: maximum number of (largest) nonzero elements to keep.
2200 * rescale: Boolean on whether or not to scale resulting matrix. Scaling for
2201 * each row satisfies: sum(nonzero values before dropping)/ sum(nonzero values after dropping),
2202 * this way, the application of the truncated matrix on a constant vector is the same as that of
2203 * the original matrix.
2204 * nrm_type: type of norm used for dropping with tol.
2205 * -- 0 = infinity-norm
2206 * -- 1 = 1-norm
2207 * -- 2 = 2-norm
2208 */
HYPRE_Int
hypre_ParCSRMatrixTruncate(hypre_ParCSRMatrix *A,
                           HYPRE_Real tol,
                           HYPRE_Int max_row_elmts,
                           HYPRE_Int rescale,
                           HYPRE_Int nrm_type)
{
#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_INTERP_TRUNC] -= hypre_MPI_Wtime();
#endif

   /* Diag block of A: i/j/data are compressed in place as entries are
    * dropped; j/data are reallocated at the end if anything was dropped. */
   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
   HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int *A_diag_j_new;
   HYPRE_Real *A_diag_data_new;

   /* Offd block of A, handled the same way as the diag block */
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
   HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int *A_offd_j_new;
   HYPRE_Real *A_offd_data_new;

   HYPRE_Int n_fine = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int num_cols = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int i, j, start_j;
   HYPRE_Int ierr = 0;
   HYPRE_Int next_open;
   HYPRE_Int now_checking;
   HYPRE_Int num_lost;
   HYPRE_Int num_lost_global=0;
   HYPRE_Int next_open_offd;
   HYPRE_Int now_checking_offd;
   HYPRE_Int num_lost_offd;
   HYPRE_Int num_lost_global_offd;
   HYPRE_Int A_diag_size;
   HYPRE_Int A_offd_size;
   HYPRE_Int num_elmts;
   HYPRE_Int cnt, cnt_diag, cnt_offd;
   HYPRE_Real row_nrm;
   HYPRE_Real drop_coeff;
   HYPRE_Real row_sum;
   HYPRE_Real scale;

   HYPRE_MemoryLocation memory_location_diag = hypre_CSRMatrixMemoryLocation(A_diag);
   HYPRE_MemoryLocation memory_location_offd = hypre_CSRMatrixMemoryLocation(A_offd);

   /* Threading variables.  Entry i of num_lost_(offd_)per_thread holds the
    * number of dropped entries over thread i's row range. Cum_lost_per_thread
    * will temporarily store the cumulative number of dropped entries up to
    * each thread. */
   HYPRE_Int my_thread_num, num_threads, start, stop;
   HYPRE_Int * max_num_threads = hypre_CTAlloc(HYPRE_Int, 1, HYPRE_MEMORY_HOST);
   HYPRE_Int * cum_lost_per_thread;
   HYPRE_Int * num_lost_per_thread;
   HYPRE_Int * num_lost_offd_per_thread;

   /* Initialize threading variables */
   max_num_threads[0] = hypre_NumThreads();
   cum_lost_per_thread = hypre_CTAlloc(HYPRE_Int, max_num_threads[0], HYPRE_MEMORY_HOST);
   num_lost_per_thread = hypre_CTAlloc(HYPRE_Int, max_num_threads[0], HYPRE_MEMORY_HOST);
   num_lost_offd_per_thread = hypre_CTAlloc(HYPRE_Int, max_num_threads[0], HYPRE_MEMORY_HOST);
   for (i = 0; i < max_num_threads[0]; i++)
   {
      num_lost_per_thread[i] = 0;
      num_lost_offd_per_thread[i] = 0;
   }

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i,my_thread_num,num_threads,row_nrm, drop_coeff,j,start_j,row_sum,scale,num_lost,now_checking,next_open,num_lost_offd,now_checking_offd,next_open_offd,start,stop,cnt_diag,cnt_offd,num_elmts,cnt)
#endif
   {
      my_thread_num = hypre_GetThreadNum();
      num_threads = hypre_NumActiveThreads();

      /* Compute each thread's range of rows to truncate and compress.  Note,
       * that i, j and data are all compressed as entries are dropped, but
       * that the compression only occurs locally over each thread's row
       * range.  A_diag_i is only made globally consistent at the end of this
       * routine.  During the dropping phases, A_diag_i[stop] will point to
       * the start of the next thread's row range. */

      /* my row range */
      start = (n_fine / num_threads) * my_thread_num;
      if (my_thread_num == num_threads-1)
      {
         stop = n_fine;
      }
      else
      {
         stop = (n_fine / num_threads) * (my_thread_num + 1);
      }

      /*
       * Truncate based on truncation tolerance
       */
      if (tol > 0)
      {
         num_lost = 0;
         num_lost_offd = 0;

         /* two cursors per block: now_checking scans the original entries,
          * next_open is where the next kept entry is written (in place) */
         next_open = A_diag_i[start];
         now_checking = A_diag_i[start];
         next_open_offd = A_offd_i[start];;
         now_checking_offd = A_offd_i[start];;

         for (i = start; i < stop; i++)
         {
            row_nrm = 0;
            /* compute norm for dropping small terms */
            if (nrm_type == 0)
            {
               /* infty-norm */
               for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
               {
                  row_nrm = (row_nrm < fabs(A_diag_data[j])) ?
                     fabs(A_diag_data[j]) : row_nrm;
               }
               for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
               {
                  row_nrm = (row_nrm < fabs(A_offd_data[j])) ?
                     fabs(A_offd_data[j]) : row_nrm;
               }
            }
            if (nrm_type == 1)
            {
               /* 1-norm */
               for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
               {
                  row_nrm += fabs(A_diag_data[j]);
               }
               for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
               {
                  row_nrm += fabs(A_offd_data[j]);
               }
            }
            if (nrm_type == 2)
            {
               /* 2-norm */
               for (j = A_diag_i[i]; j < A_diag_i[i+1]; j++)
               {
                  HYPRE_Complex v = A_diag_data[j];
                  row_nrm += v*v;
               }
               for (j = A_offd_i[i]; j < A_offd_i[i+1]; j++)
               {
                  HYPRE_Complex v = A_offd_data[j];
                  row_nrm += v*v;
               }
               row_nrm = sqrt(row_nrm);
            }
            /* entries with |a_ij| < tol * row_nrm are dropped */
            drop_coeff = tol * row_nrm;

            /* compress the diag part of row i; A_diag_i[i] is shifted by the
             * entries already dropped in this thread's range */
            start_j = A_diag_i[i];
            if (num_lost)
            {
               A_diag_i[i] -= num_lost;
            }
            row_sum = 0;
            scale = 0;
            for (j = start_j; j < A_diag_i[i+1]; j++)
            {
               row_sum += A_diag_data[now_checking];
               if (fabs(A_diag_data[now_checking]) < drop_coeff)
               {
                  num_lost++;
                  now_checking++;
               }
               else
               {
                  scale += A_diag_data[now_checking];
                  A_diag_data[next_open] = A_diag_data[now_checking];
                  A_diag_j[next_open] = A_diag_j[now_checking];
                  now_checking++;
                  next_open++;
               }
            }

            /* same compression for the offd part of row i */
            start_j = A_offd_i[i];
            if (num_lost_offd)
            {
               A_offd_i[i] -= num_lost_offd;
            }

            for (j = start_j; j < A_offd_i[i+1]; j++)
            {
               row_sum += A_offd_data[now_checking_offd];
               if (fabs(A_offd_data[now_checking_offd]) < drop_coeff)
               {
                  num_lost_offd++;
                  now_checking_offd++;
               }
               else
               {
                  scale += A_offd_data[now_checking_offd];
                  A_offd_data[next_open_offd] = A_offd_data[now_checking_offd];
                  A_offd_j[next_open_offd] = A_offd_j[now_checking_offd];
                  now_checking_offd++;
                  next_open_offd++;
               }
            }

            /* scale row of A */
            /* scale = (sum before dropping) / (sum after dropping), so the
             * truncated row acts like the original on a constant vector */
            if (rescale && scale != 0.)
            {
               if (scale != row_sum)
               {
                  scale = row_sum/scale;
                  for (j = A_diag_i[i]; j < (A_diag_i[i+1]-num_lost); j++)
                  {
                     A_diag_data[j] *= scale;
                  }
                  for (j = A_offd_i[i]; j < (A_offd_i[i+1]-num_lost_offd); j++)
                  {
                     A_offd_data[j] *= scale;
                  }
               }
            }
         } /* end loop for (i = 0; i < n_fine; i++) */

         /* store number of dropped elements and number of threads */
         if (my_thread_num == 0)
         {
            max_num_threads[0] = num_threads;
         }
         num_lost_per_thread[my_thread_num] = num_lost;
         num_lost_offd_per_thread[my_thread_num] = num_lost_offd;

      } /* end if (trunc_factor > 0) */

      /*
       * Truncate based on capping the nnz per row
       *
       */
      if (max_row_elmts > 0)
      {
         HYPRE_Int A_mxnum, cnt1, last_index, last_index_offd;
         HYPRE_Int *A_aux_j;
         HYPRE_Real *A_aux_data;

         /* find maximum row length locally over this row range */
         A_mxnum = 0;
         for (i=start; i<stop; i++)
         {
            /* Note A_diag_i[stop] is the starting point for the next thread
             * in j and data, not the stop point for this thread */
            last_index = A_diag_i[i+1];
            last_index_offd = A_offd_i[i+1];
            if (i == stop-1)
            {
               last_index -= num_lost_per_thread[my_thread_num];
               last_index_offd -= num_lost_offd_per_thread[my_thread_num];
            }
            cnt1 = last_index-A_diag_i[i] + last_index_offd-A_offd_i[i];
            if (cnt1 > A_mxnum)
            {
               A_mxnum = cnt1;
            }
         }

         /* Some rows exceed max_row_elmts, and require truncation.  Essentially,
          * each thread truncates and compresses its range of rows locally. */
         if (A_mxnum > max_row_elmts)
         {
            num_lost = 0;
            num_lost_offd = 0;

            /* two temporary arrays to hold row i for temporary operations */
            A_aux_j = hypre_CTAlloc(HYPRE_Int, A_mxnum, HYPRE_MEMORY_HOST);
            A_aux_data = hypre_CTAlloc(HYPRE_Real, A_mxnum, HYPRE_MEMORY_HOST);
            cnt_diag = A_diag_i[start];
            cnt_offd = A_offd_i[start];

            for (i = start; i < stop; i++)
            {
               /* Note A_diag_i[stop] is the starting point for the next thread
                * in j and data, not the stop point for this thread */
               last_index = A_diag_i[i+1];
               last_index_offd = A_offd_i[i+1];
               if (i == stop-1)
               {
                  last_index -= num_lost_per_thread[my_thread_num];
                  last_index_offd -= num_lost_offd_per_thread[my_thread_num];
               }

               row_sum = 0;
               num_elmts = last_index-A_diag_i[i] + last_index_offd-A_offd_i[i];
               if (max_row_elmts < num_elmts)
               {
                  /* copy both diagonal and off-diag parts of row i to _aux_ arrays */
                  /* offd columns are tagged by adding num_cols so diag and offd
                   * entries can be distinguished after the joint sort */
                  cnt = 0;
                  for (j = A_diag_i[i]; j < last_index; j++)
                  {
                     A_aux_j[cnt] = A_diag_j[j];
                     A_aux_data[cnt++] = A_diag_data[j];
                     row_sum += A_diag_data[j];
                  }
                  num_lost += cnt;
                  cnt1 = cnt;
                  for (j = A_offd_i[i]; j < last_index_offd; j++)
                  {
                     A_aux_j[cnt] = A_offd_j[j]+num_cols;
                     A_aux_data[cnt++] = A_offd_data[j];
                     row_sum += A_offd_data[j];
                  }
                  num_lost_offd += cnt-cnt1;

                  /* sort data */
                  /* sort by decreasing magnitude, then keep the first
                   * max_row_elmts entries */
                  hypre_qsort2_abs(A_aux_j,A_aux_data,0,cnt-1);
                  scale = 0;
                  if (i > start)
                  {
                     A_diag_i[i] = cnt_diag;
                     A_offd_i[i] = cnt_offd;
                  }
                  for (j = 0; j < max_row_elmts; j++)
                  {
                     scale += A_aux_data[j];
                     if (A_aux_j[j] < num_cols)
                     {
                        A_diag_j[cnt_diag] = A_aux_j[j];
                        A_diag_data[cnt_diag++] = A_aux_data[j];
                     }
                     else
                     {
                        A_offd_j[cnt_offd] = A_aux_j[j]-num_cols;
                        A_offd_data[cnt_offd++] = A_aux_data[j];
                     }
                  }
                  /* correct the loss counters by the entries actually kept */
                  num_lost -= cnt_diag-A_diag_i[i];
                  num_lost_offd -= cnt_offd-A_offd_i[i];

                  /* scale row of A */
                  if (rescale && (scale != 0.))
                  {
                     if (scale != row_sum)
                     {
                        scale = row_sum/scale;
                        for (j = A_diag_i[i]; j < cnt_diag; j++)
                        {
                           A_diag_data[j] *= scale;
                        }
                        for (j = A_offd_i[i]; j < cnt_offd; j++)
                        {
                           A_offd_data[j] *= scale;
                        }
                     }
                  }
               } /* end if (max_row_elmts < num_elmts) */
               else
               {
                  /* nothing dropped from this row, but still have to shift entries back
                   * by the number dropped so far */
                  if (A_diag_i[i] != cnt_diag)
                  {
                     start_j = A_diag_i[i];
                     A_diag_i[i] = cnt_diag;
                     for (j = start_j; j < last_index; j++)
                     {
                        A_diag_j[cnt_diag] = A_diag_j[j];
                        A_diag_data[cnt_diag++] = A_diag_data[j];
                     }
                  }
                  else
                  {
                     cnt_diag += last_index-A_diag_i[i];
                  }

                  if (A_offd_i[i] != cnt_offd)
                  {
                     start_j = A_offd_i[i];
                     A_offd_i[i] = cnt_offd;
                     for (j = start_j; j < last_index_offd; j++)
                     {
                        A_offd_j[cnt_offd] = A_offd_j[j];
                        A_offd_data[cnt_offd++] = A_offd_data[j];
                     }
                  }
                  else
                  {
                     cnt_offd += last_index_offd-A_offd_i[i];
                  }
               }
            } /* end for (i = 0; i < n_fine; i++) */

            num_lost_per_thread[my_thread_num] += num_lost;
            num_lost_offd_per_thread[my_thread_num] += num_lost_offd;
            hypre_TFree(A_aux_j, HYPRE_MEMORY_HOST);
            hypre_TFree(A_aux_data, HYPRE_MEMORY_HOST);

         } /* end if (A_mxnum > max_row_elmts) */
      } /* end if (max_row_elmts > 0) */


      /* Sum up num_lost_global */
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif
      if (my_thread_num == 0)
      {
         num_lost_global = 0;
         num_lost_global_offd = 0;
         for (i = 0; i < max_num_threads[0]; i++)
         {
            num_lost_global += num_lost_per_thread[i];
            num_lost_global_offd += num_lost_offd_per_thread[i];
         }
      }
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif

      /*
       * Synchronize and create new diag data structures
       */
      if (num_lost_global)
      {
         /* Each thread has it's own locally compressed CSR matrix from rows start
          * to stop.  Now, we have to copy each thread's chunk into the new
          * process-wide CSR data structures
          *
          * First, we compute the new process-wide number of nonzeros (i.e.,
          * A_diag_size), and compute cum_lost_per_thread[k] so that this
          * entry holds the cumulative sum of entries dropped up to and
          * including thread k. */
         if (my_thread_num == 0)
         {
            A_diag_size = A_diag_i[n_fine];

            for (i = 0; i < max_num_threads[0]; i++)
            {
               A_diag_size -= num_lost_per_thread[i];
               if (i > 0)
               {
                  cum_lost_per_thread[i] = num_lost_per_thread[i] + cum_lost_per_thread[i-1];
               }
               else
               {
                  cum_lost_per_thread[i] = num_lost_per_thread[i];
               }
            }

            A_diag_j_new = hypre_CTAlloc(HYPRE_Int, A_diag_size, memory_location_diag);
            A_diag_data_new = hypre_CTAlloc(HYPRE_Real, A_diag_size, memory_location_diag);
         }
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif

         /* points to next open spot in new data structures for this thread */
         if (my_thread_num == 0)
         {
            next_open = 0;
         }
         else
         {
            /* remember, cum_lost_per_thread[k] stores the num dropped up to and
             * including thread k */
            next_open = A_diag_i[start] - cum_lost_per_thread[my_thread_num-1];
         }

         /* copy the j and data arrays over */
         for (i = A_diag_i[start]; i < A_diag_i[stop] - num_lost_per_thread[my_thread_num]; i++)
         {
            A_diag_j_new[next_open] = A_diag_j[i];
            A_diag_data_new[next_open] = A_diag_data[i];
            next_open += 1;
         }

#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif
         /* update A_diag_i with number of dropped entries by all lower ranked
          * threads */
         if (my_thread_num > 0)
         {
            for (i=start; i<stop; i++)
            {
               A_diag_i[i] -= cum_lost_per_thread[my_thread_num-1];
            }
         }

         if (my_thread_num == 0)
         {
            /* Set last entry */
            A_diag_i[n_fine] = A_diag_size ;

            hypre_TFree(A_diag_j, memory_location_diag);
            hypre_TFree(A_diag_data, memory_location_diag);
            hypre_CSRMatrixJ(A_diag) = A_diag_j_new;
            hypre_CSRMatrixData(A_diag) = A_diag_data_new;
            hypre_CSRMatrixNumNonzeros(A_diag) = A_diag_size;
         }
      }

      /*
       * Synchronize and create new offd data structures
       */
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif
      if (num_lost_global_offd)
      {
         /* Repeat process for off-diagonal */
         if (my_thread_num == 0)
         {
            A_offd_size = A_offd_i[n_fine];
            for (i = 0; i < max_num_threads[0]; i++)
            {
               A_offd_size -= num_lost_offd_per_thread[i];
               if (i > 0)
               {
                  cum_lost_per_thread[i] = num_lost_offd_per_thread[i] + cum_lost_per_thread[i-1];
               }
               else
               {
                  cum_lost_per_thread[i] = num_lost_offd_per_thread[i];
               }
            }

            A_offd_j_new = hypre_CTAlloc(HYPRE_Int, A_offd_size, memory_location_offd);
            A_offd_data_new = hypre_CTAlloc(HYPRE_Real, A_offd_size, memory_location_offd);
         }
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif

         /* points to next open spot in new data structures for this thread */
         if (my_thread_num == 0)
         {
            next_open = 0;
         }
         else
         {
            /* remember, cum_lost_per_thread[k] stores the num dropped up to and
             * including thread k */
            next_open = A_offd_i[start] - cum_lost_per_thread[my_thread_num-1];
         }

         /* copy the j and data arrays over */
         for (i = A_offd_i[start]; i < A_offd_i[stop] - num_lost_offd_per_thread[my_thread_num]; i++)
         {
            A_offd_j_new[next_open] = A_offd_j[i];
            A_offd_data_new[next_open] = A_offd_data[i];
            next_open += 1;
         }

#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif
         /* update A_offd_i with number of dropped entries by all lower ranked
          * threads */
         if (my_thread_num > 0)
         {
            for (i=start; i<stop; i++)
            {
               A_offd_i[i] -= cum_lost_per_thread[my_thread_num-1];
            }
         }

         if (my_thread_num == 0)
         {
            /* Set last entry */
            A_offd_i[n_fine] = A_offd_size ;

            hypre_TFree(A_offd_j, memory_location_offd);
            hypre_TFree(A_offd_data, memory_location_offd);
            hypre_CSRMatrixJ(A_offd) = A_offd_j_new;
            hypre_CSRMatrixData(A_offd) = A_offd_data_new;
            hypre_CSRMatrixNumNonzeros(A_offd) = A_offd_size;
         }
      }

   } /* end parallel region */

   hypre_TFree(max_num_threads, HYPRE_MEMORY_HOST);
   hypre_TFree(cum_lost_per_thread, HYPRE_MEMORY_HOST);
   hypre_TFree(num_lost_per_thread, HYPRE_MEMORY_HOST);
   hypre_TFree(num_lost_offd_per_thread, HYPRE_MEMORY_HOST);

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_INTERP_TRUNC] += hypre_MPI_Wtime();
#endif

   return ierr;
}
2797
2798 HYPRE_Int
hypre_ParCSRMatrixSetConstantValues(hypre_ParCSRMatrix * A,HYPRE_Complex value)2799 hypre_ParCSRMatrixSetConstantValues( hypre_ParCSRMatrix *A,
2800 HYPRE_Complex value )
2801 {
2802 hypre_CSRMatrixSetConstantValues(hypre_ParCSRMatrixDiag(A), value);
2803 hypre_CSRMatrixSetConstantValues(hypre_ParCSRMatrixOffd(A), value);
2804
2805 return hypre_error_flag;
2806 }
2807
2808 void
hypre_ParCSRMatrixCopyColMapOffdToDevice(hypre_ParCSRMatrix * A)2809 hypre_ParCSRMatrixCopyColMapOffdToDevice(hypre_ParCSRMatrix *A)
2810 {
2811 #if defined(HYPRE_USING_GPU)
2812 if (hypre_ParCSRMatrixDeviceColMapOffd(A) == NULL)
2813 {
2814 const HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
2815 hypre_ParCSRMatrixDeviceColMapOffd(A) = hypre_TAlloc(HYPRE_BigInt, num_cols_A_offd, HYPRE_MEMORY_DEVICE);
2816 hypre_TMemcpy(hypre_ParCSRMatrixDeviceColMapOffd(A), hypre_ParCSRMatrixColMapOffd(A), HYPRE_BigInt, num_cols_A_offd,
2817 HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST);
2818 }
2819 #endif
2820 }
2821