1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 /******************************************************************************
9 *
10 * Member functions for hypre_CSRBlockMatrix class.
11 *
12 *****************************************************************************/
13
14 #include "_hypre_parcsr_block_mv.h"
15
16 /*--------------------------------------------------------------------------
17 * hypre_CSRBlockMatrixCreate
18 *--------------------------------------------------------------------------*/
19
20 hypre_CSRBlockMatrix *
hypre_CSRBlockMatrixCreate(HYPRE_Int block_size,HYPRE_Int num_rows,HYPRE_Int num_cols,HYPRE_Int num_nonzeros)21 hypre_CSRBlockMatrixCreate(HYPRE_Int block_size,
22 HYPRE_Int num_rows,
23 HYPRE_Int num_cols,
24 HYPRE_Int num_nonzeros)
25 {
26 hypre_CSRBlockMatrix *matrix;
27
28 matrix = hypre_CTAlloc(hypre_CSRBlockMatrix, 1, HYPRE_MEMORY_HOST);
29
30 hypre_CSRBlockMatrixData(matrix) = NULL;
31 hypre_CSRBlockMatrixI(matrix) = NULL;
32 hypre_CSRBlockMatrixJ(matrix) = NULL;
33 hypre_CSRBlockMatrixBigJ(matrix) = NULL;
34 hypre_CSRBlockMatrixBlockSize(matrix) = block_size;
35 hypre_CSRBlockMatrixNumRows(matrix) = num_rows;
36 hypre_CSRBlockMatrixNumCols(matrix) = num_cols;
37 hypre_CSRBlockMatrixNumNonzeros(matrix) = num_nonzeros;
38
39 /* set defaults */
40 hypre_CSRBlockMatrixOwnsData(matrix) = 1;
41
42 return matrix;
43 }
44
45 /*--------------------------------------------------------------------------
46 * hypre_CSRBlockMatrixDestroy
47 *--------------------------------------------------------------------------*/
48
49 HYPRE_Int
hypre_CSRBlockMatrixDestroy(hypre_CSRBlockMatrix * matrix)50 hypre_CSRBlockMatrixDestroy(hypre_CSRBlockMatrix *matrix)
51 {
52 HYPRE_Int ierr=0;
53
54 if (matrix)
55 {
56 hypre_TFree(hypre_CSRBlockMatrixI(matrix), HYPRE_MEMORY_HOST);
57 if ( hypre_CSRBlockMatrixOwnsData(matrix) )
58 {
59 hypre_TFree(hypre_CSRBlockMatrixData(matrix), HYPRE_MEMORY_HOST);
60 hypre_TFree(hypre_CSRBlockMatrixJ(matrix), HYPRE_MEMORY_HOST);
61 hypre_TFree(hypre_CSRBlockMatrixBigJ(matrix), HYPRE_MEMORY_HOST);
62 }
63 hypre_TFree(matrix, HYPRE_MEMORY_HOST);
64 }
65
66 return ierr;
67 }
68
69 /*--------------------------------------------------------------------------
70 * hypre_CSRBlockMatrixInitialize
71 *--------------------------------------------------------------------------*/
72
73 HYPRE_Int
hypre_CSRBlockMatrixInitialize(hypre_CSRBlockMatrix * matrix)74 hypre_CSRBlockMatrixInitialize(hypre_CSRBlockMatrix *matrix)
75 {
76 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(matrix);
77 HYPRE_Int num_rows = hypre_CSRBlockMatrixNumRows(matrix);
78 HYPRE_Int num_nonzeros = hypre_CSRBlockMatrixNumNonzeros(matrix);
79 HYPRE_Int ierr=0, nnz;
80
81 if ( ! hypre_CSRBlockMatrixI(matrix) )
82 hypre_TFree(hypre_CSRBlockMatrixI(matrix), HYPRE_MEMORY_HOST);
83 if ( ! hypre_CSRBlockMatrixJ(matrix) )
84 hypre_TFree(hypre_CSRBlockMatrixJ(matrix), HYPRE_MEMORY_HOST);
85 if ( ! hypre_CSRBlockMatrixBigJ(matrix) )
86 hypre_TFree(hypre_CSRBlockMatrixBigJ(matrix), HYPRE_MEMORY_HOST);
87 if ( ! hypre_CSRBlockMatrixData(matrix) )
88 hypre_TFree(hypre_CSRBlockMatrixData(matrix), HYPRE_MEMORY_HOST);
89
90 nnz = num_nonzeros * block_size * block_size;
91 hypre_CSRBlockMatrixI(matrix) = hypre_CTAlloc(HYPRE_Int, num_rows + 1, HYPRE_MEMORY_HOST);
92 if (nnz) hypre_CSRBlockMatrixData(matrix) = hypre_CTAlloc(HYPRE_Complex, nnz, HYPRE_MEMORY_HOST);
93 else hypre_CSRBlockMatrixData(matrix) = NULL;
94 if (nnz) hypre_CSRBlockMatrixJ(matrix) = hypre_CTAlloc(HYPRE_Int, num_nonzeros, HYPRE_MEMORY_HOST);
95 else hypre_CSRBlockMatrixJ(matrix) = NULL;
96
97 return ierr;
98 }
99
100 /*--------------------------------------------------------------------------
101 * hypre_CSRBlockMatrixBigInitialize
102 *--------------------------------------------------------------------------*/
103
104 HYPRE_Int
hypre_CSRBlockMatrixBigInitialize(hypre_CSRBlockMatrix * matrix)105 hypre_CSRBlockMatrixBigInitialize(hypre_CSRBlockMatrix *matrix)
106 {
107 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(matrix);
108 HYPRE_Int num_rows = hypre_CSRBlockMatrixNumRows(matrix);
109 HYPRE_Int num_nonzeros = hypre_CSRBlockMatrixNumNonzeros(matrix);
110 HYPRE_Int ierr=0, nnz;
111
112 if ( ! hypre_CSRBlockMatrixI(matrix) )
113 hypre_TFree(hypre_CSRBlockMatrixI(matrix), HYPRE_MEMORY_HOST);
114 if ( ! hypre_CSRBlockMatrixJ(matrix) )
115 hypre_TFree(hypre_CSRBlockMatrixJ(matrix), HYPRE_MEMORY_HOST);
116 if ( ! hypre_CSRBlockMatrixBigJ(matrix) )
117 hypre_TFree(hypre_CSRBlockMatrixBigJ(matrix), HYPRE_MEMORY_HOST);
118 if ( ! hypre_CSRBlockMatrixData(matrix) )
119 hypre_TFree(hypre_CSRBlockMatrixData(matrix), HYPRE_MEMORY_HOST);
120
121 nnz = num_nonzeros * block_size * block_size;
122 hypre_CSRBlockMatrixI(matrix) = hypre_CTAlloc(HYPRE_Int, num_rows + 1, HYPRE_MEMORY_HOST);
123 if (nnz) hypre_CSRBlockMatrixData(matrix) = hypre_CTAlloc(HYPRE_Complex, nnz, HYPRE_MEMORY_HOST);
124 else hypre_CSRBlockMatrixData(matrix) = NULL;
125 if (nnz) hypre_CSRBlockMatrixBigJ(matrix) = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_HOST);
126 else hypre_CSRBlockMatrixJ(matrix) = NULL;
127
128 return ierr;
129 }
130
131 /*--------------------------------------------------------------------------
132 * hypre_CSRBlockMatrixSetDataOwner
133 *--------------------------------------------------------------------------*/
134
135 HYPRE_Int
hypre_CSRBlockMatrixSetDataOwner(hypre_CSRBlockMatrix * matrix,HYPRE_Int owns_data)136 hypre_CSRBlockMatrixSetDataOwner(hypre_CSRBlockMatrix *matrix, HYPRE_Int owns_data)
137 {
138 HYPRE_Int ierr=0;
139
140 hypre_CSRBlockMatrixOwnsData(matrix) = owns_data;
141
142 return ierr;
143 }
144
145 /*--------------------------------------------------------------------------
146 * hypre_CSRBlockMatrixCompress
147 *--------------------------------------------------------------------------*/
148
149 hypre_CSRMatrix *
hypre_CSRBlockMatrixCompress(hypre_CSRBlockMatrix * matrix)150 hypre_CSRBlockMatrixCompress(hypre_CSRBlockMatrix *matrix)
151 {
152 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(matrix);
153 HYPRE_Int num_rows = hypre_CSRBlockMatrixNumRows(matrix);
154 HYPRE_Int num_cols = hypre_CSRBlockMatrixNumCols(matrix);
155 HYPRE_Int num_nonzeros = hypre_CSRBlockMatrixNumNonzeros(matrix);
156 HYPRE_Int *matrix_i = hypre_CSRBlockMatrixI(matrix);
157 HYPRE_Int *matrix_j = hypre_CSRBlockMatrixJ(matrix);
158 HYPRE_Complex *matrix_data = hypre_CSRBlockMatrixData(matrix);
159 hypre_CSRMatrix* matrix_C;
160 HYPRE_Int *matrix_C_i, *matrix_C_j, i, j, bnnz;
161 HYPRE_Complex *matrix_C_data, ddata;
162
163 matrix_C = hypre_CSRMatrixCreate(num_rows,num_cols,num_nonzeros);
164 hypre_CSRMatrixInitialize(matrix_C);
165 matrix_C_i = hypre_CSRMatrixI(matrix_C);
166 matrix_C_j = hypre_CSRMatrixJ(matrix_C);
167 matrix_C_data = hypre_CSRMatrixData(matrix_C);
168
169 bnnz = block_size * block_size;
170 for(i = 0; i < num_rows + 1; i++) matrix_C_i[i] = matrix_i[i];
171 for(i = 0; i < num_nonzeros; i++)
172 {
173 matrix_C_j[i] = matrix_j[i];
174 ddata = 0.0;
175 for(j = 0; j < bnnz; j++)
176 ddata += matrix_data[i*bnnz+j] * matrix_data[i*bnnz+j];
177 matrix_C_data[i] = sqrt(ddata);
178 }
179 return matrix_C;
180 }
181
182 /*--------------------------------------------------------------------------
183 * hypre_CSRBlockMatrixConvertToCSRMatrix
184 *--------------------------------------------------------------------------*/
185
186 hypre_CSRMatrix *
hypre_CSRBlockMatrixConvertToCSRMatrix(hypre_CSRBlockMatrix * matrix)187 hypre_CSRBlockMatrixConvertToCSRMatrix( hypre_CSRBlockMatrix *matrix )
188 {
189 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(matrix);
190 HYPRE_Int num_rows = hypre_CSRBlockMatrixNumRows(matrix);
191 HYPRE_Int num_cols = hypre_CSRBlockMatrixNumCols(matrix);
192 HYPRE_Int num_nonzeros = hypre_CSRBlockMatrixNumNonzeros(matrix);
193 HYPRE_Int *matrix_i = hypre_CSRBlockMatrixI(matrix);
194 HYPRE_Int *matrix_j = hypre_CSRBlockMatrixJ(matrix);
195 HYPRE_Complex* matrix_data = hypre_CSRBlockMatrixData(matrix);
196
197 hypre_CSRMatrix* matrix_C;
198 HYPRE_Int i, j, k, ii, C_ii, bnnz, new_nrows, new_ncols, new_num_nonzeros;
199 HYPRE_Int *matrix_C_i, *matrix_C_j;
200 HYPRE_Complex *matrix_C_data;
201
202 bnnz = block_size * block_size;
203 new_nrows = num_rows * block_size;
204 new_ncols = num_cols * block_size;
205 new_num_nonzeros = block_size * block_size * num_nonzeros;
206 matrix_C = hypre_CSRMatrixCreate(new_nrows,new_ncols,new_num_nonzeros);
207 hypre_CSRMatrixInitialize(matrix_C);
208 matrix_C_i = hypre_CSRMatrixI(matrix_C);
209 matrix_C_j = hypre_CSRMatrixJ(matrix_C);
210 matrix_C_data = hypre_CSRMatrixData(matrix_C);
211 for(i = 0; i < num_rows; i++)
212 {
213 for(j = 0; j < block_size; j++)
214 matrix_C_i[i*block_size + j] = matrix_i[i]*bnnz +
215 j * (matrix_i[i + 1] - matrix_i[i])*block_size;
216 }
217 matrix_C_i[new_nrows] = matrix_i[num_rows] * bnnz;
218
219 C_ii = 0;
220 for(i = 0; i < num_rows; i++)
221 {
222 for(j = 0; j < block_size; j++)
223 {
224 for(ii = matrix_i[i]; ii < matrix_i[i + 1]; ii++)
225 {
226 k = j;
227 matrix_C_j[C_ii] = matrix_j[ii]*block_size + k;
228 matrix_C_data[C_ii] = matrix_data[ii*bnnz+j*block_size+k];
229 C_ii++;
230 for(k = 0; k < block_size; k++)
231 {
232 if(j != k)
233 {
234 matrix_C_j[C_ii] = matrix_j[ii]*block_size + k;
235 matrix_C_data[C_ii] = matrix_data[ii*bnnz+j*block_size+k];
236 C_ii++;
237 }
238 }
239 }
240 }
241 }
242 return matrix_C;
243 }
244
245 /*--------------------------------------------------------------------------
246 * hypre_CSRBlockMatrixConvertFromCSRMatrix
247
248 * this doesn't properly convert the parcsr off_diag matrices - AB 12/7/05
249 (because here we assume the matrix is square - we don't check what the
250 number of columns should be ) - it can only be used for the diag part
251 *--------------------------------------------------------------------------*/
252
253 hypre_CSRBlockMatrix *
hypre_CSRBlockMatrixConvertFromCSRMatrix(hypre_CSRMatrix * matrix,HYPRE_Int matrix_C_block_size)254 hypre_CSRBlockMatrixConvertFromCSRMatrix(hypre_CSRMatrix *matrix,
255 HYPRE_Int matrix_C_block_size )
256 {
257 HYPRE_Int num_rows = hypre_CSRMatrixNumRows(matrix);
258 HYPRE_Int num_cols = hypre_CSRMatrixNumCols(matrix);
259 HYPRE_Int *matrix_i = hypre_CSRMatrixI(matrix);
260 HYPRE_Int *matrix_j = hypre_CSRMatrixJ(matrix);
261 HYPRE_Complex* matrix_data = hypre_CSRMatrixData(matrix);
262
263 hypre_CSRBlockMatrix* matrix_C;
264 HYPRE_Int *matrix_C_i, *matrix_C_j;
265 HYPRE_Complex *matrix_C_data;
266 HYPRE_Int matrix_C_num_rows, matrix_C_num_cols, matrix_C_num_nonzeros;
267 HYPRE_Int i, j, ii, jj, s_jj, index, *counter;
268
269 matrix_C_num_rows = num_rows/matrix_C_block_size;
270 matrix_C_num_cols = num_cols/matrix_C_block_size;
271
272 counter = hypre_CTAlloc(HYPRE_Int, matrix_C_num_cols, HYPRE_MEMORY_HOST);
273 for(i = 0; i < matrix_C_num_cols; i++) counter[i] = -1;
274 matrix_C_num_nonzeros = 0;
275 for(i = 0; i < matrix_C_num_rows; i++)
276 {
277 for(j = 0; j < matrix_C_block_size; j++)
278 {
279 for(ii = matrix_i[i*matrix_C_block_size+j];
280 ii < matrix_i[i*matrix_C_block_size+j+1]; ii++)
281 {
282 if(counter[matrix_j[ii]/matrix_C_block_size] < i)
283 {
284 counter[matrix_j[ii]/matrix_C_block_size] = i;
285 matrix_C_num_nonzeros++;
286 }
287 }
288 }
289 }
290 matrix_C = hypre_CSRBlockMatrixCreate(matrix_C_block_size, matrix_C_num_rows,
291 matrix_C_num_cols, matrix_C_num_nonzeros);
292 hypre_CSRBlockMatrixInitialize(matrix_C);
293 matrix_C_i = hypre_CSRBlockMatrixI(matrix_C);
294 matrix_C_j = hypre_CSRBlockMatrixJ(matrix_C);
295 matrix_C_data = hypre_CSRBlockMatrixData(matrix_C);
296
297 for(i = 0; i < matrix_C_num_cols; i++) counter[i] = -1;
298 jj = s_jj = 0;
299 for (i = 0; i < matrix_C_num_rows; i++)
300 {
301 matrix_C_i[i] = jj;
302 for(j = 0; j < matrix_C_block_size; j++)
303 {
304 for(ii = matrix_i[i*matrix_C_block_size+j];
305 ii < matrix_i[i*matrix_C_block_size+j+1]; ii++)
306 {
307 if(counter[matrix_j[ii]/matrix_C_block_size] < s_jj)
308 {
309 counter[matrix_j[ii]/matrix_C_block_size] = jj;
310 matrix_C_j[jj] = matrix_j[ii]/matrix_C_block_size;
311 jj++;
312 }
313 index = counter[matrix_j[ii]/matrix_C_block_size] * matrix_C_block_size *
314 matrix_C_block_size + j * matrix_C_block_size +
315 matrix_j[ii]%matrix_C_block_size;
316 matrix_C_data[index] = matrix_data[ii];
317 }
318 }
319 s_jj = jj;
320 }
321 matrix_C_i[matrix_C_num_rows] = matrix_C_num_nonzeros;
322
323 hypre_TFree(counter, HYPRE_MEMORY_HOST);
324
325
326 return matrix_C;
327 }
328
329 /*--------------------------------------------------------------------------
330 * hypre_CSRBlockMatrixBlockAdd
331 * (o = i1 + i2)
332 *--------------------------------------------------------------------------*/
333 HYPRE_Int
hypre_CSRBlockMatrixBlockAdd(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)334 hypre_CSRBlockMatrixBlockAdd(HYPRE_Complex* i1,
335 HYPRE_Complex* i2,
336 HYPRE_Complex* o,
337 HYPRE_Int block_size)
338 {
339 HYPRE_Int i;
340 HYPRE_Int sz = block_size*block_size;
341
342 for (i = 0; i < sz; i++)
343 o[i] = i1[i] + i2[i];
344
345 return 0;
346 }
347
348
349 /*--------------------------------------------------------------------------
350 * hypre_CSRBlockMatrixBlockAddAccumulate
351 * (o = i1 + o)
352 *--------------------------------------------------------------------------*/
353 HYPRE_Int
hypre_CSRBlockMatrixBlockAddAccumulate(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Int block_size)354 hypre_CSRBlockMatrixBlockAddAccumulate(HYPRE_Complex* i1,
355 HYPRE_Complex* o,
356 HYPRE_Int block_size)
357 {
358 HYPRE_Int i;
359 HYPRE_Int sz = block_size*block_size;
360
361 for (i = 0; i < sz; i++)
362 o[i] += i1[i];
363
364 return 0;
365 }
366
367 /*--------------------------------------------------------------------------
368 * hypre_CSRBlockMatrixBlockAddAccumulateDiag
369 * (diag(o) = diag(i1) + diag(o))
370 *--------------------------------------------------------------------------*/
371 HYPRE_Int
hypre_CSRBlockMatrixBlockAddAccumulateDiag(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Int block_size)372 hypre_CSRBlockMatrixBlockAddAccumulateDiag(HYPRE_Complex* i1,
373 HYPRE_Complex* o,
374 HYPRE_Int block_size)
375 {
376 HYPRE_Int i;
377
378 for (i = 0; i < block_size; i++)
379 o[i*block_size+i] += i1[i*block_size+i];
380 return 0;
381 }
382
383 /*--------------------------------------------------------------------------
384 * hypre_CSRBlockMatrixBlockAddAccumulateDiagCheckSign
385 * only add elements of sign*i1 that are negative (sign is size block_size)
386 * (diag(o) = diag(i1) + diag(o))
387 *--------------------------------------------------------------------------*/
388 HYPRE_Int
hypre_CSRBlockMatrixBlockAddAccumulateDiagCheckSign(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Int block_size,HYPRE_Real * sign)389 hypre_CSRBlockMatrixBlockAddAccumulateDiagCheckSign(HYPRE_Complex* i1,
390 HYPRE_Complex* o,
391 HYPRE_Int block_size,
392 HYPRE_Real *sign)
393 {
394 HYPRE_Int i;
395 HYPRE_Real tmp;
396
397 for (i = 0; i < block_size; i++)
398 {
399 tmp = (HYPRE_Real) i1[i*block_size+i]*sign[i];
400 if (tmp < 0)
401 o[i*block_size+i] += i1[i*block_size+i];
402 }
403
404 return 0;
405 }
406
407 /*--------------------------------------------------------------------------
408 * hypre_CSRBlockMatrixComputeSign
409
410 * o = sign(diag(i1))
411 *--------------------------------------------------------------------------*/
412
hypre_CSRBlockMatrixComputeSign(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Int block_size)413 HYPRE_Int hypre_CSRBlockMatrixComputeSign(HYPRE_Complex *i1,
414 HYPRE_Complex *o,
415 HYPRE_Int block_size)
416 {
417 HYPRE_Int i;
418
419 for (i = 0; i < block_size; i++)
420 {
421 if ((HYPRE_Real) i1[i*block_size+i] < 0)
422 o[i] = -1;
423 else
424 o[i] = 1;
425 }
426
427 return 0;
428 }
429
430
431 /*--------------------------------------------------------------------------
432 * hypre_CSRBlockMatrixBlockSetScalar
433 * (each entry in block o is set to beta )
434 *--------------------------------------------------------------------------*/
435 HYPRE_Int
hypre_CSRBlockMatrixBlockSetScalar(HYPRE_Complex * o,HYPRE_Complex beta,HYPRE_Int block_size)436 hypre_CSRBlockMatrixBlockSetScalar(HYPRE_Complex* o,
437 HYPRE_Complex beta,
438 HYPRE_Int block_size)
439 {
440 HYPRE_Int i;
441 HYPRE_Int sz = block_size*block_size;
442
443 for (i = 0; i < sz; i++)
444 o[i] = beta;
445
446 return 0;
447 }
448
449
450 /*--------------------------------------------------------------------------
451 * hypre_CSRBlockMatrixBlockCopyData
452 * (o = beta*i1 )
453 *--------------------------------------------------------------------------*/
454 HYPRE_Int
hypre_CSRBlockMatrixBlockCopyData(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Complex beta,HYPRE_Int block_size)455 hypre_CSRBlockMatrixBlockCopyData(HYPRE_Complex* i1,
456 HYPRE_Complex* o,
457 HYPRE_Complex beta,
458 HYPRE_Int block_size)
459 {
460 HYPRE_Int i;
461 HYPRE_Int sz = block_size*block_size;
462
463 for (i = 0; i < sz; i++)
464 o[i] = beta*i1[i];
465
466 return 0;
467 }
468
469 /*--------------------------------------------------------------------------
470 * hypre_CSRBlockMatrixBlockCopyDataDiag - zeros off-diag entries
471 * (o = beta*diag(i1))
472 *--------------------------------------------------------------------------*/
473 HYPRE_Int
hypre_CSRBlockMatrixBlockCopyDataDiag(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Complex beta,HYPRE_Int block_size)474 hypre_CSRBlockMatrixBlockCopyDataDiag(HYPRE_Complex* i1,
475 HYPRE_Complex* o,
476 HYPRE_Complex beta,
477 HYPRE_Int block_size)
478 {
479 HYPRE_Int i;
480
481 HYPRE_Int sz = block_size*block_size;
482
483 for (i = 0; i < sz; i++)
484 o[i] = 0.0;
485
486 for (i = 0; i < block_size; i++)
487 o[i*block_size+i] = beta*i1[i*block_size+i];
488
489 return 0;
490 }
491
492 /*--------------------------------------------------------------------------
493 * hypre_CSRBlockMatrixBlockTranspose
494 * (o = i1' )
495 *--------------------------------------------------------------------------*/
496 HYPRE_Int
hypre_CSRBlockMatrixBlockTranspose(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Int block_size)497 hypre_CSRBlockMatrixBlockTranspose(HYPRE_Complex* i1,
498 HYPRE_Complex* o,
499 HYPRE_Int block_size)
500 {
501 HYPRE_Int i, j;
502
503 for (i = 0; i < block_size; i++)
504 for (j = 0; j < block_size; j++)
505 o[i*block_size+j] = i1[j*block_size+i];
506 return 0;
507 }
508
509 /*--------------------------------------------------------------------------
510 * hypre_CSRBlockMatrixBlockNorm
511 * (out = norm(data) )
512 *
513 * (note: these are not all actually "norms")
514 *
515 *--------------------------------------------------------------------------*/
516 HYPRE_Int
hypre_CSRBlockMatrixBlockNorm(HYPRE_Int norm_type,HYPRE_Complex * data,HYPRE_Real * out,HYPRE_Int block_size)517 hypre_CSRBlockMatrixBlockNorm(HYPRE_Int norm_type, HYPRE_Complex* data, HYPRE_Real* out, HYPRE_Int block_size)
518 {
519 HYPRE_Int ierr = 0;
520 HYPRE_Int i,j;
521 HYPRE_Real sum = 0.0;
522 HYPRE_Real *totals;
523 HYPRE_Int sz = block_size*block_size;
524
525 switch (norm_type)
526 {
527 case 6: /* sum of all elements in the block */
528 {
529 for(i = 0; i < sz; i++)
530 {
531 sum += (HYPRE_Real)(data[i]);
532 }
533 break;
534 }
535 case 5: /* one norm - max col sum*/
536 {
537
538 totals = hypre_CTAlloc(HYPRE_Real, block_size, HYPRE_MEMORY_HOST);
539 for(i = 0; i < block_size; i++) /* row */
540 {
541 for(j = 0; j < block_size; j++) /* col */
542 {
543 totals[j] += hypre_cabs(data[i*block_size + j]);
544 }
545 }
546
547 sum = totals[0];
548 for(j = 1; j < block_size; j++) /* col */
549 {
550 if (totals[j] > sum) sum = totals[j];
551 }
552 hypre_TFree(totals, HYPRE_MEMORY_HOST);
553
554 break;
555
556 }
557 case 4: /* inf norm - max row sum */
558 {
559
560 totals = hypre_CTAlloc(HYPRE_Real, block_size, HYPRE_MEMORY_HOST);
561 for(i = 0; i < block_size; i++) /* row */
562 {
563 for(j = 0; j < block_size; j++) /* col */
564 {
565 totals[i] += hypre_cabs(data[i*block_size + j]);
566 }
567 }
568
569 sum = totals[0];
570 for(i = 1; i < block_size; i++) /* row */
571 {
572 if (totals[i] > sum) sum = totals[i];
573 }
574 hypre_TFree(totals, HYPRE_MEMORY_HOST);
575
576 break;
577 }
578
579 case 3: /* largest element of block (return value includes sign) */
580 {
581
582 sum = (HYPRE_Real)data[0];
583
584 for(i = 0; i < sz; i++)
585 {
586 if (hypre_cabs(data[i]) > hypre_cabs(sum)) sum = (HYPRE_Real)data[i];
587 }
588
589 break;
590 }
591 case 2: /* sum of abs values of all elements in the block */
592 {
593 for(i = 0; i < sz; i++)
594 {
595 sum += hypre_cabs(data[i]);
596 }
597 break;
598 }
599
600
601 default: /* 1 = frobenius*/
602 {
603 for(i = 0; i < sz; i++)
604 {
605 sum += ((HYPRE_Real)data[i])*((HYPRE_Real)data[i]);
606 }
607 sum = sqrt(sum);
608 }
609 }
610
611 *out = sum;
612
613 return ierr;
614 }
615
616 /*--------------------------------------------------------------------------
617 * hypre_CSRBlockMatrixBlockMultAdd
618 * (o = i1 * i2 + beta * o)
619 *--------------------------------------------------------------------------*/
620 HYPRE_Int
hypre_CSRBlockMatrixBlockMultAdd(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex beta,HYPRE_Complex * o,HYPRE_Int block_size)621 hypre_CSRBlockMatrixBlockMultAdd(HYPRE_Complex* i1,
622 HYPRE_Complex* i2,
623 HYPRE_Complex beta,
624 HYPRE_Complex* o,
625 HYPRE_Int block_size)
626 {
627
628 #if LB_VERSION
629 {
630 HYPRE_Complex alp = 1.0;
631 dgemm_("N","N", &block_size, &block_size, &block_size, &alp, i2, &block_size, i1,
632 &block_size, &beta, o, &block_size);
633 }
634 #else
635 {
636 HYPRE_Int i, j, k;
637 HYPRE_Complex ddata;
638
639 if (beta == 0.0)
640 {
641 for (i = 0; i < block_size; i++)
642 {
643 for (j = 0; j < block_size; j++)
644 {
645 ddata = 0.0;
646 for (k = 0; k < block_size; k++)
647 {
648 ddata += i1[i*block_size + k] * i2[k*block_size + j];
649 }
650 o[i*block_size + j] = ddata;
651 }
652 }
653 }
654 else if (beta == 1.0)
655 {
656 for(i = 0; i < block_size; i++)
657 {
658 for(j = 0; j < block_size; j++)
659 {
660 ddata = o[i*block_size + j];
661 for(k = 0; k < block_size; k++)
662 ddata += i1[i*block_size + k] * i2[k*block_size + j];
663 o[i*block_size + j] = ddata;
664 }
665 }
666 }
667 else
668 {
669 for(i = 0; i < block_size; i++)
670 {
671 for(j = 0; j < block_size; j++)
672 {
673 ddata = beta * o[i*block_size + j];
674 for(k = 0; k < block_size; k++)
675 ddata += i1[i*block_size + k] * i2[k*block_size + j];
676 o[i*block_size + j] = ddata;
677 }
678 }
679 }
680 }
681
682 #endif
683
684 return 0;
685 }
686
687
688 /*--------------------------------------------------------------------------
689 * hypre_CSRBlockMatrixBlockMultAddDiag
690 * (diag(o) = diag(i1) * diag(i2) + beta * diag(o))
691 *--------------------------------------------------------------------------*/
692 HYPRE_Int
hypre_CSRBlockMatrixBlockMultAddDiag(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex beta,HYPRE_Complex * o,HYPRE_Int block_size)693 hypre_CSRBlockMatrixBlockMultAddDiag(HYPRE_Complex* i1,
694 HYPRE_Complex* i2,
695 HYPRE_Complex beta,
696 HYPRE_Complex* o,
697 HYPRE_Int block_size)
698 {
699 HYPRE_Int i;
700
701 if (beta == 0.0)
702 {
703 for (i = 0; i < block_size; i++)
704 {
705 o[i*block_size + i] = i1[i*block_size + i] * i2[i*block_size + i];
706 }
707 }
708 else if (beta == 1.0)
709 {
710 for(i = 0; i < block_size; i++)
711 {
712 o[i*block_size + i] = o[i*block_size + i] + i1[i*block_size + i] * i2[i*block_size + i];
713 }
714 }
715 else
716 {
717 for(i = 0; i < block_size; i++)
718 {
719 o[i*block_size + i] = beta* o[i*block_size + i] + i1[i*block_size + i] * i2[i*block_size + i];
720 }
721 }
722 return 0;
723 }
724
725 /*--------------------------------------------------------------------------
726 * hypre_CSRBlockMatrixBlockMultAddDiagCheckSign
727 *
728 * only mult elements if sign*diag(i2) is negative
729 *(diag(o) = diag(i1) * diag(i2) + beta * diag(o))
730 *--------------------------------------------------------------------------*/
731 HYPRE_Int
hypre_CSRBlockMatrixBlockMultAddDiagCheckSign(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex beta,HYPRE_Complex * o,HYPRE_Int block_size,HYPRE_Real * sign)732 hypre_CSRBlockMatrixBlockMultAddDiagCheckSign(HYPRE_Complex *i1,
733 HYPRE_Complex *i2,
734 HYPRE_Complex beta,
735 HYPRE_Complex *o,
736 HYPRE_Int block_size,
737 HYPRE_Real *sign)
738 {
739 HYPRE_Int i;
740 HYPRE_Real tmp;
741
742 if (beta == 0.0)
743 {
744 for (i = 0; i < block_size; i++)
745 {
746 tmp = (HYPRE_Real) i2[i*block_size+i]*sign[i];
747 if (tmp < 0)
748 o[i*block_size + i] = i1[i*block_size + i] * i2[i*block_size + i];
749 }
750 }
751 else if (beta == 1.0)
752 {
753 for(i = 0; i < block_size; i++)
754 {
755 tmp = (HYPRE_Real) i2[i*block_size+i]*sign[i];
756 if (tmp < 0)
757 o[i*block_size + i] = o[i*block_size + i] + i1[i*block_size + i] * i2[i*block_size + i];
758 }
759 }
760 else
761 {
762 for(i = 0; i < block_size; i++)
763 {
764 tmp = i2[i*block_size+i]*sign[i];
765 if (tmp < 0)
766 o[i*block_size + i] = beta* o[i*block_size + i] + i1[i*block_size + i] * i2[i*block_size + i];
767 }
768 }
769 return 0;
770 }
771
772 /*--------------------------------------------------------------------------
773 * hypre_CSRBlockMatrixBlockMultAddDiag2 (scales cols of il by diag of i2)
774 * ((o) = (i1) * diag(i2) + beta * (o))
775 *--------------------------------------------------------------------------*/
776 HYPRE_Int
hypre_CSRBlockMatrixBlockMultAddDiag2(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex beta,HYPRE_Complex * o,HYPRE_Int block_size)777 hypre_CSRBlockMatrixBlockMultAddDiag2(HYPRE_Complex* i1,
778 HYPRE_Complex* i2,
779 HYPRE_Complex beta,
780 HYPRE_Complex* o,
781 HYPRE_Int block_size)
782 {
783 HYPRE_Int i, j;
784
785 if (beta == 0.0)
786 {
787 for (i = 0; i < block_size; i++)
788 {
789 for (j = 0; j < block_size; j++)
790 {
791 o[i*block_size + j] = i1[i*block_size + j] * i2[j*block_size + j];
792
793 }
794 }
795 }
796 else if (beta == 1.0)
797 {
798 for (i = 0; i < block_size; i++)
799 {
800 for (j = 0; j < block_size; j++)
801 {
802 o[i*block_size + j] = o[i*block_size + j] + i1[i*block_size + j] * i2[j*block_size + j];
803
804 }
805 }
806
807
808 }
809 else
810 {
811 for (i = 0; i < block_size; i++)
812 {
813 for (j = 0; j < block_size; j++)
814 {
815 o[i*block_size + j] = beta * o[i*block_size + j] + i1[i*block_size + j] * i2[j*block_size + j];
816
817 }
818 }
819 }
820 return 0;
821 }
822
823 /*--------------------------------------------------------------------------
824 * hypre_CSRBlockMatrixBlockMultAddDiag3 (scales cols of il by i2 -
825 whose diag elements are row sums)
826 * ((o) = (i1) * diag(i2) + beta * (o))
827 *--------------------------------------------------------------------------*/
828 HYPRE_Int
hypre_CSRBlockMatrixBlockMultAddDiag3(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex beta,HYPRE_Complex * o,HYPRE_Int block_size)829 hypre_CSRBlockMatrixBlockMultAddDiag3(HYPRE_Complex* i1,
830 HYPRE_Complex* i2,
831 HYPRE_Complex beta,
832 HYPRE_Complex* o,
833 HYPRE_Int block_size)
834 {
835 HYPRE_Int i, j;
836
837 HYPRE_Complex *row_sum;
838
839 row_sum = hypre_CTAlloc(HYPRE_Complex, block_size, HYPRE_MEMORY_HOST);
840 for (i = 0; i < block_size; i++)
841 {
842 for (j = 0; j < block_size; j++)
843 {
844 row_sum[i] += i2[i*block_size + j];
845 }
846 }
847
848 if (beta == 0.0)
849 {
850 for (i = 0; i < block_size; i++)
851 {
852 for (j = 0; j < block_size; j++)
853 {
854 o[i*block_size + j] = i1[i*block_size + j] * row_sum[j];
855
856 }
857 }
858 }
859 else if (beta == 1.0)
860 {
861 for (i = 0; i < block_size; i++)
862 {
863 for (j = 0; j < block_size; j++)
864 {
865 o[i*block_size + j] = o[i*block_size + j] + i1[i*block_size + j] * row_sum[j];
866
867 }
868 }
869 }
870 else
871 {
872 for (i = 0; i < block_size; i++)
873 {
874 for (j = 0; j < block_size; j++)
875 {
876 o[i*block_size + j] = beta * o[i*block_size + j] + i1[i*block_size + j] * row_sum[j];
877
878 }
879 }
880 }
881
882 hypre_TFree(row_sum, HYPRE_MEMORY_HOST);
883
884 return 0;
885 }
886 /*--------------------------------------------------------------------------
887 * hypre_CSRBlockMatrixBlockMatvec
888 * (ov = alpha* mat * v + beta * ov)
889 * mat is the matrix - size is block_size^2
890 * alpha and beta are scalars
891 *--------------------------------------------------------------------------*/
892
893 HYPRE_Int
hypre_CSRBlockMatrixBlockMatvec(HYPRE_Complex alpha,HYPRE_Complex * mat,HYPRE_Complex * v,HYPRE_Complex beta,HYPRE_Complex * ov,HYPRE_Int block_size)894 hypre_CSRBlockMatrixBlockMatvec(HYPRE_Complex alpha,
895 HYPRE_Complex* mat,
896 HYPRE_Complex* v,
897 HYPRE_Complex beta,
898 HYPRE_Complex* ov,
899 HYPRE_Int block_size)
900 {
901 HYPRE_Int ierr = 0;
902
903 #if LB_VERSION
904 {
905 HYPRE_Int one = 1;
906
907 dgemv_("T", &block_size, &block_size, &alpha, mat, &block_size, v,
908 &one, &beta, ov, &one);
909 }
910
911 #else
912 {
913 HYPRE_Int i, j;
914 HYPRE_Complex ddata;
915
916 /* if alpha = 0, then no matvec */
917 if (alpha == 0.0)
918 {
919 for (j = 0; j < block_size; j++)
920 {
921 ov[j] *= beta;
922 }
923 return ierr;
924 }
925
926 /* ov = (beta/alpha) * ov; */
927 ddata = beta / alpha;
928 if (ddata != 1.0)
929 {
930 if (ddata == 0.0)
931 {
932 for (j = 0; j < block_size; j++)
933 {
934 ov[j] = 0.0;
935 }
936 }
937 else
938 {
939 for (j = 0; j < block_size; j++)
940 {
941 ov[j] *= ddata;
942 }
943 }
944 }
945
946 /* ov = ov + mat*v */
947 for (i = 0; i < block_size; i++)
948 {
949 ddata = ov[i];
950 for (j = 0; j < block_size; j++)
951 {
952 ddata += mat[i*block_size + j] * v[j];
953 }
954 ov[i] = ddata;
955 }
956
957 /* ov = alpha*ov */
958 if (alpha != 1.0)
959 {
960 for (j = 0; j < block_size; j++)
961 {
962 ov[j] *= alpha;
963 }
964 }
965 }
966
967 #endif
968
969 return ierr;
970
971 }
972
973
974 /*--------------------------------------------------------------------------
975 * hypre_CSRBlockMatrixBlockInvMatvec
976 * (ov = mat^{-1} * v)
977 * o and v are vectors
978 * mat is the matrix - size is block_size^2
979 *--------------------------------------------------------------------------*/
980 HYPRE_Int
hypre_CSRBlockMatrixBlockInvMatvec(HYPRE_Complex * mat,HYPRE_Complex * v,HYPRE_Complex * ov,HYPRE_Int block_size)981 hypre_CSRBlockMatrixBlockInvMatvec(HYPRE_Complex* mat, HYPRE_Complex* v,
982 HYPRE_Complex* ov, HYPRE_Int block_size)
983 {
984 HYPRE_Int ierr = 0;
985 HYPRE_Complex *mat_i;
986
987 mat_i = hypre_CTAlloc(HYPRE_Complex, block_size*block_size, HYPRE_MEMORY_HOST);
988
989 #if LB_VERSION
990 {
991
992 HYPRE_Int one, info;
993 HYPRE_Int *piv;
994 HYPRE_Int sz;
995
996
997 one = 1;
998 piv = hypre_CTAlloc(HYPRE_Int, block_size, HYPRE_MEMORY_HOST);
999 sz = block_size*block_size;
1000
1001
1002 /* copy v to ov and mat to mat_i*/
1003
1004 dcopy_(&sz, mat, &one, mat_i, &one);
1005 dcopy_(&block_size, v, &one, ov, &one);
1006
1007 /* writes over mat_i with LU */
1008 dgetrf_(&block_size,&block_size, mat_i, &block_size, piv , &info);
1009 if (info)
1010 {
1011 hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1012 hypre_TFree(piv, HYPRE_MEMORY_HOST);
1013 return(-1);
1014 }
1015
1016 /* writes over ov */
1017 dgetrs_("T", &block_size, &one ,
1018 mat_i, &block_size, piv, ov, &block_size, &info);
1019 if (info)
1020 {
1021 hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1022 hypre_TFree(piv, HYPRE_MEMORY_HOST);
1023 return(-1);
1024 }
1025
1026 hypre_TFree(piv, HYPRE_MEMORY_HOST);
1027
1028 }
1029
1030 #else
1031 {
1032 HYPRE_Int m, j, k;
1033 HYPRE_Int piv_row;
1034 HYPRE_Real eps;
1035 HYPRE_Complex factor;
1036 HYPRE_Complex piv, tmp;
1037 eps = 1.0e-6;
1038
1039 if (block_size ==1 )
1040 {
1041 if (hypre_cabs(mat[0]) > 1e-10)
1042 {
1043 ov[0] = v[0]/mat[0];
1044 hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1045 return(ierr);
1046 }
1047 else
1048 {
1049 /* hypre_printf("GE zero pivot error\n"); */
1050 hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1051 return(-1);
1052 }
1053 }
1054 else
1055 {
1056 /* copy v to ov and mat to mat_i*/
1057 for (k = 0; k < block_size; k++)
1058 {
1059 ov[k] = v[k];
1060 for (j=0; j<block_size; j++)
1061 {
1062 mat_i[k*block_size + j] = mat[k*block_size + j];
1063 }
1064 }
1065 /* start ge - turning m_i into U factor (don't save L - just apply to
1066 rhs - which is ov)*/
1067 /* we do partial pivoting for size */
1068
1069 /* loop through the rows (row k) */
1070 for (k = 0; k < block_size-1; k++)
1071 {
1072 piv = mat_i[k*block_size+k];
1073 piv_row = k;
1074
1075 /* find the largest pivot in position k*/
1076 for (j=k+1; j < block_size; j++)
1077 {
1078 if (hypre_cabs(mat_i[j*block_size+k]) > hypre_cabs(piv))
1079 {
1080 piv = mat_i[j*block_size+k];
1081 piv_row = j;
1082 }
1083
1084 }
1085 if (piv_row !=k) /* do a row exchange - rows k and piv_row*/
1086 {
1087 for (j=0; j < block_size; j++)
1088 {
1089 tmp = mat_i[k*block_size + j];
1090 mat_i[k*block_size + j] = mat_i[piv_row*block_size + j];
1091 mat_i[piv_row*block_size + j] = tmp;
1092 }
1093 tmp = ov[k];
1094 ov[k] = ov[piv_row];
1095 ov[piv_row] = tmp;
1096 }
1097 /* end of pivoting */
1098
1099 if (hypre_cabs(piv) > eps)
1100 {
1101 /* now we can factor into U */
1102 for (j = k+1; j < block_size; j++)
1103 {
1104 factor = mat_i[j*block_size+k]/piv;
1105 for (m = k+1; m < block_size; m++)
1106 {
1107 mat_i[j*block_size+m] -= factor * mat_i[k*block_size+m];
1108 }
1109 /* Elimination step for rhs */
1110 ov[j] -= factor * ov[k];
1111 }
1112 }
1113 else
1114 {
1115 /* hypre_printf("Block of matrix is nearly singular: zero pivot error\n"); */
1116 hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1117 return(-1);
1118 }
1119 }
1120
1121 /* we also need to check the pivot in the last row to see if it is zero */
1122 k = block_size - 1; /* last row */
1123 if ( hypre_cabs(mat_i[k*block_size+k]) < eps)
1124 {
1125 /* hypre_printf("Block of matrix is nearly singular: zero pivot error\n"); */
1126 hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1127 return(-1);
1128 }
1129
1130 /* Back Substitution - do rhs (U is now in m_i1)*/
1131 for (k = block_size-1; k > 0; --k)
1132 {
1133 ov[k] /= mat_i[k*block_size+k];
1134 for (j = 0; j < k; j++)
1135 {
1136 if (mat_i[j*block_size+k] != 0.0)
1137 {
1138 ov[j] -= ov[k] * mat_i[j*block_size+k];
1139 }
1140 }
1141 }
1142 ov[0] /= mat_i[0];
1143
1144 }
1145
1146 }
1147 #endif
1148
1149
1150 hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1151
1152 return (ierr);
1153 }
1154
1155
1156
1157 /*--------------------------------------------------------------------------
1158 * hypre_CSRBlockMatrixBlockInvMult
1159 * (o = i1^{-1} * i2)
1160 *--------------------------------------------------------------------------*/
1161 HYPRE_Int
hypre_CSRBlockMatrixBlockInvMult(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)1162 hypre_CSRBlockMatrixBlockInvMult(HYPRE_Complex* i1, HYPRE_Complex* i2, HYPRE_Complex* o, HYPRE_Int block_size)
1163 {
1164
1165 HYPRE_Int ierr = 0;
1166 HYPRE_Int i, j;
1167 HYPRE_Complex *m_i1;
1168
1169 m_i1 = hypre_CTAlloc(HYPRE_Complex, block_size*block_size, HYPRE_MEMORY_HOST);
1170
1171 #if LB_VERSION
1172 {
1173
1174 HYPRE_Int one, info;
1175 HYPRE_Int *piv;
1176 HYPRE_Int sz;
1177
1178 HYPRE_Complex *i2_t;
1179
1180 one = 1;
1181 i2_t = hypre_CTAlloc(HYPRE_Complex, block_size*block_size, HYPRE_MEMORY_HOST);
1182 piv = hypre_CTAlloc(HYPRE_Int, block_size, HYPRE_MEMORY_HOST);
1183
1184
1185 /* copy i1 to m_i1*/
1186 sz = block_size*block_size;
1187 dcopy_(&sz, i1, &one, m_i1, &one);
1188
1189
1190 /* writes over m_i1 with LU */
1191 dgetrf_(&block_size, &block_size, m_i1, &block_size, piv , &info);
1192 if (info)
1193 {
1194 hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1195 hypre_TFree(i2_t, HYPRE_MEMORY_HOST);
1196 hypre_TFree(piv, HYPRE_MEMORY_HOST);
1197 return (-1);
1198 }
1199
1200 /* need the transpose of i_2*/
1201 for (i=0; i < block_size; i++)
1202 {
1203 for (j=0; j< block_size; j++)
1204 {
1205 i2_t[i*block_size+j] = i2[j*block_size+i];
1206 }
1207 }
1208
1209 /* writes over i2_t */
1210 dgetrs_("T", &block_size, &block_size,
1211 m_i1, &block_size, piv, i2_t, &block_size, &info);
1212 if (info)
1213 {
1214 hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1215 hypre_TFree(i2_t, HYPRE_MEMORY_HOST);
1216 hypre_TFree(piv, HYPRE_MEMORY_HOST);
1217 return (-1);
1218 }
1219
1220 /* ans. is the transpose of i2_t*/
1221 for (i=0; i < block_size; i++)
1222 {
1223 for (j=0; j< block_size; j++)
1224 {
1225 o[i*block_size+j] = i2_t[j*block_size+i];
1226 }
1227 }
1228
1229 hypre_TFree(i2_t, HYPRE_MEMORY_HOST);
1230 hypre_TFree(piv, HYPRE_MEMORY_HOST);
1231
1232 }
1233
1234 #else
1235 {
1236 HYPRE_Int m, k;
1237 HYPRE_Int piv_row;
1238 HYPRE_Real eps;
1239 HYPRE_Complex factor;
1240 HYPRE_Complex piv, tmp;
1241
1242 eps = 1.0e-6;
1243
1244 if (block_size ==1 )
1245 {
1246 if (hypre_cabs(m_i1[0]) > 1e-10)
1247 {
1248 o[0] = i2[0]/i1[0];
1249 hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1250 return(ierr);
1251 }
1252 else
1253 {
1254 /* hypre_printf("GE zero pivot error\n"); */
1255 hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1256 return(-1);
1257 }
1258 }
1259 else
1260 {
1261 /* copy i2 to o and i1 to m_i1*/
1262 for (k = 0; k < block_size*block_size; k++)
1263 {
1264 o[k] = i2[k];
1265 m_i1[k] = i1[k];
1266 }
1267
1268
1269 /* start ge - turning m_i1 into U factor (don't save L - just apply to
1270 rhs - which is o)*/
1271 /* we do partial pivoting for size */
1272
1273 /* loop through the rows (row k) */
1274 for (k = 0; k < block_size-1; k++)
1275 {
1276 piv = m_i1[k*block_size+k];
1277 piv_row = k;
1278
1279 /* find the largest pivot in position k*/
1280 for (j=k+1; j < block_size; j++)
1281 {
1282 if (hypre_cabs(m_i1[j*block_size+k]) > hypre_cabs(piv))
1283 {
1284 piv = m_i1[j*block_size+k];
1285 piv_row = j;
1286 }
1287
1288 }
1289 if (piv_row !=k) /* do a row exchange - rows k and piv_row*/
1290 {
1291 for (j=0; j < block_size; j++)
1292 {
1293 tmp = m_i1[k*block_size + j];
1294 m_i1[k*block_size + j] = m_i1[piv_row*block_size + j];
1295 m_i1[piv_row*block_size + j] = tmp;
1296
1297 tmp = o[k*block_size + j];
1298 o[k*block_size + j] = o[piv_row*block_size + j];
1299 o[piv_row*block_size + j] = tmp;
1300
1301 }
1302 }
1303 /* end of pivoting */
1304
1305
1306 if (hypre_cabs(piv) > eps)
1307 {
1308 /* now we can factor into U */
1309 for (j = k+1; j < block_size; j++)
1310 {
1311 factor = m_i1[j*block_size+k]/piv;
1312 for (m = k+1; m < block_size; m++)
1313 {
1314 m_i1[j*block_size+m] -= factor * m_i1[k*block_size+m];
1315 }
1316 /* Elimination step for rhs */
1317 /* do for each of the "rhs" */
1318 for (i=0; i < block_size; i++)
1319 {
1320 /* o(row, col) = o(row*block_size + col) */
1321 o[j*block_size+i] -= factor * o[k*block_size + i];
1322 }
1323 }
1324 }
1325 else
1326 {
1327 /* hypre_printf("Block of matrix is nearly singular: zero pivot error\n"); */
1328 hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1329 return(-1);
1330 }
1331 }
1332
1333
1334 /* we also need to check the pivot in the last row to see if it is zero */
1335 k = block_size - 1; /* last row */
1336 if ( hypre_cabs(m_i1[k*block_size+k]) < eps)
1337 {
1338 /* hypre_printf("Block of matrix is nearly singular: zero pivot error\n"); */
1339 hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1340 return(-1);
1341 }
1342
1343
1344 /* Back Substitution - do for each "rhs" (U is now in m_i1)*/
1345 for (i=0; i < block_size; i++)
1346 {
1347 for (k = block_size-1; k > 0; --k)
1348 {
1349 o[k*block_size + i] /= m_i1[k*block_size+k];
1350 for (j = 0; j < k; j++)
1351 {
1352 if (m_i1[j*block_size+k] != 0.0)
1353 {
1354 o[j*block_size + i] -= o[k*block_size + i] * m_i1[j*block_size+k];
1355 }
1356 }
1357 }
1358 o[0*block_size + i] /= m_i1[0];
1359 }
1360 }
1361 }
1362
1363 #endif
1364 hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1365
1366 return ierr;
1367 }
1368
1369
1370 /*--------------------------------------------------------------------------
1371 * hypre_CSRBlockMatrixBlockMultInv
1372 * (o = i2*il^(-1))
1373 *--------------------------------------------------------------------------*/
1374 HYPRE_Int
hypre_CSRBlockMatrixBlockMultInv(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)1375 hypre_CSRBlockMatrixBlockMultInv(HYPRE_Complex* i1, HYPRE_Complex* i2, HYPRE_Complex* o, HYPRE_Int block_size)
1376 {
1377
1378 HYPRE_Int ierr = 0;
1379
1380
1381 #if LB_VERSION
1382
1383 {
1384 /* same as solving A^T C^T = B^T */
1385 HYPRE_Complex *m_i1;
1386 HYPRE_Int info;
1387 HYPRE_Int *piv;
1388 HYPRE_Int sz, one;
1389
1390
1391 piv = hypre_CTAlloc(HYPRE_Int, block_size, HYPRE_MEMORY_HOST);
1392 m_i1 = hypre_CTAlloc(HYPRE_Complex, block_size*block_size, HYPRE_MEMORY_HOST);
1393 one = 1;
1394 sz = block_size*block_size;
1395
1396 /* copy i1 to m_i1 and i2 to o*/
1397
1398 dcopy_(&sz, i1, &one, m_i1, &one);
1399 dcopy_(&sz, i2, &one, o, &one);
1400
1401 /* writes over m_i1 with LU */
1402 dgetrf_(&block_size, &block_size, m_i1, &block_size, piv , &info);
1403 if (info)
1404 {
1405 hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1406 hypre_TFree(piv, HYPRE_MEMORY_HOST);
1407 return (-1);
1408 }
1409 /* writes over B */
1410 dgetrs_("N", &block_size, &block_size,
1411 m_i1, &block_size, piv, o, &block_size, &info);
1412 if (info)
1413 {
1414 hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1415 hypre_TFree(piv, HYPRE_MEMORY_HOST);
1416 return (-1);
1417 }
1418
1419 hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1420 hypre_TFree(piv, HYPRE_MEMORY_HOST);
1421 }
1422
1423 #else
1424 {
1425 HYPRE_Real eps;
1426 HYPRE_Complex *i1_t, *i2_t, *o_t;
1427
1428 eps = 1.0e-12;
1429
1430 if (block_size ==1 )
1431 {
1432 if (hypre_cabs(i1[0]) > eps)
1433 {
1434 o[0] = i2[0]/i1[0];
1435 return(ierr);
1436 }
1437 else
1438 {
1439 /* hypre_printf("GE zero pivot error\n"); */
1440 return(-1);
1441 }
1442 }
1443 else
1444 {
1445
1446 i1_t = hypre_CTAlloc(HYPRE_Complex, block_size*block_size, HYPRE_MEMORY_HOST);
1447 i2_t = hypre_CTAlloc(HYPRE_Complex, block_size*block_size, HYPRE_MEMORY_HOST);
1448 o_t = hypre_CTAlloc(HYPRE_Complex, block_size*block_size, HYPRE_MEMORY_HOST);
1449
1450 /* TO DO:: this could be done more efficiently! */
1451 hypre_CSRBlockMatrixBlockTranspose(i1, i1_t, block_size);
1452 hypre_CSRBlockMatrixBlockTranspose(i2, i2_t, block_size);
1453 ierr = hypre_CSRBlockMatrixBlockInvMult(i1_t, i2_t, o_t, block_size);
1454
1455 if (!ierr) hypre_CSRBlockMatrixBlockTranspose(o_t, o, block_size);
1456
1457 hypre_TFree(i1_t, HYPRE_MEMORY_HOST);
1458 hypre_TFree(i2_t, HYPRE_MEMORY_HOST);
1459 hypre_TFree(o_t, HYPRE_MEMORY_HOST);
1460
1461 }
1462 }
1463
1464 #endif
1465 return (ierr);
1466 }
1467
1468
1469
1470 /*--------------------------------------------------------------------------
1471 * hypre_CSRBlockMatrixBlockInvMultDiag - zeros off-d entires
1472 * (o = diag(i1)^{-1} * diag(i2))
1473 *--------------------------------------------------------------------------*/
1474 HYPRE_Int
hypre_CSRBlockMatrixBlockInvMultDiag(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)1475 hypre_CSRBlockMatrixBlockInvMultDiag(HYPRE_Complex* i1, HYPRE_Complex* i2, HYPRE_Complex* o, HYPRE_Int block_size)
1476 {
1477
1478 HYPRE_Int ierr = 0;
1479 HYPRE_Int i;
1480 HYPRE_Int sz = block_size*block_size;
1481 HYPRE_Real eps = 1.0e-8;
1482
1483 for (i = 0; i < sz; i++)
1484 o[i] = 0.0;
1485
1486 for (i = 0; i < block_size; i++)
1487 {
1488 if (hypre_cabs(i1[i*block_size + i]) > eps)
1489 {
1490 o[i*block_size + i] = i2[i*block_size + i] / i1[i*block_size + i];
1491 }
1492 else
1493 {
1494 /* hypre_printf("GE zero pivot error\n"); */
1495 return(-1);
1496 }
1497 }
1498
1499 return (ierr);
1500 }
1501
1502 /*--------------------------------------------------------------------------
1503 * hypre_CSRBlockMatrixBlockInvMultDiag2
1504 * (o = (i1)* diag(i2)^-1) - so this scales the cols of il by
1505 the diag entries in i2
1506 *--------------------------------------------------------------------------*/
1507 HYPRE_Int
hypre_CSRBlockMatrixBlockInvMultDiag2(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)1508 hypre_CSRBlockMatrixBlockInvMultDiag2(HYPRE_Complex* i1, HYPRE_Complex* i2, HYPRE_Complex* o, HYPRE_Int block_size)
1509 {
1510
1511 HYPRE_Int ierr = 0;
1512 HYPRE_Int i, j;
1513
1514 HYPRE_Real eps = 1.0e-8;
1515 HYPRE_Complex tmp;
1516
1517 for (i = 0; i < block_size; i++)
1518 {
1519 if (hypre_cabs(i2[i*block_size + i]) > eps)
1520 {
1521 tmp = 1 / i2[i*block_size + i];
1522 }
1523 else
1524 {
1525 tmp = 1.0;
1526 }
1527 for (j=0; j< block_size; j++) /* this should be re-written to access by row (not col)! */
1528 {
1529 o[j*block_size + i] = i1[j*block_size + i] *tmp;
1530 }
1531 }
1532
1533 return (ierr);
1534 }
1535
1536
1537 /*--------------------------------------------------------------------------
1538 * hypre_CSRBlockMatrixBlockInvMultDiag3
1539 * (o = (i1)* diag(i2)^-1) - so this scales the cols of il by
1540 the i2 whose diags are row sums
1541 *--------------------------------------------------------------------------*/
1542 HYPRE_Int
hypre_CSRBlockMatrixBlockInvMultDiag3(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)1543 hypre_CSRBlockMatrixBlockInvMultDiag3(HYPRE_Complex* i1, HYPRE_Complex* i2, HYPRE_Complex* o, HYPRE_Int block_size)
1544 {
1545
1546 HYPRE_Int ierr = 0;
1547 HYPRE_Int i, j;
1548 HYPRE_Real eps = 1.0e-8;
1549 HYPRE_Complex tmp, row_sum;
1550
1551 for (i = 0; i < block_size; i++)
1552 {
1553 /* get row sum of i2, row i */
1554 row_sum = 0.0;
1555 for (j=0; j< block_size; j++)
1556 {
1557 row_sum += i2[i*block_size + j];
1558 }
1559
1560 /* invert */
1561 if (hypre_cabs(row_sum) > eps)
1562 {
1563 tmp = 1 / row_sum;
1564 }
1565 else
1566 {
1567 tmp = 1.0;
1568 }
1569 /* scale col of i1 */
1570 for (j=0; j< block_size; j++) /* this should be re-written to access by row (not col)! */
1571 {
1572 o[j*block_size + i] = i1[j*block_size + i] *tmp;
1573 }
1574 }
1575
1576 return (ierr);
1577 }
1578
1579
1580
1581
1582 /*--------------------------------------------------------------------------
1583 * hypre_CSRBlockMatrixTranspose
1584 *--------------------------------------------------------------------------*/
1585
hypre_CSRBlockMatrixTranspose(hypre_CSRBlockMatrix * A,hypre_CSRBlockMatrix ** AT,HYPRE_Int data)1586 HYPRE_Int hypre_CSRBlockMatrixTranspose(hypre_CSRBlockMatrix *A,
1587 hypre_CSRBlockMatrix **AT, HYPRE_Int data)
1588
1589 {
1590 HYPRE_Complex *A_data = hypre_CSRBlockMatrixData(A);
1591 HYPRE_Int *A_i = hypre_CSRBlockMatrixI(A);
1592 HYPRE_Int *A_j = hypre_CSRBlockMatrixJ(A);
1593 HYPRE_Int num_rowsA = hypre_CSRBlockMatrixNumRows(A);
1594 HYPRE_Int num_colsA = hypre_CSRBlockMatrixNumCols(A);
1595 HYPRE_Int num_nonzerosA = hypre_CSRBlockMatrixNumNonzeros(A);
1596 HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(A);
1597
1598 HYPRE_Complex *AT_data;
1599 HYPRE_Int *AT_i;
1600 HYPRE_Int *AT_j;
1601 HYPRE_Int num_rowsAT;
1602 HYPRE_Int num_colsAT;
1603 HYPRE_Int num_nonzerosAT;
1604
1605 HYPRE_Int max_col;
1606 HYPRE_Int i, j, k, m, offset, bnnz;
1607
1608 /*--------------------------------------------------------------
1609 * First, ascertain that num_cols and num_nonzeros has been set.
1610 * If not, set them.
1611 *--------------------------------------------------------------*/
1612
1613 if (! num_nonzerosA) num_nonzerosA = A_i[num_rowsA];
1614 if (num_rowsA && ! num_colsA)
1615 {
1616 max_col = -1;
1617 for (i = 0; i < num_rowsA; ++i)
1618 for (j = A_i[i]; j < A_i[i+1]; j++)
1619 if (A_j[j] > max_col) max_col = A_j[j];
1620 num_colsA = max_col+1;
1621 }
1622 num_rowsAT = num_colsA;
1623 num_colsAT = num_rowsA;
1624 num_nonzerosAT = num_nonzerosA;
1625 bnnz = block_size * block_size;
1626
1627 *AT = hypre_CSRBlockMatrixCreate(block_size, num_rowsAT, num_colsAT,
1628 num_nonzerosAT);
1629
1630 AT_i = hypre_CTAlloc(HYPRE_Int, num_rowsAT+1, HYPRE_MEMORY_HOST);
1631 AT_j = hypre_CTAlloc(HYPRE_Int, num_nonzerosAT, HYPRE_MEMORY_HOST);
1632 hypre_CSRBlockMatrixI(*AT) = AT_i;
1633 hypre_CSRBlockMatrixJ(*AT) = AT_j;
1634 if (data)
1635 {
1636 AT_data = hypre_CTAlloc(HYPRE_Complex, num_nonzerosAT*bnnz, HYPRE_MEMORY_HOST);
1637 hypre_CSRBlockMatrixData(*AT) = AT_data;
1638 }
1639
1640 /*-----------------------------------------------------------------
1641 * Count the number of entries in each column of A (row of AT)
1642 * and fill the AT_i array.
1643 *-----------------------------------------------------------------*/
1644
1645 for (i = 0; i < num_nonzerosA; i++) ++AT_i[A_j[i]+1];
1646 for (i = 2; i <= num_rowsAT; i++) AT_i[i] += AT_i[i-1];
1647
1648 /*----------------------------------------------------------------
1649 * Load the data and column numbers of AT
1650 *----------------------------------------------------------------*/
1651
1652 for (i = 0; i < num_rowsA; i++)
1653 {
1654 for (j = A_i[i]; j < A_i[i+1]; j++)
1655 {
1656 AT_j[AT_i[A_j[j]]] = i;
1657 if (data)
1658 {
1659 offset = AT_i[A_j[j]] * bnnz;
1660 for (k = 0; k < block_size; k++)
1661 for (m = 0; m < block_size; m++)
1662 AT_data[offset+k*block_size+m] =
1663 A_data[j*bnnz+m*block_size+k];
1664 }
1665 AT_i[A_j[j]]++;
1666 }
1667 }
1668
1669 /*------------------------------------------------------------
1670 * AT_i[j] now points to the *end* of the jth row of entries
1671 * instead of the beginning. Restore AT_i to front of row.
1672 *------------------------------------------------------------*/
1673
1674 for (i = num_rowsAT; i > 0; i--) AT_i[i] = AT_i[i-1];
1675 AT_i[0] = 0;
1676
1677 return(0);
1678 }
1679
1680