1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /******************************************************************************
9  *
10  * Member functions for hypre_CSRBlockMatrix class.
11  *
12  *****************************************************************************/
13 
14 #include "_hypre_parcsr_block_mv.h"
15 
16 /*--------------------------------------------------------------------------
17  * hypre_CSRBlockMatrixCreate
18  *--------------------------------------------------------------------------*/
19 
20 hypre_CSRBlockMatrix *
hypre_CSRBlockMatrixCreate(HYPRE_Int block_size,HYPRE_Int num_rows,HYPRE_Int num_cols,HYPRE_Int num_nonzeros)21 hypre_CSRBlockMatrixCreate(HYPRE_Int block_size,
22                            HYPRE_Int num_rows,
23                            HYPRE_Int num_cols,
24                            HYPRE_Int num_nonzeros)
25 {
26    hypre_CSRBlockMatrix  *matrix;
27 
28    matrix = hypre_CTAlloc(hypre_CSRBlockMatrix,  1, HYPRE_MEMORY_HOST);
29 
30    hypre_CSRBlockMatrixData(matrix) = NULL;
31    hypre_CSRBlockMatrixI(matrix)    = NULL;
32    hypre_CSRBlockMatrixJ(matrix)    = NULL;
33    hypre_CSRBlockMatrixBigJ(matrix)    = NULL;
34    hypre_CSRBlockMatrixBlockSize(matrix) = block_size;
35    hypre_CSRBlockMatrixNumRows(matrix) = num_rows;
36    hypre_CSRBlockMatrixNumCols(matrix) = num_cols;
37    hypre_CSRBlockMatrixNumNonzeros(matrix) = num_nonzeros;
38 
39    /* set defaults */
40    hypre_CSRBlockMatrixOwnsData(matrix) = 1;
41 
42    return matrix;
43 }
44 
45 /*--------------------------------------------------------------------------
46  * hypre_CSRBlockMatrixDestroy
47  *--------------------------------------------------------------------------*/
48 
49 HYPRE_Int
hypre_CSRBlockMatrixDestroy(hypre_CSRBlockMatrix * matrix)50 hypre_CSRBlockMatrixDestroy(hypre_CSRBlockMatrix *matrix)
51 {
52    HYPRE_Int  ierr=0;
53 
54    if (matrix)
55    {
56       hypre_TFree(hypre_CSRBlockMatrixI(matrix), HYPRE_MEMORY_HOST);
57       if ( hypre_CSRBlockMatrixOwnsData(matrix) )
58       {
59          hypre_TFree(hypre_CSRBlockMatrixData(matrix), HYPRE_MEMORY_HOST);
60          hypre_TFree(hypre_CSRBlockMatrixJ(matrix), HYPRE_MEMORY_HOST);
61          hypre_TFree(hypre_CSRBlockMatrixBigJ(matrix), HYPRE_MEMORY_HOST);
62       }
63       hypre_TFree(matrix, HYPRE_MEMORY_HOST);
64    }
65 
66    return ierr;
67 }
68 
69 /*--------------------------------------------------------------------------
70  * hypre_CSRBlockMatrixInitialize
71  *--------------------------------------------------------------------------*/
72 
73 HYPRE_Int
hypre_CSRBlockMatrixInitialize(hypre_CSRBlockMatrix * matrix)74 hypre_CSRBlockMatrixInitialize(hypre_CSRBlockMatrix *matrix)
75 {
76    HYPRE_Int block_size   = hypre_CSRBlockMatrixBlockSize(matrix);
77    HYPRE_Int num_rows     = hypre_CSRBlockMatrixNumRows(matrix);
78    HYPRE_Int num_nonzeros = hypre_CSRBlockMatrixNumNonzeros(matrix);
79    HYPRE_Int ierr=0, nnz;
80 
81    if ( ! hypre_CSRBlockMatrixI(matrix) )
82       hypre_TFree(hypre_CSRBlockMatrixI(matrix), HYPRE_MEMORY_HOST);
83    if ( ! hypre_CSRBlockMatrixJ(matrix) )
84       hypre_TFree(hypre_CSRBlockMatrixJ(matrix), HYPRE_MEMORY_HOST);
85    if ( ! hypre_CSRBlockMatrixBigJ(matrix) )
86       hypre_TFree(hypre_CSRBlockMatrixBigJ(matrix), HYPRE_MEMORY_HOST);
87    if ( ! hypre_CSRBlockMatrixData(matrix) )
88       hypre_TFree(hypre_CSRBlockMatrixData(matrix), HYPRE_MEMORY_HOST);
89 
90    nnz = num_nonzeros * block_size * block_size;
91    hypre_CSRBlockMatrixI(matrix) = hypre_CTAlloc(HYPRE_Int,  num_rows + 1, HYPRE_MEMORY_HOST);
92    if (nnz) hypre_CSRBlockMatrixData(matrix) = hypre_CTAlloc(HYPRE_Complex,  nnz, HYPRE_MEMORY_HOST);
93    else     hypre_CSRBlockMatrixData(matrix) = NULL;
94    if (nnz) hypre_CSRBlockMatrixJ(matrix) = hypre_CTAlloc(HYPRE_Int, num_nonzeros, HYPRE_MEMORY_HOST);
95    else     hypre_CSRBlockMatrixJ(matrix) = NULL;
96 
97    return ierr;
98 }
99 
100 /*--------------------------------------------------------------------------
101  * hypre_CSRBlockMatrixBigInitialize
102  *--------------------------------------------------------------------------*/
103 
104 HYPRE_Int
hypre_CSRBlockMatrixBigInitialize(hypre_CSRBlockMatrix * matrix)105 hypre_CSRBlockMatrixBigInitialize(hypre_CSRBlockMatrix *matrix)
106 {
107    HYPRE_Int block_size   = hypre_CSRBlockMatrixBlockSize(matrix);
108    HYPRE_Int num_rows     = hypre_CSRBlockMatrixNumRows(matrix);
109    HYPRE_Int num_nonzeros = hypre_CSRBlockMatrixNumNonzeros(matrix);
110    HYPRE_Int ierr=0, nnz;
111 
112    if ( ! hypre_CSRBlockMatrixI(matrix) )
113       hypre_TFree(hypre_CSRBlockMatrixI(matrix), HYPRE_MEMORY_HOST);
114    if ( ! hypre_CSRBlockMatrixJ(matrix) )
115       hypre_TFree(hypre_CSRBlockMatrixJ(matrix), HYPRE_MEMORY_HOST);
116    if ( ! hypre_CSRBlockMatrixBigJ(matrix) )
117       hypre_TFree(hypre_CSRBlockMatrixBigJ(matrix), HYPRE_MEMORY_HOST);
118    if ( ! hypre_CSRBlockMatrixData(matrix) )
119       hypre_TFree(hypre_CSRBlockMatrixData(matrix), HYPRE_MEMORY_HOST);
120 
121    nnz = num_nonzeros * block_size * block_size;
122    hypre_CSRBlockMatrixI(matrix) = hypre_CTAlloc(HYPRE_Int,  num_rows + 1, HYPRE_MEMORY_HOST);
123    if (nnz) hypre_CSRBlockMatrixData(matrix) = hypre_CTAlloc(HYPRE_Complex,  nnz, HYPRE_MEMORY_HOST);
124    else     hypre_CSRBlockMatrixData(matrix) = NULL;
125    if (nnz) hypre_CSRBlockMatrixBigJ(matrix) = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_HOST);
126    else     hypre_CSRBlockMatrixJ(matrix) = NULL;
127 
128    return ierr;
129 }
130 
131 /*--------------------------------------------------------------------------
132  * hypre_CSRBlockMatrixSetDataOwner
133  *--------------------------------------------------------------------------*/
134 
135 HYPRE_Int
hypre_CSRBlockMatrixSetDataOwner(hypre_CSRBlockMatrix * matrix,HYPRE_Int owns_data)136 hypre_CSRBlockMatrixSetDataOwner(hypre_CSRBlockMatrix *matrix, HYPRE_Int owns_data)
137 {
138    HYPRE_Int    ierr=0;
139 
140    hypre_CSRBlockMatrixOwnsData(matrix) = owns_data;
141 
142    return ierr;
143 }
144 
145 /*--------------------------------------------------------------------------
146  * hypre_CSRBlockMatrixCompress
147  *--------------------------------------------------------------------------*/
148 
149 hypre_CSRMatrix *
hypre_CSRBlockMatrixCompress(hypre_CSRBlockMatrix * matrix)150 hypre_CSRBlockMatrixCompress(hypre_CSRBlockMatrix *matrix)
151 {
152    HYPRE_Int      block_size = hypre_CSRBlockMatrixBlockSize(matrix);
153    HYPRE_Int      num_rows = hypre_CSRBlockMatrixNumRows(matrix);
154    HYPRE_Int      num_cols = hypre_CSRBlockMatrixNumCols(matrix);
155    HYPRE_Int      num_nonzeros = hypre_CSRBlockMatrixNumNonzeros(matrix);
156    HYPRE_Int     *matrix_i = hypre_CSRBlockMatrixI(matrix);
157    HYPRE_Int     *matrix_j = hypre_CSRBlockMatrixJ(matrix);
158    HYPRE_Complex *matrix_data = hypre_CSRBlockMatrixData(matrix);
159    hypre_CSRMatrix* matrix_C;
160    HYPRE_Int     *matrix_C_i, *matrix_C_j, i, j, bnnz;
161    HYPRE_Complex *matrix_C_data, ddata;
162 
163    matrix_C = hypre_CSRMatrixCreate(num_rows,num_cols,num_nonzeros);
164    hypre_CSRMatrixInitialize(matrix_C);
165    matrix_C_i = hypre_CSRMatrixI(matrix_C);
166    matrix_C_j = hypre_CSRMatrixJ(matrix_C);
167    matrix_C_data = hypre_CSRMatrixData(matrix_C);
168 
169    bnnz = block_size * block_size;
170    for(i = 0; i < num_rows + 1; i++) matrix_C_i[i] = matrix_i[i];
171    for(i = 0; i < num_nonzeros; i++)
172    {
173       matrix_C_j[i] = matrix_j[i];
174       ddata = 0.0;
175       for(j = 0; j < bnnz; j++)
176          ddata += matrix_data[i*bnnz+j] * matrix_data[i*bnnz+j];
177       matrix_C_data[i] = sqrt(ddata);
178    }
179    return matrix_C;
180 }
181 
182 /*--------------------------------------------------------------------------
183  * hypre_CSRBlockMatrixConvertToCSRMatrix
184  *--------------------------------------------------------------------------*/
185 
186 hypre_CSRMatrix *
hypre_CSRBlockMatrixConvertToCSRMatrix(hypre_CSRBlockMatrix * matrix)187 hypre_CSRBlockMatrixConvertToCSRMatrix( hypre_CSRBlockMatrix *matrix )
188 {
189    HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(matrix);
190    HYPRE_Int num_rows = hypre_CSRBlockMatrixNumRows(matrix);
191    HYPRE_Int num_cols = hypre_CSRBlockMatrixNumCols(matrix);
192    HYPRE_Int num_nonzeros = hypre_CSRBlockMatrixNumNonzeros(matrix);
193    HYPRE_Int *matrix_i = hypre_CSRBlockMatrixI(matrix);
194    HYPRE_Int *matrix_j = hypre_CSRBlockMatrixJ(matrix);
195    HYPRE_Complex* matrix_data = hypre_CSRBlockMatrixData(matrix);
196 
197    hypre_CSRMatrix* matrix_C;
198    HYPRE_Int    i, j, k, ii, C_ii, bnnz, new_nrows, new_ncols, new_num_nonzeros;
199    HYPRE_Int    *matrix_C_i, *matrix_C_j;
200    HYPRE_Complex *matrix_C_data;
201 
202    bnnz      = block_size * block_size;
203    new_nrows = num_rows * block_size;
204    new_ncols = num_cols * block_size;
205    new_num_nonzeros = block_size * block_size * num_nonzeros;
206    matrix_C = hypre_CSRMatrixCreate(new_nrows,new_ncols,new_num_nonzeros);
207    hypre_CSRMatrixInitialize(matrix_C);
208    matrix_C_i    = hypre_CSRMatrixI(matrix_C);
209    matrix_C_j    = hypre_CSRMatrixJ(matrix_C);
210    matrix_C_data = hypre_CSRMatrixData(matrix_C);
211    for(i = 0; i < num_rows; i++)
212    {
213       for(j = 0; j < block_size; j++)
214          matrix_C_i[i*block_size + j] = matrix_i[i]*bnnz +
215                                j * (matrix_i[i + 1] - matrix_i[i])*block_size;
216    }
217    matrix_C_i[new_nrows] = matrix_i[num_rows] * bnnz;
218 
219    C_ii = 0;
220    for(i = 0; i < num_rows; i++)
221    {
222       for(j = 0; j < block_size; j++)
223       {
224          for(ii = matrix_i[i]; ii < matrix_i[i + 1]; ii++)
225          {
226             k = j;
227             matrix_C_j[C_ii] = matrix_j[ii]*block_size + k;
228             matrix_C_data[C_ii] = matrix_data[ii*bnnz+j*block_size+k];
229             C_ii++;
230             for(k = 0; k < block_size; k++)
231             {
232                if(j != k)
233                {
234                   matrix_C_j[C_ii] = matrix_j[ii]*block_size + k;
235                   matrix_C_data[C_ii] = matrix_data[ii*bnnz+j*block_size+k];
236                   C_ii++;
237                }
238             }
239          }
240       }
241    }
242    return matrix_C;
243 }
244 
245 /*--------------------------------------------------------------------------
246  * hypre_CSRBlockMatrixConvertFromCSRMatrix
247 
248  * this doesn't properly convert the parcsr off_diag matrices - AB 12/7/05
249    (because here we assume the matrix is square - we don't check what the
250     number of columns should be ) - it can only be used for the diag part
251  *--------------------------------------------------------------------------*/
252 
253 hypre_CSRBlockMatrix *
hypre_CSRBlockMatrixConvertFromCSRMatrix(hypre_CSRMatrix * matrix,HYPRE_Int matrix_C_block_size)254 hypre_CSRBlockMatrixConvertFromCSRMatrix(hypre_CSRMatrix *matrix,
255                                          HYPRE_Int matrix_C_block_size )
256 {
257    HYPRE_Int num_rows = hypre_CSRMatrixNumRows(matrix);
258    HYPRE_Int num_cols = hypre_CSRMatrixNumCols(matrix);
259    HYPRE_Int *matrix_i = hypre_CSRMatrixI(matrix);
260    HYPRE_Int *matrix_j = hypre_CSRMatrixJ(matrix);
261    HYPRE_Complex* matrix_data = hypre_CSRMatrixData(matrix);
262 
263    hypre_CSRBlockMatrix* matrix_C;
264    HYPRE_Int    *matrix_C_i, *matrix_C_j;
265    HYPRE_Complex *matrix_C_data;
266    HYPRE_Int    matrix_C_num_rows, matrix_C_num_cols, matrix_C_num_nonzeros;
267    HYPRE_Int    i, j, ii, jj, s_jj, index, *counter;
268 
269    matrix_C_num_rows = num_rows/matrix_C_block_size;
270    matrix_C_num_cols = num_cols/matrix_C_block_size;
271 
272    counter = hypre_CTAlloc(HYPRE_Int,  matrix_C_num_cols, HYPRE_MEMORY_HOST);
273    for(i = 0; i < matrix_C_num_cols; i++) counter[i] = -1;
274    matrix_C_num_nonzeros = 0;
275    for(i = 0; i < matrix_C_num_rows; i++)
276    {
277       for(j = 0; j < matrix_C_block_size; j++)
278       {
279          for(ii = matrix_i[i*matrix_C_block_size+j];
280              ii < matrix_i[i*matrix_C_block_size+j+1]; ii++)
281          {
282             if(counter[matrix_j[ii]/matrix_C_block_size] < i)
283             {
284                counter[matrix_j[ii]/matrix_C_block_size] = i;
285                matrix_C_num_nonzeros++;
286             }
287          }
288       }
289    }
290    matrix_C = hypre_CSRBlockMatrixCreate(matrix_C_block_size, matrix_C_num_rows,
291                                        matrix_C_num_cols, matrix_C_num_nonzeros);
292    hypre_CSRBlockMatrixInitialize(matrix_C);
293    matrix_C_i = hypre_CSRBlockMatrixI(matrix_C);
294    matrix_C_j = hypre_CSRBlockMatrixJ(matrix_C);
295    matrix_C_data = hypre_CSRBlockMatrixData(matrix_C);
296 
297    for(i = 0; i < matrix_C_num_cols; i++) counter[i] = -1;
298    jj = s_jj = 0;
299    for (i = 0; i < matrix_C_num_rows; i++)
300    {
301       matrix_C_i[i] = jj;
302       for(j = 0; j < matrix_C_block_size; j++)
303       {
304          for(ii = matrix_i[i*matrix_C_block_size+j];
305              ii < matrix_i[i*matrix_C_block_size+j+1]; ii++)
306          {
307             if(counter[matrix_j[ii]/matrix_C_block_size] < s_jj)
308             {
309                counter[matrix_j[ii]/matrix_C_block_size] = jj;
310                matrix_C_j[jj] = matrix_j[ii]/matrix_C_block_size;
311                jj++;
312             }
313             index = counter[matrix_j[ii]/matrix_C_block_size] * matrix_C_block_size *
314                     matrix_C_block_size + j * matrix_C_block_size +
315                     matrix_j[ii]%matrix_C_block_size;
316             matrix_C_data[index] = matrix_data[ii];
317          }
318       }
319       s_jj = jj;
320    }
321    matrix_C_i[matrix_C_num_rows] = matrix_C_num_nonzeros;
322 
323    hypre_TFree(counter, HYPRE_MEMORY_HOST);
324 
325 
326    return matrix_C;
327 }
328 
329 /*--------------------------------------------------------------------------
330  * hypre_CSRBlockMatrixBlockAdd
331  * (o = i1 + i2)
332  *--------------------------------------------------------------------------*/
333 HYPRE_Int
hypre_CSRBlockMatrixBlockAdd(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)334 hypre_CSRBlockMatrixBlockAdd(HYPRE_Complex* i1,
335                              HYPRE_Complex* i2,
336                              HYPRE_Complex* o,
337                              HYPRE_Int block_size)
338 {
339    HYPRE_Int i;
340    HYPRE_Int sz = block_size*block_size;
341 
342    for (i = 0; i < sz; i++)
343       o[i] = i1[i] + i2[i];
344 
345    return 0;
346 }
347 
348 
349 /*--------------------------------------------------------------------------
350  * hypre_CSRBlockMatrixBlockAddAccumulate
351  * (o = i1 + o)
352  *--------------------------------------------------------------------------*/
353 HYPRE_Int
hypre_CSRBlockMatrixBlockAddAccumulate(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Int block_size)354 hypre_CSRBlockMatrixBlockAddAccumulate(HYPRE_Complex* i1,
355                                        HYPRE_Complex* o,
356                                        HYPRE_Int block_size)
357 {
358    HYPRE_Int i;
359    HYPRE_Int sz = block_size*block_size;
360 
361    for (i = 0; i < sz; i++)
362       o[i] += i1[i];
363 
364    return 0;
365 }
366 
367 /*--------------------------------------------------------------------------
368  * hypre_CSRBlockMatrixBlockAddAccumulateDiag
369  * (diag(o) = diag(i1) + diag(o))
370  *--------------------------------------------------------------------------*/
371 HYPRE_Int
hypre_CSRBlockMatrixBlockAddAccumulateDiag(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Int block_size)372 hypre_CSRBlockMatrixBlockAddAccumulateDiag(HYPRE_Complex* i1,
373                                            HYPRE_Complex* o,
374                                            HYPRE_Int block_size)
375 {
376    HYPRE_Int i;
377 
378    for (i = 0; i < block_size; i++)
379          o[i*block_size+i] += i1[i*block_size+i];
380    return 0;
381 }
382 
383 /*--------------------------------------------------------------------------
384  * hypre_CSRBlockMatrixBlockAddAccumulateDiagCheckSign
385  * only add elements of sign*i1 that are negative (sign is size block_size)
386  * (diag(o) = diag(i1) + diag(o))
387  *--------------------------------------------------------------------------*/
388 HYPRE_Int
hypre_CSRBlockMatrixBlockAddAccumulateDiagCheckSign(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Int block_size,HYPRE_Real * sign)389 hypre_CSRBlockMatrixBlockAddAccumulateDiagCheckSign(HYPRE_Complex* i1,
390                                                     HYPRE_Complex* o,
391                                                     HYPRE_Int block_size,
392                                                     HYPRE_Real *sign)
393 {
394    HYPRE_Int i;
395    HYPRE_Real tmp;
396 
397    for (i = 0; i < block_size; i++)
398    {
399       tmp = (HYPRE_Real) i1[i*block_size+i]*sign[i];
400       if (tmp < 0)
401          o[i*block_size+i] += i1[i*block_size+i];
402    }
403 
404    return 0;
405 }
406 
407 /*--------------------------------------------------------------------------
408  *  hypre_CSRBlockMatrixComputeSign
409 
410  * o = sign(diag(i1))
411  *--------------------------------------------------------------------------*/
412 
hypre_CSRBlockMatrixComputeSign(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Int block_size)413 HYPRE_Int hypre_CSRBlockMatrixComputeSign(HYPRE_Complex *i1,
414                                           HYPRE_Complex *o,
415                                           HYPRE_Int block_size)
416 {
417    HYPRE_Int i;
418 
419     for (i = 0; i < block_size; i++)
420    {
421       if ((HYPRE_Real) i1[i*block_size+i] < 0)
422          o[i] = -1;
423       else
424          o[i] = 1;
425    }
426 
427    return 0;
428 }
429 
430 
431 /*--------------------------------------------------------------------------
432  * hypre_CSRBlockMatrixBlockSetScalar
433  * (each entry in block o is set to beta )
434  *--------------------------------------------------------------------------*/
435 HYPRE_Int
hypre_CSRBlockMatrixBlockSetScalar(HYPRE_Complex * o,HYPRE_Complex beta,HYPRE_Int block_size)436 hypre_CSRBlockMatrixBlockSetScalar(HYPRE_Complex* o,
437                                    HYPRE_Complex beta,
438                                    HYPRE_Int block_size)
439 {
440    HYPRE_Int i;
441    HYPRE_Int sz = block_size*block_size;
442 
443    for (i = 0; i < sz; i++)
444       o[i] = beta;
445 
446    return 0;
447 }
448 
449 
450 /*--------------------------------------------------------------------------
451  * hypre_CSRBlockMatrixBlockCopyData
452  * (o = beta*i1 )
453  *--------------------------------------------------------------------------*/
454 HYPRE_Int
hypre_CSRBlockMatrixBlockCopyData(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Complex beta,HYPRE_Int block_size)455 hypre_CSRBlockMatrixBlockCopyData(HYPRE_Complex* i1,
456                                   HYPRE_Complex* o,
457                                   HYPRE_Complex beta,
458                                   HYPRE_Int block_size)
459 {
460    HYPRE_Int i;
461    HYPRE_Int sz = block_size*block_size;
462 
463    for (i = 0; i < sz; i++)
464       o[i] = beta*i1[i];
465 
466    return 0;
467 }
468 
469 /*--------------------------------------------------------------------------
470  * hypre_CSRBlockMatrixBlockCopyDataDiag - zeros off-diag entries
471  * (o = beta*diag(i1))
472  *--------------------------------------------------------------------------*/
473 HYPRE_Int
hypre_CSRBlockMatrixBlockCopyDataDiag(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Complex beta,HYPRE_Int block_size)474 hypre_CSRBlockMatrixBlockCopyDataDiag(HYPRE_Complex* i1,
475                                       HYPRE_Complex* o,
476                                       HYPRE_Complex beta,
477                                       HYPRE_Int block_size)
478 {
479     HYPRE_Int i;
480 
481     HYPRE_Int sz = block_size*block_size;
482 
483     for (i = 0; i < sz; i++)
484        o[i] = 0.0;
485 
486     for (i = 0; i < block_size; i++)
487        o[i*block_size+i] = beta*i1[i*block_size+i];
488 
489    return 0;
490 }
491 
492 /*--------------------------------------------------------------------------
493  * hypre_CSRBlockMatrixBlockTranspose
494  * (o = i1' )
495  *--------------------------------------------------------------------------*/
496 HYPRE_Int
hypre_CSRBlockMatrixBlockTranspose(HYPRE_Complex * i1,HYPRE_Complex * o,HYPRE_Int block_size)497 hypre_CSRBlockMatrixBlockTranspose(HYPRE_Complex* i1,
498                                    HYPRE_Complex* o,
499                                    HYPRE_Int block_size)
500 {
501    HYPRE_Int i, j;
502 
503    for (i = 0; i < block_size; i++)
504       for (j = 0; j < block_size; j++)
505          o[i*block_size+j] = i1[j*block_size+i];
506    return 0;
507 }
508 
509 /*--------------------------------------------------------------------------
510  * hypre_CSRBlockMatrixBlockNorm
511  * (out = norm(data) )
512  *
513  *  (note: these are not all actually "norms")
514  *
515  *--------------------------------------------------------------------------*/
516 HYPRE_Int
hypre_CSRBlockMatrixBlockNorm(HYPRE_Int norm_type,HYPRE_Complex * data,HYPRE_Real * out,HYPRE_Int block_size)517 hypre_CSRBlockMatrixBlockNorm(HYPRE_Int norm_type, HYPRE_Complex* data, HYPRE_Real* out, HYPRE_Int block_size)
518 {
519    HYPRE_Int ierr = 0;
520    HYPRE_Int i,j;
521    HYPRE_Real sum = 0.0;
522    HYPRE_Real *totals;
523    HYPRE_Int sz = block_size*block_size;
524 
525    switch (norm_type)
526    {
527       case 6: /* sum of all elements in the block  */
528       {
529          for(i = 0; i < sz; i++)
530          {
531             sum += (HYPRE_Real)(data[i]);
532          }
533          break;
534       }
535       case 5: /* one norm  - max col sum*/
536       {
537 
538          totals = hypre_CTAlloc(HYPRE_Real,  block_size, HYPRE_MEMORY_HOST);
539          for(i = 0; i < block_size; i++) /* row */
540          {
541             for(j = 0; j < block_size; j++) /* col */
542             {
543                totals[j] += hypre_cabs(data[i*block_size + j]);
544             }
545          }
546 
547          sum = totals[0];
548          for(j = 1; j < block_size; j++) /* col */
549          {
550             if (totals[j] > sum) sum = totals[j];
551          }
552          hypre_TFree(totals, HYPRE_MEMORY_HOST);
553 
554          break;
555 
556       }
557       case 4: /* inf norm - max row sum */
558       {
559 
560          totals = hypre_CTAlloc(HYPRE_Real,  block_size, HYPRE_MEMORY_HOST);
561          for(i = 0; i < block_size; i++) /* row */
562          {
563             for(j = 0; j < block_size; j++) /* col */
564             {
565                totals[i] += hypre_cabs(data[i*block_size + j]);
566             }
567          }
568 
569          sum = totals[0];
570          for(i = 1; i < block_size; i++) /* row */
571          {
572             if (totals[i] > sum) sum = totals[i];
573          }
574          hypre_TFree(totals, HYPRE_MEMORY_HOST);
575 
576          break;
577       }
578 
579       case 3: /* largest element of block (return value includes sign) */
580       {
581 
582          sum = (HYPRE_Real)data[0];
583 
584          for(i = 0; i < sz; i++)
585          {
586             if (hypre_cabs(data[i]) > hypre_cabs(sum))  sum = (HYPRE_Real)data[i];
587          }
588 
589          break;
590       }
591       case 2: /* sum of abs values of all elements in the block  */
592       {
593          for(i = 0; i < sz; i++)
594          {
595                sum += hypre_cabs(data[i]);
596          }
597          break;
598       }
599 
600 
601       default: /* 1 = frobenius*/
602       {
603           for(i = 0; i < sz; i++)
604           {
605              sum += ((HYPRE_Real)data[i])*((HYPRE_Real)data[i]);
606           }
607           sum = sqrt(sum);
608       }
609    }
610 
611    *out = sum;
612 
613    return ierr;
614 }
615 
616 /*--------------------------------------------------------------------------
617  * hypre_CSRBlockMatrixBlockMultAdd
618  * (o = i1 * i2 + beta * o)
619  *--------------------------------------------------------------------------*/
620 HYPRE_Int
hypre_CSRBlockMatrixBlockMultAdd(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex beta,HYPRE_Complex * o,HYPRE_Int block_size)621 hypre_CSRBlockMatrixBlockMultAdd(HYPRE_Complex* i1,
622                                  HYPRE_Complex* i2,
623                                  HYPRE_Complex beta,
624                                  HYPRE_Complex* o,
625                                  HYPRE_Int block_size)
626 {
627 
628 #if LB_VERSION
629    {
630       HYPRE_Complex alp = 1.0;
631       dgemm_("N","N", &block_size, &block_size, &block_size, &alp, i2, &block_size, i1,
632           &block_size, &beta, o, &block_size);
633    }
634 #else
635    {
636       HYPRE_Int    i, j, k;
637       HYPRE_Complex ddata;
638 
639       if (beta == 0.0)
640       {
641          for (i = 0; i < block_size; i++)
642          {
643             for (j = 0; j < block_size; j++)
644             {
645                ddata = 0.0;
646                for (k = 0; k < block_size; k++)
647                {
648                   ddata += i1[i*block_size + k] * i2[k*block_size + j];
649                }
650                o[i*block_size + j] = ddata;
651             }
652          }
653       }
654       else if (beta == 1.0)
655       {
656          for(i = 0; i < block_size; i++)
657          {
658             for(j = 0; j < block_size; j++)
659             {
660                ddata = o[i*block_size + j];
661                for(k = 0; k < block_size; k++)
662                   ddata += i1[i*block_size + k] * i2[k*block_size + j];
663                o[i*block_size + j] = ddata;
664             }
665          }
666       }
667       else
668       {
669          for(i = 0; i < block_size; i++)
670          {
671             for(j = 0; j < block_size; j++)
672             {
673                ddata = beta * o[i*block_size + j];
674                for(k = 0; k < block_size; k++)
675                   ddata += i1[i*block_size + k] * i2[k*block_size + j];
676                o[i*block_size + j] = ddata;
677             }
678          }
679       }
680    }
681 
682 #endif
683 
684    return 0;
685 }
686 
687 
688 /*--------------------------------------------------------------------------
689  * hypre_CSRBlockMatrixBlockMultAddDiag
690  * (diag(o) = diag(i1) * diag(i2) + beta * diag(o))
691  *--------------------------------------------------------------------------*/
692 HYPRE_Int
hypre_CSRBlockMatrixBlockMultAddDiag(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex beta,HYPRE_Complex * o,HYPRE_Int block_size)693 hypre_CSRBlockMatrixBlockMultAddDiag(HYPRE_Complex* i1,
694                                      HYPRE_Complex* i2,
695                                      HYPRE_Complex beta,
696                                      HYPRE_Complex* o,
697                                      HYPRE_Int block_size)
698 {
699    HYPRE_Int    i;
700 
701    if (beta == 0.0)
702    {
703       for (i = 0; i < block_size; i++)
704       {
705          o[i*block_size + i] = i1[i*block_size + i] * i2[i*block_size + i];
706       }
707    }
708    else if (beta == 1.0)
709    {
710       for(i = 0; i < block_size; i++)
711       {
712          o[i*block_size + i] = o[i*block_size + i] + i1[i*block_size + i] * i2[i*block_size + i];
713       }
714    }
715    else
716    {
717       for(i = 0; i < block_size; i++)
718       {
719             o[i*block_size + i] = beta* o[i*block_size + i] + i1[i*block_size + i] * i2[i*block_size + i];
720       }
721    }
722    return 0;
723 }
724 
725 /*--------------------------------------------------------------------------
726  * hypre_CSRBlockMatrixBlockMultAddDiagCheckSign
727  *
728  *  only mult elements if sign*diag(i2) is negative
729  *(diag(o) = diag(i1) * diag(i2) + beta * diag(o))
730  *--------------------------------------------------------------------------*/
731 HYPRE_Int
hypre_CSRBlockMatrixBlockMultAddDiagCheckSign(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex beta,HYPRE_Complex * o,HYPRE_Int block_size,HYPRE_Real * sign)732 hypre_CSRBlockMatrixBlockMultAddDiagCheckSign(HYPRE_Complex *i1,
733                                               HYPRE_Complex *i2,
734                                               HYPRE_Complex  beta,
735                                               HYPRE_Complex *o,
736                                               HYPRE_Int      block_size,
737                                               HYPRE_Real    *sign)
738 {
739    HYPRE_Int    i;
740    HYPRE_Real tmp;
741 
742    if (beta == 0.0)
743    {
744       for (i = 0; i < block_size; i++)
745       {
746          tmp = (HYPRE_Real) i2[i*block_size+i]*sign[i];
747          if (tmp < 0)
748             o[i*block_size + i] = i1[i*block_size + i] * i2[i*block_size + i];
749       }
750    }
751    else if (beta == 1.0)
752    {
753       for(i = 0; i < block_size; i++)
754       {
755          tmp = (HYPRE_Real) i2[i*block_size+i]*sign[i];
756          if (tmp < 0)
757             o[i*block_size + i] = o[i*block_size + i] + i1[i*block_size + i] * i2[i*block_size + i];
758       }
759    }
760    else
761    {
762       for(i = 0; i < block_size; i++)
763       {
764          tmp = i2[i*block_size+i]*sign[i];
765          if (tmp < 0)
766             o[i*block_size + i] = beta* o[i*block_size + i] + i1[i*block_size + i] * i2[i*block_size + i];
767       }
768    }
769    return 0;
770 }
771 
772 /*--------------------------------------------------------------------------
773  * hypre_CSRBlockMatrixBlockMultAddDiag2 (scales cols of il by diag of i2)
774  * ((o) = (i1) * diag(i2) + beta * (o))
775  *--------------------------------------------------------------------------*/
776 HYPRE_Int
hypre_CSRBlockMatrixBlockMultAddDiag2(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex beta,HYPRE_Complex * o,HYPRE_Int block_size)777 hypre_CSRBlockMatrixBlockMultAddDiag2(HYPRE_Complex* i1,
778                                       HYPRE_Complex* i2,
779                                       HYPRE_Complex beta,
780                                       HYPRE_Complex* o,
781                                       HYPRE_Int block_size)
782 {
783    HYPRE_Int    i, j;
784 
785    if (beta == 0.0)
786    {
787       for (i = 0; i < block_size; i++)
788       {
789          for (j = 0; j < block_size; j++)
790          {
791             o[i*block_size + j] =  i1[i*block_size + j] * i2[j*block_size + j];
792 
793          }
794       }
795    }
796    else if (beta == 1.0)
797    {
798       for (i = 0; i < block_size; i++)
799       {
800          for (j = 0; j < block_size; j++)
801          {
802             o[i*block_size + j] =  o[i*block_size + j] +  i1[i*block_size + j] * i2[j*block_size + j];
803 
804          }
805       }
806 
807 
808    }
809    else
810    {
811      for (i = 0; i < block_size; i++)
812       {
813          for (j = 0; j < block_size; j++)
814          {
815             o[i*block_size + j] =  beta * o[i*block_size + j] +  i1[i*block_size + j] * i2[j*block_size + j];
816 
817          }
818       }
819    }
820    return 0;
821 }
822 
823 /*--------------------------------------------------------------------------
824  * hypre_CSRBlockMatrixBlockMultAddDiag3 (scales cols of il by i2 -
825                                           whose diag elements are row sums)
826  * ((o) = (i1) * diag(i2) + beta * (o))
827  *--------------------------------------------------------------------------*/
828 HYPRE_Int
hypre_CSRBlockMatrixBlockMultAddDiag3(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex beta,HYPRE_Complex * o,HYPRE_Int block_size)829 hypre_CSRBlockMatrixBlockMultAddDiag3(HYPRE_Complex* i1,
830                                       HYPRE_Complex* i2,
831                                       HYPRE_Complex beta,
832                                       HYPRE_Complex* o,
833                                       HYPRE_Int block_size)
834 {
835    HYPRE_Int    i, j;
836 
837    HYPRE_Complex *row_sum;
838 
839    row_sum = hypre_CTAlloc(HYPRE_Complex,  block_size, HYPRE_MEMORY_HOST);
840    for (i = 0; i < block_size; i++)
841       {
842          for (j = 0; j < block_size; j++)
843          {
844             row_sum[i] +=  i2[i*block_size + j];
845          }
846       }
847 
848    if (beta == 0.0)
849    {
850       for (i = 0; i < block_size; i++)
851       {
852          for (j = 0; j < block_size; j++)
853          {
854             o[i*block_size + j] =  i1[i*block_size + j] * row_sum[j];
855 
856          }
857       }
858    }
859    else if (beta == 1.0)
860    {
861       for (i = 0; i < block_size; i++)
862       {
863          for (j = 0; j < block_size; j++)
864          {
865             o[i*block_size + j] =  o[i*block_size + j] +  i1[i*block_size + j] * row_sum[j];
866 
867          }
868       }
869    }
870    else
871    {
872      for (i = 0; i < block_size; i++)
873       {
874          for (j = 0; j < block_size; j++)
875          {
876             o[i*block_size + j] =  beta * o[i*block_size + j] +  i1[i*block_size + j] * row_sum[j];
877 
878          }
879       }
880    }
881 
882    hypre_TFree(row_sum, HYPRE_MEMORY_HOST);
883 
884    return 0;
885 }
886 /*--------------------------------------------------------------------------
887  * hypre_CSRBlockMatrixBlockMatvec
888  * (ov = alpha* mat * v + beta * ov)
889  * mat is the matrix - size is block_size^2
890  * alpha and beta are scalars
891  *--------------------------------------------------------------------------*/
892 
893 HYPRE_Int
hypre_CSRBlockMatrixBlockMatvec(HYPRE_Complex alpha,HYPRE_Complex * mat,HYPRE_Complex * v,HYPRE_Complex beta,HYPRE_Complex * ov,HYPRE_Int block_size)894 hypre_CSRBlockMatrixBlockMatvec(HYPRE_Complex alpha,
895                                 HYPRE_Complex* mat,
896                                 HYPRE_Complex* v,
897                                 HYPRE_Complex beta,
898                                 HYPRE_Complex* ov,
899                                 HYPRE_Int block_size)
900 {
901    HYPRE_Int ierr = 0;
902 
903 #if LB_VERSION
904    {
905       HYPRE_Int one = 1;
906 
907       dgemv_("T",  &block_size, &block_size, &alpha, mat, &block_size, v,
908              &one, &beta, ov, &one);
909    }
910 
911 #else
912    {
913       HYPRE_Int    i, j;
914       HYPRE_Complex ddata;
915 
916       /* if alpha = 0, then no matvec */
917       if (alpha == 0.0)
918       {
919          for (j = 0; j < block_size; j++)
920          {
921             ov[j] *= beta;
922          }
923          return ierr;
924       }
925 
926       /* ov = (beta/alpha) * ov; */
927       ddata = beta / alpha;
928       if (ddata != 1.0)
929       {
930          if (ddata == 0.0)
931          {
932             for (j = 0; j < block_size; j++)
933             {
934                ov[j] = 0.0;
935             }
936          }
937          else
938          {
939             for (j = 0; j < block_size; j++)
940             {
941                ov[j] *= ddata;
942             }
943          }
944       }
945 
946       /* ov = ov + mat*v */
947       for (i = 0; i < block_size; i++)
948       {
949          ddata =  ov[i];
950          for (j = 0; j < block_size; j++)
951          {
952             ddata += mat[i*block_size + j] * v[j];
953          }
954          ov[i] = ddata;
955       }
956 
957       /* ov = alpha*ov */
958       if (alpha != 1.0)
959       {
960          for (j = 0; j < block_size; j++)
961          {
962             ov[j] *= alpha;
963          }
964       }
965    }
966 
967 #endif
968 
969    return ierr;
970 
971 }
972 
973 
974 /*--------------------------------------------------------------------------
975  * hypre_CSRBlockMatrixBlockInvMatvec
976  * (ov = mat^{-1} * v)
977  * o and v are vectors
978  * mat is the matrix - size is block_size^2
979  *--------------------------------------------------------------------------*/
980 HYPRE_Int
hypre_CSRBlockMatrixBlockInvMatvec(HYPRE_Complex * mat,HYPRE_Complex * v,HYPRE_Complex * ov,HYPRE_Int block_size)981 hypre_CSRBlockMatrixBlockInvMatvec(HYPRE_Complex* mat, HYPRE_Complex* v,
982                                    HYPRE_Complex* ov, HYPRE_Int block_size)
983 {
984    HYPRE_Int ierr = 0;
985    HYPRE_Complex *mat_i;
986 
987    mat_i = hypre_CTAlloc(HYPRE_Complex,  block_size*block_size, HYPRE_MEMORY_HOST);
988 
989 #if LB_VERSION
990    {
991 
992       HYPRE_Int one, info;
993       HYPRE_Int *piv;
994       HYPRE_Int sz;
995 
996 
997       one = 1;
998       piv = hypre_CTAlloc(HYPRE_Int,  block_size, HYPRE_MEMORY_HOST);
999       sz = block_size*block_size;
1000 
1001 
1002      /* copy v to ov and  mat to mat_i*/
1003 
1004       dcopy_(&sz, mat, &one, mat_i, &one);
1005       dcopy_(&block_size, v, &one, ov, &one);
1006 
1007       /* writes over mat_i with LU */
1008       dgetrf_(&block_size,&block_size, mat_i, &block_size, piv , &info);
1009       if (info)
1010       {
1011          hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1012          hypre_TFree(piv, HYPRE_MEMORY_HOST);
1013          return(-1);
1014       }
1015 
1016       /* writes over ov */
1017       dgetrs_("T", &block_size, &one ,
1018            mat_i, &block_size, piv, ov, &block_size, &info);
1019       if (info)
1020       {
1021          hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1022          hypre_TFree(piv, HYPRE_MEMORY_HOST);
1023          return(-1);
1024       }
1025 
1026       hypre_TFree(piv, HYPRE_MEMORY_HOST);
1027 
1028    }
1029 
1030 #else
1031    {
1032       HYPRE_Int m, j, k;
1033       HYPRE_Int piv_row;
1034       HYPRE_Real eps;
1035       HYPRE_Complex factor;
1036       HYPRE_Complex piv, tmp;
1037       eps = 1.0e-6;
1038 
1039       if (block_size ==1 )
1040       {
1041          if (hypre_cabs(mat[0]) > 1e-10)
1042          {
1043             ov[0] = v[0]/mat[0];
1044             hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1045             return(ierr);
1046          }
1047          else
1048          {
1049             /* hypre_printf("GE zero pivot error\n"); */
1050             hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1051             return(-1);
1052          }
1053       }
1054       else
1055       {
1056          /* copy v to ov and mat to mat_i*/
1057          for (k = 0; k < block_size; k++)
1058          {
1059             ov[k] = v[k];
1060             for (j=0; j<block_size; j++)
1061             {
1062                mat_i[k*block_size + j] =  mat[k*block_size + j];
1063             }
1064          }
1065          /* start ge  - turning m_i into U factor (don't save L - just apply to
1066             rhs - which is ov)*/
1067          /* we do partial pivoting for size */
1068 
1069          /* loop through the rows (row k) */
1070          for (k = 0; k < block_size-1; k++)
1071          {
1072             piv = mat_i[k*block_size+k];
1073             piv_row = k;
1074 
1075             /* find the largest pivot in position k*/
1076             for (j=k+1; j < block_size; j++)
1077             {
1078                if (hypre_cabs(mat_i[j*block_size+k]) > hypre_cabs(piv))
1079                {
1080                   piv =  mat_i[j*block_size+k];
1081                   piv_row = j;
1082                }
1083 
1084             }
1085             if (piv_row !=k) /* do a row exchange  - rows k and piv_row*/
1086             {
1087                for (j=0; j < block_size; j++)
1088                {
1089                   tmp = mat_i[k*block_size + j];
1090                   mat_i[k*block_size + j] = mat_i[piv_row*block_size + j];
1091                   mat_i[piv_row*block_size + j] = tmp;
1092                }
1093                tmp = ov[k];
1094                ov[k] = ov[piv_row];
1095                ov[piv_row] = tmp;
1096             }
1097             /* end of pivoting */
1098 
1099             if (hypre_cabs(piv) > eps)
1100             {
1101                /* now we can factor into U */
1102                for (j = k+1; j < block_size; j++)
1103                {
1104                   factor = mat_i[j*block_size+k]/piv;
1105                   for (m = k+1; m < block_size; m++)
1106                   {
1107                      mat_i[j*block_size+m]  -= factor * mat_i[k*block_size+m];
1108                   }
1109                   /* Elimination step for rhs */
1110                   ov[j]  -= factor * ov[k];
1111                }
1112             }
1113             else
1114             {
1115                /* hypre_printf("Block of matrix is nearly singular: zero pivot error\n");  */
1116                hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1117                return(-1);
1118             }
1119          }
1120 
1121          /* we also need to check the pivot in the last row to see if it is zero */
1122          k = block_size - 1; /* last row */
1123          if ( hypre_cabs(mat_i[k*block_size+k]) < eps)
1124          {
1125             /* hypre_printf("Block of matrix is nearly singular: zero pivot error\n");  */
1126             hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1127             return(-1);
1128          }
1129 
1130          /* Back Substitution  - do rhs (U is now in m_i1)*/
1131          for (k = block_size-1; k > 0; --k)
1132          {
1133             ov[k] /= mat_i[k*block_size+k];
1134             for (j = 0; j < k; j++)
1135             {
1136                if (mat_i[j*block_size+k] != 0.0)
1137                {
1138                   ov[j] -= ov[k] * mat_i[j*block_size+k];
1139                }
1140             }
1141          }
1142          ov[0] /= mat_i[0];
1143 
1144       }
1145 
1146    }
1147 #endif
1148 
1149 
1150    hypre_TFree(mat_i, HYPRE_MEMORY_HOST);
1151 
1152    return (ierr);
1153 }
1154 
1155 
1156 
1157 /*--------------------------------------------------------------------------
1158  * hypre_CSRBlockMatrixBlockInvMult
1159  * (o = i1^{-1} * i2)
1160  *--------------------------------------------------------------------------*/
1161 HYPRE_Int
hypre_CSRBlockMatrixBlockInvMult(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)1162 hypre_CSRBlockMatrixBlockInvMult(HYPRE_Complex* i1, HYPRE_Complex* i2, HYPRE_Complex* o, HYPRE_Int block_size)
1163 {
1164 
1165    HYPRE_Int ierr = 0;
1166    HYPRE_Int i, j;
1167    HYPRE_Complex *m_i1;
1168 
1169    m_i1 = hypre_CTAlloc(HYPRE_Complex,  block_size*block_size, HYPRE_MEMORY_HOST);
1170 
1171 #if LB_VERSION
1172    {
1173 
1174       HYPRE_Int one, info;
1175       HYPRE_Int *piv;
1176       HYPRE_Int sz;
1177 
1178       HYPRE_Complex *i2_t;
1179 
1180       one = 1;
1181       i2_t = hypre_CTAlloc(HYPRE_Complex,  block_size*block_size, HYPRE_MEMORY_HOST);
1182       piv = hypre_CTAlloc(HYPRE_Int,  block_size, HYPRE_MEMORY_HOST);
1183 
1184 
1185      /* copy i1 to m_i1*/
1186       sz = block_size*block_size;
1187       dcopy_(&sz, i1, &one, m_i1, &one);
1188 
1189 
1190       /* writes over m_i1 with LU */
1191       dgetrf_(&block_size, &block_size, m_i1, &block_size, piv , &info);
1192       if (info)
1193       {
1194          hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1195          hypre_TFree(i2_t, HYPRE_MEMORY_HOST);
1196          hypre_TFree(piv, HYPRE_MEMORY_HOST);
1197          return (-1);
1198       }
1199 
1200       /* need the transpose of i_2*/
1201      for (i=0; i < block_size; i++)
1202      {
1203         for (j=0; j< block_size; j++)
1204         {
1205            i2_t[i*block_size+j] = i2[j*block_size+i];
1206         }
1207      }
1208 
1209       /* writes over i2_t */
1210       dgetrs_("T", &block_size, &block_size,
1211            m_i1, &block_size, piv, i2_t, &block_size, &info);
1212       if (info)
1213       {
1214          hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1215          hypre_TFree(i2_t, HYPRE_MEMORY_HOST);
1216          hypre_TFree(piv, HYPRE_MEMORY_HOST);
1217          return (-1);
1218       }
1219 
1220       /* ans. is the transpose of i2_t*/
1221       for (i=0; i < block_size; i++)
1222       {
1223          for (j=0; j< block_size; j++)
1224          {
1225             o[i*block_size+j] = i2_t[j*block_size+i];
1226          }
1227       }
1228 
1229       hypre_TFree(i2_t, HYPRE_MEMORY_HOST);
1230       hypre_TFree(piv, HYPRE_MEMORY_HOST);
1231 
1232    }
1233 
1234 #else
1235    {
1236       HYPRE_Int m, k;
1237       HYPRE_Int piv_row;
1238       HYPRE_Real eps;
1239       HYPRE_Complex factor;
1240       HYPRE_Complex piv, tmp;
1241 
1242       eps = 1.0e-6;
1243 
1244       if (block_size ==1 )
1245       {
1246          if (hypre_cabs(m_i1[0]) > 1e-10)
1247          {
1248             o[0] = i2[0]/i1[0];
1249             hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1250             return(ierr);
1251          }
1252          else
1253          {
1254             /* hypre_printf("GE zero pivot error\n"); */
1255             hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1256             return(-1);
1257          }
1258       }
1259       else
1260       {
1261          /* copy i2 to o and i1 to m_i1*/
1262          for (k = 0; k < block_size*block_size; k++)
1263          {
1264             o[k] = i2[k];
1265             m_i1[k] = i1[k];
1266          }
1267 
1268 
1269          /* start ge  - turning m_i1 into U factor (don't save L - just apply to
1270             rhs - which is o)*/
1271          /* we do partial pivoting for size */
1272 
1273          /* loop through the rows (row k) */
1274          for (k = 0; k < block_size-1; k++)
1275          {
1276             piv = m_i1[k*block_size+k];
1277             piv_row = k;
1278 
1279             /* find the largest pivot in position k*/
1280             for (j=k+1; j < block_size; j++)
1281             {
1282                if (hypre_cabs(m_i1[j*block_size+k]) > hypre_cabs(piv))
1283                {
1284                   piv =  m_i1[j*block_size+k];
1285                   piv_row = j;
1286                }
1287 
1288             }
1289             if (piv_row !=k) /* do a row exchange  - rows k and piv_row*/
1290             {
1291                for (j=0; j < block_size; j++)
1292                {
1293                   tmp = m_i1[k*block_size + j];
1294                   m_i1[k*block_size + j] = m_i1[piv_row*block_size + j];
1295                   m_i1[piv_row*block_size + j] = tmp;
1296 
1297                   tmp = o[k*block_size + j];
1298                   o[k*block_size + j] = o[piv_row*block_size + j];
1299                   o[piv_row*block_size + j] = tmp;
1300 
1301                }
1302             }
1303             /* end of pivoting */
1304 
1305 
1306             if (hypre_cabs(piv) > eps)
1307             {
1308                /* now we can factor into U */
1309                for (j = k+1; j < block_size; j++)
1310                {
1311                   factor = m_i1[j*block_size+k]/piv;
1312                   for (m = k+1; m < block_size; m++)
1313                   {
1314                      m_i1[j*block_size+m]  -= factor * m_i1[k*block_size+m];
1315                   }
1316                   /* Elimination step for rhs */
1317                   /* do for each of the "rhs" */
1318                   for (i=0; i < block_size; i++)
1319                   {
1320                      /* o(row, col) = o(row*block_size + col) */
1321                      o[j*block_size+i] -= factor * o[k*block_size + i];
1322                   }
1323                }
1324             }
1325             else
1326             {
1327                /* hypre_printf("Block of matrix is nearly singular: zero pivot error\n"); */
1328                hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1329                return(-1);
1330             }
1331          }
1332 
1333 
1334          /* we also need to check the pivot in the last row to see if it is zero */
1335          k = block_size - 1; /* last row */
1336          if ( hypre_cabs(m_i1[k*block_size+k]) < eps)
1337          {
1338             /* hypre_printf("Block of matrix is nearly singular: zero pivot error\n"); */
1339             hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1340             return(-1);
1341          }
1342 
1343 
1344          /* Back Substitution  - do for each "rhs" (U is now in m_i1)*/
1345          for (i=0; i < block_size; i++)
1346          {
1347             for (k = block_size-1; k > 0; --k)
1348             {
1349                o[k*block_size + i] /= m_i1[k*block_size+k];
1350                for (j = 0; j < k; j++)
1351                {
1352                   if (m_i1[j*block_size+k] != 0.0)
1353                   {
1354                      o[j*block_size + i] -= o[k*block_size + i] * m_i1[j*block_size+k];
1355                   }
1356                }
1357             }
1358             o[0*block_size + i] /= m_i1[0];
1359          }
1360       }
1361    }
1362 
1363 #endif
1364    hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1365 
1366    return ierr;
1367 }
1368 
1369 
1370 /*--------------------------------------------------------------------------
1371  * hypre_CSRBlockMatrixBlockMultInv
1372  * (o = i2*il^(-1))
1373  *--------------------------------------------------------------------------*/
1374 HYPRE_Int
hypre_CSRBlockMatrixBlockMultInv(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)1375 hypre_CSRBlockMatrixBlockMultInv(HYPRE_Complex* i1, HYPRE_Complex* i2, HYPRE_Complex* o, HYPRE_Int block_size)
1376 {
1377 
1378    HYPRE_Int ierr = 0;
1379 
1380 
1381 #if LB_VERSION
1382 
1383    {
1384      /* same as solving A^T C^T = B^T */
1385       HYPRE_Complex *m_i1;
1386       HYPRE_Int info;
1387       HYPRE_Int *piv;
1388       HYPRE_Int sz, one;
1389 
1390 
1391       piv = hypre_CTAlloc(HYPRE_Int,  block_size, HYPRE_MEMORY_HOST);
1392       m_i1 = hypre_CTAlloc(HYPRE_Complex,  block_size*block_size, HYPRE_MEMORY_HOST);
1393       one = 1;
1394       sz = block_size*block_size;
1395 
1396      /* copy i1 to m_i1 and i2 to o*/
1397 
1398       dcopy_(&sz, i1, &one, m_i1, &one);
1399       dcopy_(&sz, i2, &one, o, &one);
1400 
1401       /* writes over m_i1 with LU */
1402       dgetrf_(&block_size, &block_size, m_i1, &block_size, piv , &info);
1403       if (info)
1404       {
1405          hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1406          hypre_TFree(piv, HYPRE_MEMORY_HOST);
1407          return (-1);
1408       }
1409      /* writes over B */
1410       dgetrs_("N", &block_size, &block_size,
1411            m_i1, &block_size, piv, o, &block_size, &info);
1412       if (info)
1413       {
1414          hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1415          hypre_TFree(piv, HYPRE_MEMORY_HOST);
1416          return (-1);
1417       }
1418 
1419       hypre_TFree(m_i1, HYPRE_MEMORY_HOST);
1420       hypre_TFree(piv, HYPRE_MEMORY_HOST);
1421    }
1422 
1423 #else
1424    {
1425       HYPRE_Real     eps;
1426       HYPRE_Complex *i1_t, *i2_t, *o_t;
1427 
1428       eps = 1.0e-12;
1429 
1430       if (block_size ==1 )
1431       {
1432          if (hypre_cabs(i1[0]) > eps)
1433          {
1434             o[0] = i2[0]/i1[0];
1435             return(ierr);
1436          }
1437          else
1438          {
1439             /* hypre_printf("GE zero pivot error\n"); */
1440             return(-1);
1441          }
1442       }
1443       else
1444       {
1445 
1446          i1_t = hypre_CTAlloc(HYPRE_Complex,  block_size*block_size, HYPRE_MEMORY_HOST);
1447          i2_t = hypre_CTAlloc(HYPRE_Complex,  block_size*block_size, HYPRE_MEMORY_HOST);
1448          o_t = hypre_CTAlloc(HYPRE_Complex,  block_size*block_size, HYPRE_MEMORY_HOST);
1449 
1450          /* TO DO:: this could be done more efficiently! */
1451          hypre_CSRBlockMatrixBlockTranspose(i1, i1_t, block_size);
1452          hypre_CSRBlockMatrixBlockTranspose(i2, i2_t, block_size);
1453          ierr = hypre_CSRBlockMatrixBlockInvMult(i1_t, i2_t, o_t, block_size);
1454 
1455          if (!ierr) hypre_CSRBlockMatrixBlockTranspose(o_t, o, block_size);
1456 
1457          hypre_TFree(i1_t, HYPRE_MEMORY_HOST);
1458          hypre_TFree(i2_t, HYPRE_MEMORY_HOST);
1459          hypre_TFree(o_t, HYPRE_MEMORY_HOST);
1460 
1461       }
1462    }
1463 
1464 #endif
1465    return (ierr);
1466 }
1467 
1468 
1469 
1470 /*--------------------------------------------------------------------------
1471  * hypre_CSRBlockMatrixBlockInvMultDiag - zeros off-d entires
1472  * (o = diag(i1)^{-1} * diag(i2))
1473  *--------------------------------------------------------------------------*/
1474 HYPRE_Int
hypre_CSRBlockMatrixBlockInvMultDiag(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)1475 hypre_CSRBlockMatrixBlockInvMultDiag(HYPRE_Complex* i1, HYPRE_Complex* i2, HYPRE_Complex* o, HYPRE_Int block_size)
1476 {
1477 
1478    HYPRE_Int  ierr = 0;
1479    HYPRE_Int  i;
1480    HYPRE_Int  sz = block_size*block_size;
1481    HYPRE_Real eps = 1.0e-8;
1482 
1483    for (i = 0; i < sz; i++)
1484       o[i] = 0.0;
1485 
1486    for (i = 0; i < block_size; i++)
1487    {
1488       if (hypre_cabs(i1[i*block_size + i]) > eps)
1489       {
1490          o[i*block_size + i] = i2[i*block_size + i] / i1[i*block_size + i];
1491       }
1492       else
1493       {
1494          /* hypre_printf("GE zero pivot error\n"); */
1495          return(-1);
1496       }
1497    }
1498 
1499    return (ierr);
1500 }
1501 
1502 /*--------------------------------------------------------------------------
1503  * hypre_CSRBlockMatrixBlockInvMultDiag2
1504  * (o = (i1)* diag(i2)^-1) - so this scales the cols of il by
1505                              the diag entries in i2
1506  *--------------------------------------------------------------------------*/
1507 HYPRE_Int
hypre_CSRBlockMatrixBlockInvMultDiag2(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)1508 hypre_CSRBlockMatrixBlockInvMultDiag2(HYPRE_Complex* i1, HYPRE_Complex* i2, HYPRE_Complex* o, HYPRE_Int block_size)
1509 {
1510 
1511    HYPRE_Int ierr = 0;
1512    HYPRE_Int i, j;
1513 
1514    HYPRE_Real    eps = 1.0e-8;
1515    HYPRE_Complex tmp;
1516 
1517    for (i = 0; i < block_size; i++)
1518    {
1519       if (hypre_cabs(i2[i*block_size + i]) > eps)
1520       {
1521          tmp = 1 / i2[i*block_size + i];
1522       }
1523       else
1524       {
1525          tmp = 1.0;
1526       }
1527       for (j=0; j< block_size; j++)  /* this should be re-written to access by row (not col)! */
1528       {
1529          o[j*block_size + i] = i1[j*block_size + i] *tmp;
1530       }
1531    }
1532 
1533    return (ierr);
1534 }
1535 
1536 
1537 /*--------------------------------------------------------------------------
1538  * hypre_CSRBlockMatrixBlockInvMultDiag3
1539  * (o = (i1)* diag(i2)^-1) - so this scales the cols of il by
1540                              the i2 whose diags are row sums
1541  *--------------------------------------------------------------------------*/
1542 HYPRE_Int
hypre_CSRBlockMatrixBlockInvMultDiag3(HYPRE_Complex * i1,HYPRE_Complex * i2,HYPRE_Complex * o,HYPRE_Int block_size)1543 hypre_CSRBlockMatrixBlockInvMultDiag3(HYPRE_Complex* i1, HYPRE_Complex* i2, HYPRE_Complex* o, HYPRE_Int block_size)
1544 {
1545 
1546    HYPRE_Int ierr = 0;
1547    HYPRE_Int i, j;
1548    HYPRE_Real    eps = 1.0e-8;
1549    HYPRE_Complex tmp, row_sum;
1550 
1551    for (i = 0; i < block_size; i++)
1552    {
1553       /* get row sum of i2, row i */
1554       row_sum = 0.0;
1555       for (j=0; j< block_size; j++)
1556       {
1557          row_sum += i2[i*block_size + j];
1558       }
1559 
1560       /* invert */
1561       if (hypre_cabs(row_sum) > eps)
1562       {
1563          tmp = 1 / row_sum;
1564       }
1565       else
1566       {
1567          tmp = 1.0;
1568       }
1569       /* scale col of i1 */
1570       for (j=0; j< block_size; j++)  /* this should be re-written to access by row (not col)! */
1571       {
1572          o[j*block_size + i] = i1[j*block_size + i] *tmp;
1573       }
1574    }
1575 
1576    return (ierr);
1577 }
1578 
1579 
1580 
1581 
1582 /*--------------------------------------------------------------------------
1583  * hypre_CSRBlockMatrixTranspose
1584  *--------------------------------------------------------------------------*/
1585 
hypre_CSRBlockMatrixTranspose(hypre_CSRBlockMatrix * A,hypre_CSRBlockMatrix ** AT,HYPRE_Int data)1586 HYPRE_Int hypre_CSRBlockMatrixTranspose(hypre_CSRBlockMatrix *A,
1587                                   hypre_CSRBlockMatrix **AT, HYPRE_Int data)
1588 
1589 {
1590    HYPRE_Complex       *A_data = hypre_CSRBlockMatrixData(A);
1591    HYPRE_Int          *A_i = hypre_CSRBlockMatrixI(A);
1592    HYPRE_Int          *A_j = hypre_CSRBlockMatrixJ(A);
1593    HYPRE_Int           num_rowsA = hypre_CSRBlockMatrixNumRows(A);
1594    HYPRE_Int           num_colsA = hypre_CSRBlockMatrixNumCols(A);
1595    HYPRE_Int           num_nonzerosA = hypre_CSRBlockMatrixNumNonzeros(A);
1596    HYPRE_Int           block_size = hypre_CSRBlockMatrixBlockSize(A);
1597 
1598    HYPRE_Complex       *AT_data;
1599    HYPRE_Int          *AT_i;
1600    HYPRE_Int          *AT_j;
1601    HYPRE_Int           num_rowsAT;
1602    HYPRE_Int           num_colsAT;
1603    HYPRE_Int           num_nonzerosAT;
1604 
1605    HYPRE_Int           max_col;
1606    HYPRE_Int           i, j, k, m, offset, bnnz;
1607 
1608    /*--------------------------------------------------------------
1609     * First, ascertain that num_cols and num_nonzeros has been set.
1610     * If not, set them.
1611     *--------------------------------------------------------------*/
1612 
1613    if (! num_nonzerosA) num_nonzerosA = A_i[num_rowsA];
1614    if (num_rowsA && ! num_colsA)
1615    {
1616       max_col = -1;
1617       for (i = 0; i < num_rowsA; ++i)
1618          for (j = A_i[i]; j < A_i[i+1]; j++)
1619             if (A_j[j] > max_col) max_col = A_j[j];
1620       num_colsA = max_col+1;
1621    }
1622    num_rowsAT = num_colsA;
1623    num_colsAT = num_rowsA;
1624    num_nonzerosAT = num_nonzerosA;
1625    bnnz = block_size * block_size;
1626 
1627    *AT = hypre_CSRBlockMatrixCreate(block_size, num_rowsAT, num_colsAT,
1628                                     num_nonzerosAT);
1629 
1630    AT_i = hypre_CTAlloc(HYPRE_Int,  num_rowsAT+1, HYPRE_MEMORY_HOST);
1631    AT_j = hypre_CTAlloc(HYPRE_Int,  num_nonzerosAT, HYPRE_MEMORY_HOST);
1632    hypre_CSRBlockMatrixI(*AT) = AT_i;
1633    hypre_CSRBlockMatrixJ(*AT) = AT_j;
1634    if (data)
1635    {
1636       AT_data = hypre_CTAlloc(HYPRE_Complex,  num_nonzerosAT*bnnz, HYPRE_MEMORY_HOST);
1637       hypre_CSRBlockMatrixData(*AT) = AT_data;
1638    }
1639 
1640    /*-----------------------------------------------------------------
1641     * Count the number of entries in each column of A (row of AT)
1642     * and fill the AT_i array.
1643     *-----------------------------------------------------------------*/
1644 
1645    for (i = 0; i < num_nonzerosA; i++) ++AT_i[A_j[i]+1];
1646    for (i = 2; i <= num_rowsAT; i++) AT_i[i] += AT_i[i-1];
1647 
1648    /*----------------------------------------------------------------
1649     * Load the data and column numbers of AT
1650     *----------------------------------------------------------------*/
1651 
1652    for (i = 0; i < num_rowsA; i++)
1653    {
1654       for (j = A_i[i]; j < A_i[i+1]; j++)
1655       {
1656          AT_j[AT_i[A_j[j]]] = i;
1657          if (data)
1658          {
1659             offset = AT_i[A_j[j]] * bnnz;
1660             for (k = 0; k < block_size; k++)
1661                for (m = 0; m < block_size; m++)
1662                   AT_data[offset+k*block_size+m] =
1663                        A_data[j*bnnz+m*block_size+k];
1664          }
1665          AT_i[A_j[j]]++;
1666       }
1667    }
1668 
1669    /*------------------------------------------------------------
1670     * AT_i[j] now points to the *end* of the jth row of entries
1671     * instead of the beginning.  Restore AT_i to front of row.
1672     *------------------------------------------------------------*/
1673 
1674    for (i = num_rowsAT; i > 0; i--) AT_i[i] = AT_i[i-1];
1675    AT_i[0] = 0;
1676 
1677    return(0);
1678 }
1679 
1680