1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
5 1. Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7 2. Redistributions in binary form must reproduce the above copyright
8 notice, this list of conditions and the following disclaimer in the
9 documentation and/or other materials provided with the distribution.
10 3. Neither the name of the project nor the names of its contributors
11 may be used to endorse or promote products derived from this software
12 without specific prior written permission.
13
14 THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18 PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 #include "lis_config_win.h"
32 #endif
33 #endif
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #ifdef HAVE_MALLOC_H
38 #include <malloc.h>
39 #endif
40 #include <string.h>
41 #include <stdarg.h>
42 #include <math.h>
43 #ifdef _OPENMP
44 #include <omp.h>
45 #endif
46 #ifdef USE_MPI
47 #include <mpi.h>
48 #endif
49 #include "lislib.h"
50
51 /************************************************
52 * lis_matrix_set
53 * lis_matrix_malloc
54 * lis_matrix_copy
55 * lis_matrix_convert
56 * lis_matrix_get_diagonal
57 * lis_matrix_shift_diagonal
58 * lis_matrix_scale
59 * lis_matrix_scale_symm
60 * lis_matrix_normf
61 * lis_matrix_transpose
62 ************************************************/
63
64 #undef __FUNC__
65 #define __FUNC__ "lis_matrix_set_bsc"
lis_matrix_set_bsc(LIS_INT bnr,LIS_INT bnc,LIS_INT bnnz,LIS_INT * bptr,LIS_INT * bindex,LIS_SCALAR * value,LIS_MATRIX A)66 LIS_INT lis_matrix_set_bsc(LIS_INT bnr, LIS_INT bnc, LIS_INT bnnz, LIS_INT *bptr, LIS_INT *bindex, LIS_SCALAR *value, LIS_MATRIX A)
67 {
68 LIS_INT err;
69
70 LIS_DEBUG_FUNC_IN;
71
72 #if 0
73 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
74 if( err ) return err;
75 #else
76 if(lis_matrix_is_assembled(A)) {
77 LIS_DEBUG_FUNC_OUT;
78 return LIS_SUCCESS;
79 }
80 else {
81 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
82 if( err ) return err;
83 }
84 #endif
85
86 A->bptr = bptr;
87 A->bindex = bindex;
88 A->value = value;
89 A->is_copy = LIS_FALSE;
90 A->status = -LIS_MATRIX_BSC;
91 A->is_block = LIS_TRUE;
92 A->bnnz = bnnz;
93 A->nr = (A->n-1)/bnr+1;
94 A->nc = (A->gn-1)/bnc+1;
95 if( A->n==A->np )
96 {
97 A->nc = 1 + (A->n - 1)/bnc;
98 A->pad = (bnc - A->n%bnc)%bnc;
99 }
100 else
101 {
102 A->nc = 2 + (A->n - 1)/bnc + (A->np - A->n - 1)/bnc;
103 A->pad = (bnc - A->n%bnc)%bnc + (bnc - (A->np-A->n)%bnc)%bnc;
104 }
105 A->bnr = bnr;
106 A->bnc = bnc;
107
108 LIS_DEBUG_FUNC_OUT;
109 return LIS_SUCCESS;
110 }
111
112 #undef __FUNC__
113 #define __FUNC__ "lis_matrix_setDLU_bsc"
lis_matrix_setDLU_bsc(LIS_INT bnr,LIS_INT bnc,LIS_INT lbnnz,LIS_INT ubnnz,LIS_MATRIX_DIAG D,LIS_INT * lbptr,LIS_INT * lbindex,LIS_SCALAR * lvalue,LIS_INT * ubptr,LIS_INT * ubindex,LIS_SCALAR * uvalue,LIS_MATRIX A)114 LIS_INT lis_matrix_setDLU_bsc(LIS_INT bnr, LIS_INT bnc, LIS_INT lbnnz, LIS_INT ubnnz, LIS_MATRIX_DIAG D, LIS_INT *lbptr, LIS_INT *lbindex, LIS_SCALAR *lvalue, LIS_INT *ubptr, LIS_INT *ubindex, LIS_SCALAR *uvalue, LIS_MATRIX A)
115 {
116 LIS_INT err;
117
118 LIS_DEBUG_FUNC_IN;
119
120 #if 0
121 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
122 if( err ) return err;
123 #else
124 if(lis_matrix_is_assembled(A)) return LIS_SUCCESS;
125 else {
126 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
127 if( err ) return err;
128 }
129 #endif
130
131 A->L = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT),"lis_matrix_setDLU_bsc::A->L");
132 if( A->L==NULL )
133 {
134 LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT));
135 return LIS_OUT_OF_MEMORY;
136 }
137 A->U = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT),"lis_matrix_setDLU_bsc::A->U");
138 if( A->U==NULL )
139 {
140 LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT));
141 lis_matrix_DLU_destroy(A);
142 return LIS_OUT_OF_MEMORY;
143 }
144
145 A->D = D;
146 A->L->bnnz = lbnnz;
147 A->L->bptr = lbptr;
148 A->L->bindex = lbindex;
149 A->L->value = lvalue;
150 A->U->bnnz = ubnnz;
151 A->U->bptr = ubptr;
152 A->U->bindex = ubindex;
153 A->U->value = uvalue;
154 A->is_copy = LIS_FALSE;
155 A->status = -LIS_MATRIX_BSC;
156 A->is_splited = LIS_TRUE;
157 A->is_block = LIS_TRUE;
158 A->nr = (A->n-1)/bnr+1;
159 A->nc = (A->gn-1)/bnc+1;
160 if( A->n==A->np )
161 {
162 A->nc = 1 + (A->n - 1)/bnc;
163 A->pad = (bnc - A->n%bnc)%bnc;
164 }
165 else
166 {
167 A->nc = 2 + (A->n - 1)/bnc + (A->np - A->n - 1)/bnc;
168 A->pad = (bnc - A->n%bnc)%bnc + (bnc - (A->np-A->n)%bnc)%bnc;
169 }
170 A->bnr = bnr;
171 A->bnc = bnc;
172
173 LIS_DEBUG_FUNC_OUT;
174 return LIS_SUCCESS;
175 }
176
177 #undef __FUNC__
178 #define __FUNC__ "lis_matrix_malloc_bsc"
lis_matrix_malloc_bsc(LIS_INT n,LIS_INT bnr,LIS_INT bnc,LIS_INT bnnz,LIS_INT ** bptr,LIS_INT ** bindex,LIS_SCALAR ** value)179 LIS_INT lis_matrix_malloc_bsc(LIS_INT n, LIS_INT bnr, LIS_INT bnc, LIS_INT bnnz, LIS_INT **bptr, LIS_INT **bindex, LIS_SCALAR **value)
180 {
181 LIS_INT nc;
182
183 LIS_DEBUG_FUNC_IN;
184
185 nc = 1 + (n -1)/bnc;
186 *bptr = NULL;
187 *bindex = NULL;
188 *value = NULL;
189
190 *bptr = (LIS_INT *)lis_malloc( (nc+1)*sizeof(LIS_INT),"lis_matrix_malloc_bsc::bptr" );
191 if( *bptr==NULL )
192 {
193 LIS_SETERR_MEM((nc+1)*sizeof(LIS_INT));
194 lis_free2(3,*bptr,*bindex,*value);
195 return LIS_FAILS;
196 }
197 *bindex = (LIS_INT *)lis_malloc( bnnz*sizeof(LIS_INT),"lis_matrix_malloc_bsc::bindex" );
198 if( *bindex==NULL )
199 {
200 LIS_SETERR_MEM(bnnz*sizeof(LIS_INT));
201 lis_free2(3,*bptr,*bindex,*value);
202 return LIS_OUT_OF_MEMORY;
203 }
204 *value = (LIS_SCALAR *)lis_malloc( bnnz*bnr*bnc*sizeof(LIS_SCALAR),"lis_matrix_malloc_bsc::value" );
205 if( *value==NULL )
206 {
207 LIS_SETERR_MEM(bnnz*bnr*bnc*sizeof(LIS_SCALAR));
208 lis_free2(3,*bptr,*bindex,*value);
209 return LIS_OUT_OF_MEMORY;
210 }
211 LIS_DEBUG_FUNC_OUT;
212 return LIS_SUCCESS;
213 }
214
215 #undef __FUNC__
216 #define __FUNC__ "lis_matrix_elements_copy_bsc"
lis_matrix_elements_copy_bsc(LIS_INT n,LIS_INT bnr,LIS_INT bnc,LIS_INT bnnz,LIS_INT * ptr,LIS_INT * index,LIS_SCALAR * value,LIS_INT * o_ptr,LIS_INT * o_index,LIS_SCALAR * o_value)217 LIS_INT lis_matrix_elements_copy_bsc(LIS_INT n, LIS_INT bnr, LIS_INT bnc, LIS_INT bnnz, LIS_INT *ptr, LIS_INT *index, LIS_SCALAR *value, LIS_INT *o_ptr, LIS_INT *o_index, LIS_SCALAR *o_value)
218 {
219 LIS_INT i,j,k;
220 LIS_INT nc,bs;
221
222 LIS_DEBUG_FUNC_IN;
223
224 nc = 1 + (n - 1)/bnc;
225 bs = bnr*bnc;
226 #ifdef _OPENMP
227 #pragma omp parallel private(i,j,k)
228 #endif
229 {
230 #ifdef _OPENMP
231 #pragma omp for
232 #endif
233 for(i=0;i<nc+1;i++)
234 {
235 o_ptr[i] = ptr[i];
236 }
237 #ifdef _OPENMP
238 #pragma omp for
239 #endif
240 for(i=0;i<nc;i++)
241 {
242 for(j=ptr[i];j<ptr[i+1];j++)
243 {
244 for(k=0;k<bs;k++)
245 {
246 o_value[j*bs+k] = value[j*bs+k];
247 }
248 o_index[j] = index[j];
249 }
250 }
251 }
252
253 LIS_DEBUG_FUNC_OUT;
254 return LIS_SUCCESS;
255 }
256
257 #undef __FUNC__
258 #define __FUNC__ "lis_matrix_copy_bsc"
lis_matrix_copy_bsc(LIS_MATRIX Ain,LIS_MATRIX Aout)259 LIS_INT lis_matrix_copy_bsc(LIS_MATRIX Ain, LIS_MATRIX Aout)
260 {
261 LIS_INT err;
262 LIS_INT np,bnnz,bnr,bnc;
263 LIS_INT lbnnz,ubnnz;
264 LIS_INT *bptr,*bindex;
265 LIS_INT *lbptr,*lbindex;
266 LIS_INT *ubptr,*ubindex;
267 LIS_SCALAR *value,*lvalue,*uvalue;
268 LIS_MATRIX_DIAG D;
269
270 LIS_DEBUG_FUNC_IN;
271
272 np = Ain->np;
273 bnnz = Ain->bnnz;
274 bnr = Ain->bnr;
275 bnc = Ain->bnc;
276
277 if( Ain->is_splited )
278 {
279 lbnnz = Ain->L->bnnz;
280 ubnnz = Ain->U->bnnz;
281 lbptr = NULL;
282 lbindex = NULL;
283 lvalue = NULL;
284 ubptr = NULL;
285 ubindex = NULL;
286 uvalue = NULL;
287 D = NULL;
288
289 err = lis_matrix_malloc_bsc(np,bnr,bnc,lbnnz,&lbptr,&lbindex,&lvalue);
290 if( err )
291 {
292 return err;
293 }
294 err = lis_matrix_malloc_bsc(np,bnr,bnc,ubnnz,&ubptr,&ubindex,&uvalue);
295 if( err )
296 {
297 lis_free2(6,ubptr,lbptr,ubindex,lbindex,uvalue,lvalue);
298 return err;
299 }
300 err = lis_matrix_diag_duplicateM(Ain,&D);
301 if( err )
302 {
303 lis_free2(6,ubptr,lbptr,ubindex,lbindex,uvalue,lvalue);
304 return err;
305 }
306
307 lis_matrix_diag_copy(Ain->D,D);
308 lis_matrix_elements_copy_bsc(np,bnr,bnc,lbnnz,Ain->L->bptr,Ain->L->bindex,Ain->L->value,lbptr,lbindex,lvalue);
309 lis_matrix_elements_copy_bsc(np,bnr,bnc,ubnnz,Ain->U->bptr,Ain->U->bindex,Ain->U->value,ubptr,ubindex,uvalue);
310
311 err = lis_matrix_setDLU_bsc(bnr,bnc,lbnnz,ubnnz,D,lbptr,lbindex,lvalue,ubptr,ubindex,uvalue,Aout);
312 if( err )
313 {
314 lis_free2(6,ubptr,lbptr,ubindex,lbindex,uvalue,lvalue);
315 return err;
316 }
317 }
318 if( !Ain->is_splited || (Ain->is_splited && Ain->is_save) )
319 {
320 bptr = NULL;
321 bindex = NULL;
322 value = NULL;
323
324 err = lis_matrix_malloc_bsc(np,bnr,bnc,bnnz,&bptr,&bindex,&value);
325 if( err )
326 {
327 return err;
328 }
329
330 lis_matrix_elements_copy_bsc(np,bnr,bnc,bnnz,Ain->bptr,Ain->bindex,Ain->value,bptr,bindex,value);
331
332 err = lis_matrix_set_bsc(bnr,bnc,bnnz,bptr,bindex,value,Aout);
333 if( err )
334 {
335 lis_free2(3,bptr,bindex,value);
336 return err;
337 }
338 }
339
340 err = lis_matrix_assemble(Aout);
341 if( err )
342 {
343 lis_matrix_storage_destroy(Aout);
344 return err;
345 }
346 LIS_DEBUG_FUNC_OUT;
347 return LIS_SUCCESS;
348 }
349
350 #undef __FUNC__
351 #define __FUNC__ "lis_matrix_convert_csr2bsc"
lis_matrix_convert_csc2bsc(LIS_MATRIX Ain,LIS_MATRIX Aout)352 LIS_INT lis_matrix_convert_csc2bsc(LIS_MATRIX Ain, LIS_MATRIX Aout)
353 {
354 LIS_INT i,j,k,n,bnr,bnc;
355 LIS_INT ii,jj,kk,pad;
356 LIS_INT bnnz,bj,nr,nc,jpos,nnz,ij,kv,bi;
357 LIS_INT err;
358 LIS_INT np,nprocs,my_rank;
359 LIS_INT *iw,*iw2;
360 LIS_INT *bptr,*bindex;
361 LIS_SCALAR *value;
362
363 LIS_DEBUG_FUNC_IN;
364
365 bnr = Aout->conv_bnr;
366 bnc = Aout->conv_bnc;
367
368 n = Ain->n;
369 np = Ain->np;
370 nr = 1 + (n - 1)/bnr;
371 pad = (bnc - n%bnc)%bnc;
372 if( n==np )
373 {
374 nc = 1 + (n - 1)/bnc;
375 }
376 else
377 {
378 nc = 2 + (n - 1)/bnc + (np - n - 1)/bnc;
379 }
380
381 bptr = NULL;
382 bindex = NULL;
383 value = NULL;
384 iw = NULL;
385 iw2 = NULL;
386
387 bptr = (LIS_INT *)lis_malloc( (nc+1)*sizeof(LIS_INT),"lis_matrix_convert_csc2bsc::bptr" );
388 if( bptr==NULL )
389 {
390 LIS_SETERR_MEM((nc+1)*sizeof(LIS_INT));
391 lis_free2(5,bptr,bindex,value,iw,iw2);
392 return LIS_OUT_OF_MEMORY;
393 }
394
395 #ifdef _OPENMP
396 nprocs = omp_get_max_threads();
397 #else
398 nprocs = 1;
399 #endif
400 iw = (LIS_INT *)lis_malloc( nprocs*nr*sizeof(LIS_INT),"lis_matrix_convert_csc2bsc::iw" );
401 iw2 = (LIS_INT *)lis_malloc( nprocs*nr*sizeof(LIS_INT),"lis_matrix_convert_csc2bsc::iw2" );
402
403 #ifdef _OPENMP
404 #pragma omp parallel private(i,k,ii,j,bj,kk,ij,jj,kv,jpos,my_rank)
405 #endif
406 {
407 #ifdef _OPENMP
408 my_rank = omp_get_thread_num();
409 #else
410 my_rank = 0;
411 #endif
412 memset(&iw[my_rank*nr],0,nr*sizeof(LIS_INT));
413
414 #ifdef _OPENMP
415 #pragma omp for
416 #endif
417 for(i=0;i<nc;i++)
418 {
419 k = 0;
420 kk = bnc*i;
421 jj = 0;
422 #ifdef USE_MPI
423 for(ii=0;ii+kk<np&&ii<bnc;ii++)
424 {
425 for(j=Ain->ptr[kk+ii];j<Ain->ptr[kk+ii+1];j++)
426 {
427 bj = Ain->index[j]/bnr;
428 jpos = iw[my_rank*nr + bj];
429 if( jpos==0 )
430 {
431 iw[my_rank*nr + bj] = 1;
432 iw2[my_rank*nr + jj] = bj;
433 jj++;
434 }
435 }
436 }
437 #else
438 for(ii=0;ii+kk<np&&ii<bnc;ii++)
439 {
440 for(j=Ain->ptr[kk+ii];j<Ain->ptr[kk+ii+1];j++)
441 {
442 bj = Ain->index[j]/bnr;
443 jpos = iw[my_rank*nr + bj];
444 if( jpos==0 )
445 {
446 iw[my_rank*nr + bj] = 1;
447 iw2[my_rank*nr + jj] = bj;
448 jj++;
449 }
450 }
451 }
452 #endif
453 for(bj=0;bj<jj;bj++)
454 {
455 k++;
456 ii = iw2[my_rank*nr + bj];
457 iw[my_rank*nr + ii]=0;
458 }
459 bptr[i+1] = k;
460 }
461 }
462
463 bptr[0] = 0;
464 for(i=0;i<nc;i++)
465 {
466 bptr[i+1] += bptr[i];
467 }
468 bnnz = bptr[nc];
469 nnz = bnnz*bnr*bnc;
470
471 bindex = (LIS_INT *)lis_malloc( bnnz*sizeof(LIS_INT),"lis_matrix_convert_csc2bsc::bindex" );
472 if( bindex==NULL )
473 {
474 LIS_SETERR_MEM((nr+1)*sizeof(LIS_INT));
475 lis_free2(5,bptr,bindex,value,iw,iw2);
476 return LIS_OUT_OF_MEMORY;
477 }
478 value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_convert_csc2bsc::value" );
479 if( value==NULL )
480 {
481 LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR));
482 lis_free2(5,bptr,bindex,value,iw,iw2);
483 return LIS_OUT_OF_MEMORY;
484 }
485
486 /* convert bsc */
487 #ifdef _OPENMP
488 #pragma omp parallel private(bi,i,ii,k,j,bj,jpos,kv,kk,ij,jj,my_rank)
489 #endif
490 {
491 #ifdef _OPENMP
492 my_rank = omp_get_thread_num();
493 #else
494 my_rank = 0;
495 #endif
496 memset(&iw[my_rank*nr],0,nr*sizeof(LIS_INT));
497
498 #ifdef _OPENMP
499 #pragma omp for
500 #endif
501 for(bi=0;bi<nc;bi++)
502 {
503 i = bi*bnc;
504 ii = 0;
505 kk = bptr[bi];
506 while( i+ii<np && ii<=bnc-1 )
507 {
508 for( k=Ain->ptr[i+ii];k<Ain->ptr[i+ii+1];k++)
509 {
510 j = Ain->index[k];
511 bj = j/bnr;
512 j = j%bnr;
513 jpos = iw[my_rank*nr + bj];
514 if( jpos==0 )
515 {
516 kv = kk * bnr * bnc;
517 iw[my_rank*nr + bj] = kv+1;
518 bindex[kk] = bj;
519 for(jj=0;jj<bnr*bnc;jj++) value[kv+jj] = 0.0;
520 ij = j + ii*bnc;
521 value[kv+ij] = Ain->value[k];
522 kk = kk+1;
523 }
524 else
525 {
526 ij = j + ii*bnc;
527 value[jpos+ij-1] = Ain->value[k];
528 }
529 }
530 ii = ii+1;
531 }
532 for(j=bptr[bi];j<bptr[bi+1];j++)
533 {
534 iw[my_rank*nr + bindex[j]] = 0;
535 }
536 }
537 }
538 lis_free2(2,iw,iw2);
539
540 err = lis_matrix_set_bsc(bnr,bnc,bnnz,bptr,bindex,value,Aout);
541 if( err )
542 {
543 lis_free2(3,bptr,bindex,value);
544 return err;
545 }
546 Aout->pad_comm = pad;
547 err = lis_matrix_assemble(Aout);
548 if( err )
549 {
550 lis_matrix_storage_destroy(Aout);
551 return err;
552 }
553 #ifdef USE_MPI
554 Aout->commtable->pad = pad;
555 MPI_Barrier(Ain->comm);
556 #endif
557 LIS_DEBUG_FUNC_OUT;
558 return LIS_SUCCESS;
559 }
560
561 #undef __FUNC__
562 #define __FUNC__ "lis_matrix_convert_bsc2csr"
lis_matrix_convert_bsc2csr(LIS_MATRIX Ain,LIS_MATRIX Aout)563 LIS_INT lis_matrix_convert_bsc2csr(LIS_MATRIX Ain, LIS_MATRIX Aout)
564 {
565 LIS_INT i,j,k,l;
566 LIS_INT nc,bnr,bnc,bs,bi,bj;
567 LIS_INT err;
568 LIS_INT n,nnz,nprocs,my_rank;
569 LIS_INT *iw,*ptr,*index;
570 LIS_SCALAR *value;
571
572 LIS_DEBUG_FUNC_IN;
573
574 n = Ain->n;
575 nc = Ain->nc;
576 bnr = Ain->bnr;
577 bnc = Ain->bnc;
578 bs = bnr*bnc;
579 #ifdef _OPENMP
580 nprocs = omp_get_max_threads();
581 #else
582 nprocs = 1;
583 #endif
584
585
586 iw = NULL;
587 ptr = NULL;
588 index = NULL;
589 value = NULL;
590
591 iw = (LIS_INT *)lis_malloc( nprocs*n*sizeof(LIS_INT),"lis_matrix_convert_bsc2csr::iw" );
592 if( iw==NULL )
593 {
594 LIS_SETERR_MEM(nprocs*n*sizeof(LIS_INT));
595 return LIS_OUT_OF_MEMORY;
596 }
597 ptr = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_matrix_convert_bsc2csr::ptr" );
598 if( ptr==NULL )
599 {
600 LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
601 lis_free2(4,ptr,index,value,iw);
602 return LIS_OUT_OF_MEMORY;
603 }
604
605 /* check nnz */
606 #ifdef _OPENMP
607 #pragma omp parallel private(i,j,bi,bj,my_rank)
608 #endif
609 {
610 #ifdef _OPENMP
611 my_rank = omp_get_thread_num();
612 #else
613 my_rank = 0;
614 #endif
615 memset(&iw[my_rank*n],0,n*sizeof(LIS_INT));
616 #ifdef _OPENMP
617 #pragma omp for
618 #endif
619 for(bj=0;bj<nc;bj++)
620 {
621 for(j=0;j<bnc;j++)
622 {
623 for(bi=Ain->bptr[bj];bi<Ain->bptr[bj+1];bi++)
624 {
625 for(i=0;i<bnr;i++)
626 {
627 if( Ain->value[bi*bs + j*bnr + i] != (LIS_SCALAR)0.0 )
628 {
629 iw[my_rank*n + Ain->bindex[bi]*bnr + i]++;
630 }
631 }
632 }
633 }
634 }
635 #ifdef _OPENMP
636 #pragma omp for
637 #endif
638 for(i=0;i<n;i++)
639 {
640 j = 0;
641 for(k=0;k<nprocs;k++)
642 {
643 j += iw[k*n + i];
644 }
645 ptr[i+1] = j;
646 }
647 }
648
649 ptr[0] = 0;
650 for(i=0;i<n;i++)
651 {
652 ptr[i+1] += ptr[i];
653 }
654 nnz = ptr[n];
655
656 index = (LIS_INT *)lis_malloc( nnz*sizeof(LIS_INT),"lis_matrix_convert_bsc2csr::index" );
657 if( index==NULL )
658 {
659 lis_free2(4,ptr,index,value,iw);
660 LIS_SETERR_MEM(nnz*sizeof(LIS_INT));
661 return LIS_OUT_OF_MEMORY;
662 }
663 value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_convert_bsc2csr::value" );
664 if( value==NULL )
665 {
666 lis_free2(4,ptr,index,value,iw);
667 LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR));
668 return LIS_OUT_OF_MEMORY;
669 }
670
671 /* convert csr */
672 #ifdef _OPENMP
673 #pragma omp parallel private(i,j,bi,bj,k,l,my_rank)
674 #endif
675 {
676 #ifdef _OPENMP
677 my_rank = omp_get_thread_num();
678 #else
679 my_rank = 0;
680 #endif
681 #ifdef _OPENMP
682 #pragma omp for
683 #endif
684 for(i=0;i<n;i++)
685 {
686 k = ptr[i];
687 for(j=0;j<nprocs;j++)
688 {
689 l = iw[j*n + i];
690 iw[j*n + i] = k;
691 k = k + l;
692 }
693 }
694 #ifdef _OPENMP
695 #pragma omp for
696 #endif
697 for(bj=0;bj<nc;bj++)
698 {
699 for(j=0;j<bnc;j++)
700 {
701 if( bj*bnc+j==n ) break;
702 for(bi=Ain->bptr[bj];bi<Ain->bptr[bj+1];bi++)
703 {
704 for(i=0;i<bnr;i++)
705 {
706 if( Ain->value[bi*bs + j*bnr + i] != (LIS_SCALAR)0.0 )
707 {
708 k = iw[my_rank*n + Ain->bindex[bi]*bnr + i]++;
709 value[k] = Ain->value[bi*bs + j*bnr + i];
710 index[k] = bj*bnc + j;
711 }
712 }
713 }
714 }
715 }
716 }
717
718 err = lis_matrix_set_csr(nnz,ptr,index,value,Aout);
719 if( err )
720 {
721 lis_free2(4,ptr,index,value,iw);
722 return err;
723 }
724 Aout->pad = 0;
725 Aout->pad_comm = 0;
726 err = lis_matrix_assemble(Aout);
727 if( err )
728 {
729 lis_matrix_storage_destroy(Aout);
730 return err;
731 }
732 #ifdef USE_MPI
733 Aout->commtable->pad = 0;
734 #endif
735 lis_free(iw);
736 LIS_DEBUG_FUNC_OUT;
737 return LIS_SUCCESS;
738 }
739
740 #undef __FUNC__
741 #define __FUNC__ "lis_matrix_get_diagonal_bsc"
lis_matrix_get_diagonal_bsc(LIS_MATRIX A,LIS_SCALAR d[])742 LIS_INT lis_matrix_get_diagonal_bsc(LIS_MATRIX A, LIS_SCALAR d[])
743 {
744 LIS_INT i,j,k,bi,bj,bjj,nr;
745 LIS_INT bnr,bnc,bs;
746 LIS_INT n;
747
748 LIS_DEBUG_FUNC_IN;
749
750 n = A->n;
751 nr = A->nr;
752 bnr = A->bnr;
753 bnc = A->bnc;
754 bs = bnr*bnc;
755 if( A->is_splited )
756 {
757 #ifdef _OPENMP
758 #pragma omp parallel for private(i,j)
759 #endif
760 for(i=0;i<nr;i++)
761 {
762 for(j=0;j<bnr;j++)
763 {
764 d[i*bnr+j] = A->D->value[i*bs+j*bnr+j];
765 }
766 }
767 }
768 else
769 {
770 #ifdef _OPENMP
771 #pragma omp parallel for private(bi,bj,bjj,i,j,k)
772 #endif
773 for(bi=0;bi<nr;bi++)
774 {
775 k = 0;
776 i = bi*bnr;
777 for(bj=A->bptr[bi];bj<A->bptr[bi+1];bj++)
778 {
779 bjj = A->bindex[bj];
780 if( i>=bjj*bnc && i<(bjj+1)*bnc )
781 {
782 for(j=i%bnc;j<bnc&&k<bnr&&i<n;j++)
783 {
784 d[i] = A->value[bj*bs + j*bnr + k];
785 i++;
786 k++;
787 }
788 }
789 if( k==bnr ) break;
790 }
791 }
792 }
793
794 LIS_DEBUG_FUNC_OUT;
795 return LIS_SUCCESS;
796 }
797
798 #undef __FUNC__
799 #define __FUNC__ "lis_matrix_shift_diagonal_bsc"
lis_matrix_shift_diagonal_bsc(LIS_MATRIX A,LIS_SCALAR sigma)800 LIS_INT lis_matrix_shift_diagonal_bsc(LIS_MATRIX A, LIS_SCALAR sigma)
801 {
802 LIS_INT i,j,k,bi,bj,bjj,nr;
803 LIS_INT bnr,bnc,bs;
804 LIS_INT n;
805
806 LIS_DEBUG_FUNC_IN;
807
808 n = A->n;
809 nr = A->nr;
810 bnr = A->bnr;
811 bnc = A->bnc;
812 bs = bnr*bnc;
813 if( A->is_splited )
814 {
815 #ifdef _OPENMP
816 #pragma omp parallel for private(i,j)
817 #endif
818 for(i=0;i<nr;i++)
819 {
820 for(j=0;j<bnr;j++)
821 {
822 A->D->value[i*bs+j*bnr+j] -= sigma;
823 }
824 }
825 }
826 else
827 {
828 #ifdef _OPENMP
829 #pragma omp parallel for private(bi,bj,bjj,i,j,k)
830 #endif
831 for(bi=0;bi<nr;bi++)
832 {
833 k = 0;
834 i = bi*bnr;
835 for(bj=A->bptr[bi];bj<A->bptr[bi+1];bj++)
836 {
837 bjj = A->bindex[bj];
838 if( i>=bjj*bnc && i<(bjj+1)*bnc )
839 {
840 for(j=i%bnc;j<bnc&&k<bnr&&i<n;j++)
841 {
842 A->value[bj*bs + j*bnr + k] -= sigma;
843 i++;
844 k++;
845 }
846 }
847 if( k==bnr ) break;
848 }
849 }
850 }
851
852 LIS_DEBUG_FUNC_OUT;
853 return LIS_SUCCESS;
854 }
855
856 #undef __FUNC__
857 #define __FUNC__ "lis_matrix_scale_bsc"
lis_matrix_scale_bsc(LIS_MATRIX A,LIS_SCALAR d[])858 LIS_INT lis_matrix_scale_bsc(LIS_MATRIX A, LIS_SCALAR d[])
859 {
860 LIS_INT i,j;
861 LIS_INT bi,bj,bs;
862 LIS_INT nr;
863 LIS_INT bnr,bnc;
864
865 LIS_DEBUG_FUNC_IN;
866
867 bnr = A->bnr;
868 bnc = A->bnc;
869 nr = A->nr;
870 bs = A->bnr*A->bnc;
871
872 if( A->is_splited )
873 {
874 #ifdef _OPENMP
875 #pragma omp parallel for private(bi,bj,i,j)
876 #endif
877 for(bi=0;bi<nr;bi++)
878 {
879 for(bj=A->L->bptr[bi];bj<A->L->bptr[bi+1];bj++)
880 {
881 for(j=0;j<bnc;j++)
882 {
883 for(i=0;i<bnr;i++)
884 {
885 A->L->value[bj*bs+j*bnr+i] *= d[bi*bnr+i];
886 }
887 }
888 }
889 for(bj=A->U->bptr[bi];bj<A->U->bptr[bi+1];bj++)
890 {
891 for(j=0;j<bnc;j++)
892 {
893 for(i=0;i<bnr;i++)
894 {
895 A->U->value[bj*bs+j*bnr+i] *= d[bi*bnr+i];
896 }
897 }
898 }
899 for(j=0;j<bnc;j++)
900 {
901 for(i=0;i<bnr;i++)
902 {
903 A->D->value[bi*bs+j*bnr+i] *= d[bi*bnr+i];
904 }
905 }
906 }
907 }
908 else
909 {
910 #ifdef _OPENMP
911 #pragma omp parallel for private(bi,bj,i,j)
912 #endif
913 for(bi=0;bi<nr;bi++)
914 {
915 for(bj=A->bptr[bi];bj<A->bptr[bi+1];bj++)
916 {
917 for(j=0;j<bnc;j++)
918 {
919 for(i=0;i<bnr;i++)
920 {
921 A->value[bj*bs+j*bnr+i] *= d[bi*bnr+i];
922 }
923 }
924 }
925 }
926 }
927 LIS_DEBUG_FUNC_OUT;
928 return LIS_SUCCESS;
929 }
930
931 #undef __FUNC__
932 #define __FUNC__ "lis_matrix_scale_symm_bsc"
lis_matrix_scale_symm_bsc(LIS_MATRIX A,LIS_SCALAR d[])933 LIS_INT lis_matrix_scale_symm_bsc(LIS_MATRIX A, LIS_SCALAR d[])
934 {
935 LIS_INT i,j;
936 LIS_INT bi,bj,bjj,bs;
937 LIS_INT nr;
938 LIS_INT bnr,bnc;
939
940 LIS_DEBUG_FUNC_IN;
941
942 bnr = A->bnr;
943 bnc = A->bnc;
944 nr = A->nr;
945 bs = A->bnr*A->bnc;
946
947 if( A->is_splited )
948 {
949 #ifdef _OPENMP
950 #pragma omp parallel for private(bi,bj,i,j)
951 #endif
952 for(bi=0;bi<nr;bi++)
953 {
954 for(bj=A->L->bptr[bi];bj<A->L->bptr[bi+1];bj++)
955 {
956 bjj = A->L->bindex[bj];
957 for(j=0;j<bnc;j++)
958 {
959 for(i=0;i<bnr;i++)
960 {
961 A->L->value[bj*bs+j*bnr+i] *= d[bi*bnr+i]*d[bjj*bnc+j];
962 }
963 }
964 }
965 for(bj=A->U->bptr[bi];bj<A->U->bptr[bi+1];bj++)
966 {
967 bjj = A->U->bindex[bj];
968 for(j=0;j<bnc;j++)
969 {
970 for(i=0;i<bnr;i++)
971 {
972 A->U->value[bj*bs+j*bnr+i] *= d[bi*bnr+i]*d[bjj*bnc+j];
973 }
974 }
975 }
976 for(j=0;j<bnc;j++)
977 {
978 for(i=0;i<bnr;i++)
979 {
980 A->D->value[bi*bs+j*bnr+i] *= d[bi*bnr+i]*d[bi*bnr+i];
981 }
982 }
983 }
984 }
985 else
986 {
987 #ifdef _OPENMP
988 #pragma omp parallel for private(bi,bj,bjj,i,j)
989 #endif
990 for(bi=0;bi<nr;bi++)
991 {
992 for(bj=A->bptr[bi];bj<A->bptr[bi+1];bj++)
993 {
994 bjj = A->bindex[bj];
995 for(j=0;j<bnc;j++)
996 {
997 for(i=0;i<bnr;i++)
998 {
999 A->value[bj*bs+j*bnr+i] *= d[bi*bnr+i]*d[bjj*bnc+j];
1000 }
1001 }
1002 }
1003 }
1004 }
1005 LIS_DEBUG_FUNC_OUT;
1006 return LIS_SUCCESS;
1007 }
1008
1009 #undef __FUNC__
1010 #define __FUNC__ "lis_matrix_normf_bsc"
lis_matrix_normf_bsc(LIS_MATRIX A,LIS_SCALAR * nrm)1011 LIS_INT lis_matrix_normf_bsc(LIS_MATRIX A, LIS_SCALAR *nrm)
1012 {
1013 LIS_INT j;
1014 LIS_INT bi,bj,bs;
1015 LIS_INT nr;
1016 LIS_INT bnr,bnc;
1017 LIS_SCALAR sum;
1018
1019 LIS_DEBUG_FUNC_IN;
1020
1021 bnr = A->bnr;
1022 bnc = A->bnc;
1023 nr = A->nr;
1024 bs = bnr*bnc;
1025 sum = (LIS_SCALAR)0;
1026
1027 if( A->is_splited )
1028 {
1029 #ifdef _OPENMP
1030 #pragma omp parallel for reduction(+:sum) private(bi,bj,j)
1031 #endif
1032 for(bi=0;bi<nr;bi++)
1033 {
1034 for(bj=A->L->bptr[bi];bj<A->L->bptr[bi+1];bj++)
1035 {
1036 for(j=0;j<bs;j++)
1037 {
1038 sum += A->L->value[bj+j]*A->L->value[bj+j];
1039 }
1040 }
1041 for(bj=A->U->bptr[bi];bj<A->U->bptr[bi+1];bj++)
1042 {
1043 for(j=0;j<bs;j++)
1044 {
1045 sum += A->U->value[bj+j]*A->U->value[bj+j];
1046 }
1047 }
1048 }
1049 }
1050 else
1051 {
1052 #ifdef _OPENMP
1053 #pragma omp parallel for reduction(+:sum) private(bi,bj,j)
1054 #endif
1055 for(bi=0;bi<nr;bi++)
1056 {
1057 for(bj=A->bptr[bi];bj<A->bptr[bi+1];bj++)
1058 {
1059 for(j=0;j<bs;j++)
1060 {
1061 sum += A->value[bj+j]*A->value[bj+j];
1062 }
1063 }
1064 }
1065 }
1066 *nrm = sqrt(sum);
1067 LIS_DEBUG_FUNC_OUT;
1068 return LIS_SUCCESS;
1069 }
1070
1071 #undef __FUNC__
1072 #define __FUNC__ "lis_matrix_split_bsc"
lis_matrix_split_bsc(LIS_MATRIX A)1073 LIS_INT lis_matrix_split_bsc(LIS_MATRIX A)
1074 {
1075 LIS_INT i,j,np;
1076 LIS_INT bnr,bnc,nr,nc,bs;
1077 LIS_INT nnzl,nnzu;
1078 LIS_INT err;
1079 LIS_INT *lptr,*lindex,*uptr,*uindex;
1080 LIS_SCALAR *lvalue,*uvalue;
1081 LIS_MATRIX_DIAG D;
1082 #ifdef _OPENMP
1083 LIS_INT kl,ku;
1084 LIS_INT *liw,*uiw;
1085 #endif
1086
1087 LIS_DEBUG_FUNC_IN;
1088
1089 np = A->np;
1090 bnr = A->bnr;
1091 bnc = A->bnc;
1092 nr = A->nr;
1093 nc = A->nc;
1094 bs = A->bnr*A->bnc;
1095 nnzl = 0;
1096 nnzu = 0;
1097 D = NULL;
1098 lptr = NULL;
1099 lindex = NULL;
1100 lvalue = NULL;
1101 uptr = NULL;
1102 uindex = NULL;
1103 uvalue = NULL;
1104
1105 if( bnr!=bnc )
1106 {
1107 LIS_SETERR_IMP;
1108 return LIS_ERR_NOT_IMPLEMENTED;
1109 }
1110 #ifdef _OPENMP
1111 liw = (LIS_INT *)lis_malloc((nc+1)*sizeof(LIS_INT),"lis_matrix_split_bsc::liw");
1112 if( liw==NULL )
1113 {
1114 LIS_SETERR_MEM((nc+1)*sizeof(LIS_INT));
1115 return LIS_OUT_OF_MEMORY;
1116 }
1117 uiw = (LIS_INT *)lis_malloc((nc+1)*sizeof(LIS_INT),"lis_matrix_split_bsc::uiw");
1118 if( uiw==NULL )
1119 {
1120 LIS_SETERR_MEM((nc+1)*sizeof(LIS_INT));
1121 lis_free(liw);
1122 return LIS_OUT_OF_MEMORY;
1123 }
1124 #pragma omp parallel for private(i)
1125 for(i=0;i<nc+1;i++)
1126 {
1127 liw[i] = 0;
1128 uiw[i] = 0;
1129 }
1130 #pragma omp parallel for private(i,j)
1131 for(i=0;i<nc;i++)
1132 {
1133 for(j=A->bptr[i];j<A->bptr[i+1];j++)
1134 {
1135 if( A->bindex[j]<i )
1136 {
1137 liw[i+1]++;
1138 }
1139 else if( A->bindex[j]>i )
1140 {
1141 uiw[i+1]++;
1142 }
1143 }
1144 }
1145 for(i=0;i<nc;i++)
1146 {
1147 liw[i+1] += liw[i];
1148 uiw[i+1] += uiw[i];
1149 }
1150 nnzl = liw[nc];
1151 nnzu = uiw[nc];
1152 #else
1153 for(i=0;i<nc;i++)
1154 {
1155 for(j=A->bptr[i];j<A->bptr[i+1];j++)
1156 {
1157 if( A->bindex[j]<i )
1158 {
1159 nnzl++;
1160 }
1161 else if( A->bindex[j]>i )
1162 {
1163 nnzu++;
1164 }
1165 }
1166 }
1167 #endif
1168
1169 err = lis_matrix_LU_create(A);
1170 if( err )
1171 {
1172 return err;
1173 }
1174 err = lis_matrix_malloc_bsc(np,bnr,bnc,nnzl,&lptr,&lindex,&lvalue);
1175 if( err )
1176 {
1177 return err;
1178 }
1179 err = lis_matrix_malloc_bsc(np,bnr,bnc,nnzu,&uptr,&uindex,&uvalue);
1180 if( err )
1181 {
1182 lis_free2(6,lptr,lindex,lvalue,uptr,uindex,uvalue);
1183 return err;
1184 }
1185 err = lis_matrix_diag_duplicateM(A,&D);
1186 if( err )
1187 {
1188 lis_free2(6,lptr,lindex,lvalue,uptr,uindex,uvalue);
1189 return err;
1190 }
1191
1192 #ifdef _OPENMP
1193 #pragma omp parallel for private(i)
1194 for(i=0;i<nc+1;i++)
1195 {
1196 lptr[i] = liw[i];
1197 uptr[i] = uiw[i];
1198 }
1199 #pragma omp parallel for private(i,j,kl,ku)
1200 for(i=0;i<nc;i++)
1201 {
1202 kl = lptr[i];
1203 ku = uptr[i];
1204 for(j=A->bptr[i];j<A->bptr[i+1];j++)
1205 {
1206 if( A->bindex[j]<i )
1207 {
1208 lindex[kl] = A->bindex[j];
1209 memcpy(&lvalue[bs*kl],&A->value[bs*j],bs*sizeof(LIS_SCALAR));;
1210 kl++;
1211 }
1212 else if( A->bindex[j]>i )
1213 {
1214 uindex[ku] = A->bindex[j];
1215 memcpy(&uvalue[bs*ku],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
1216 ku++;
1217 }
1218 else
1219 {
1220 memcpy(&D->value[bs*i],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
1221 }
1222 }
1223 }
1224 lis_free2(2,liw,uiw);
1225 #else
1226 nnzl = 0;
1227 nnzu = 0;
1228 lptr[0] = 0;
1229 uptr[0] = 0;
1230 for(i=0;i<nc;i++)
1231 {
1232 for(j=A->bptr[i];j<A->bptr[i+1];j++)
1233 {
1234 if( A->bindex[j]<i )
1235 {
1236 lindex[nnzl] = A->bindex[j];
1237 memcpy(&lvalue[bs*nnzl],&A->value[bs*j],bs*sizeof(LIS_SCALAR));;
1238 nnzl++;
1239 }
1240 else if( A->bindex[j]>i )
1241 {
1242 uindex[nnzu] = A->bindex[j];
1243 memcpy(&uvalue[bs*nnzu],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
1244 nnzu++;
1245 }
1246 else
1247 {
1248 memcpy(&D->value[bs*i],&A->value[bs*j],bs*sizeof(LIS_SCALAR));
1249 }
1250 }
1251 lptr[i+1] = nnzl;
1252 uptr[i+1] = nnzu;
1253 }
1254 #endif
1255 A->L->bnr = bnr;
1256 A->L->bnc = bnc;
1257 A->L->nr = nr;
1258 A->L->nc = nc;
1259 A->L->bnnz = nnzl;
1260 A->L->bptr = lptr;
1261 A->L->bindex = lindex;
1262 A->L->value = lvalue;
1263 A->U->bnr = bnr;
1264 A->U->bnc = bnc;
1265 A->U->nr = nr;
1266 A->U->nc = nc;
1267 A->U->bnnz = nnzu;
1268 A->U->bptr = uptr;
1269 A->U->bindex = uindex;
1270 A->U->value = uvalue;
1271 A->D = D;
1272 A->is_splited = LIS_TRUE;
1273
1274 LIS_DEBUG_FUNC_OUT;
1275 return LIS_SUCCESS;
1276 }
1277
1278 #undef __FUNC__
1279 #define __FUNC__ "lis_matrix_merge_bsc"
lis_matrix_merge_bsc(LIS_MATRIX A)1280 LIS_INT lis_matrix_merge_bsc(LIS_MATRIX A)
1281 {
1282 LIS_INT i,j,np,nr,nc;
1283 LIS_INT bnnz,bnr,bnc,bs;
1284 LIS_INT err;
1285 LIS_INT *bptr,*bindex;
1286 LIS_SCALAR *value;
1287
1288 LIS_DEBUG_FUNC_IN;
1289
1290
1291 np = A->np;
1292 nc = A->nc;
1293 nr = A->nr;
1294 bnr = A->bnr;
1295 bnc = A->bnc;
1296 bs = bnr*bnc;
1297 bptr = NULL;
1298 bindex = NULL;
1299 value = NULL;
1300 bnnz = A->L->bnnz + A->U->bnnz + nr;
1301
1302 err = lis_matrix_malloc_bsc(np,bnr,bnc,bnnz,&bptr,&bindex,&value);
1303 if( err )
1304 {
1305 return err;
1306 }
1307
1308 bnnz = 0;
1309 bptr[0] = 0;
1310 for(i=0;i<nc;i++)
1311 {
1312 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1313 {
1314 bindex[bnnz] = A->L->bindex[j];
1315 memcpy(&value[bs*bnnz],&A->L->value[bs*j],bs*sizeof(LIS_SCALAR));;
1316 bnnz++;
1317 }
1318 bindex[bnnz] = i;
1319 memcpy(&value[bs*bnnz],&A->D->value[bs*i],bs*sizeof(LIS_SCALAR));;
1320 bnnz++;
1321 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1322 {
1323 bindex[bnnz] = A->U->bindex[j];
1324 memcpy(&value[bs*bnnz],&A->U->value[bs*j],bs*sizeof(LIS_SCALAR));;
1325 bnnz++;
1326 }
1327 bptr[i+1] = bnnz;
1328 }
1329
1330 A->bnnz = bnnz;
1331 A->bptr = bptr;
1332 A->value = value;
1333 A->bindex = bindex;
1334
1335 LIS_DEBUG_FUNC_OUT;
1336 return LIS_SUCCESS;
1337 }
1338
1339 #undef __FUNC__
1340 #define __FUNC__ "lis_matrix_solve_bsc"
lis_matrix_solve_bsc(LIS_MATRIX A,LIS_VECTOR B,LIS_VECTOR X,LIS_INT flag)1341 LIS_INT lis_matrix_solve_bsc(LIS_MATRIX A, LIS_VECTOR B, LIS_VECTOR X, LIS_INT flag)
1342 {
1343 LIS_INT i,j,k,ii,jj,nr,bnr,bnc,bs;
1344 LIS_SCALAR t0,t1,t2;
1345 LIS_SCALAR *b,*x,*w;
1346
1347 LIS_DEBUG_FUNC_IN;
1348
1349 nr = A->nr;
1350 bnr = A->bnr;
1351 bnc = A->bnc;
1352 bs = A->bnr*A->bnc;
1353 b = B->value;
1354 x = X->value;
1355
1356
1357 switch(flag)
1358 {
1359 case LIS_MATRIX_LOWER:
1360 switch(bnr)
1361 {
1362 case 1:
1363 for(i=0;i<nr;i++)
1364 {
1365 t0 = b[i];
1366 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1367 {
1368 jj = A->L->bindex[j];
1369 t0 -= A->L->value[j] * x[jj];
1370 }
1371 x[i] = A->WD->value[i] * t0;
1372 }
1373 break;
1374 case 2:
1375 for(i=0;i<nr;i++)
1376 {
1377 t0 = b[i*2];
1378 t1 = b[i*2+1];
1379 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1380 {
1381 jj = A->L->bindex[j];
1382 t0 -= A->L->value[j*4+0] * x[jj*2+0];
1383 t1 -= A->L->value[j*4+1] * x[jj*2+0];
1384 t0 -= A->L->value[j*4+2] * x[jj*2+1];
1385 t1 -= A->L->value[j*4+3] * x[jj*2+1];
1386 }
1387 x[i*2+0] = A->WD->value[4*i+0] * t0 + A->WD->value[4*i+2] * t1;
1388 x[i*2+1] = A->WD->value[4*i+1] * t0 + A->WD->value[4*i+3] * t1;
1389 }
1390 break;
1391 case 3:
1392 for(i=0;i<nr;i++)
1393 {
1394 t0 = b[i*3];
1395 t1 = b[i*3+1];
1396 t2 = b[i*3+2];
1397 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1398 {
1399 jj = A->L->bindex[j];
1400 t0 -= A->L->value[j*9+0] * x[jj*3+0];
1401 t1 -= A->L->value[j*9+1] * x[jj*3+0];
1402 t2 -= A->L->value[j*9+2] * x[jj*3+0];
1403 t0 -= A->L->value[j*9+3] * x[jj*3+1];
1404 t1 -= A->L->value[j*9+4] * x[jj*3+1];
1405 t2 -= A->L->value[j*9+5] * x[jj*3+1];
1406 t0 -= A->L->value[j*9+6] * x[jj*3+2];
1407 t1 -= A->L->value[j*9+7] * x[jj*3+2];
1408 t2 -= A->L->value[j*9+8] * x[jj*3+2];
1409 }
1410 x[i*3+0] = A->WD->value[9*i+0] * t0 + A->WD->value[9*i+3] * t1 + A->WD->value[9*i+6] * t2;
1411 x[i*3+1] = A->WD->value[9*i+1] * t0 + A->WD->value[9*i+4] * t1 + A->WD->value[9*i+7] * t2;
1412 x[i*3+2] = A->WD->value[9*i+2] * t0 + A->WD->value[9*i+5] * t1 + A->WD->value[9*i+8] * t2;
1413 }
1414 break;
1415 default:
1416 w = (LIS_SCALAR *)lis_malloc(bnr*sizeof(LIS_SCALAR),"lis_matrix_solve_bsc::w");
1417 for(i=0;i<nr;i++)
1418 {
1419 for(j=0;j<bnr;j++)
1420 {
1421 w[j] = b[i*bnr+j];
1422 }
1423 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1424 {
1425 k = A->L->bindex[j] * bnc;
1426 for(ii=0;ii<bnr;ii++)
1427 {
1428 t0 = w[ii];
1429 for(jj=0;jj<bnc;jj++)
1430 {
1431 t0 -= A->L->value[j*bs + jj*bnr+ii] * x[k + jj];
1432 }
1433 w[ii] = t0;
1434 }
1435 }
1436 for(ii=0;ii<bnr;ii++)
1437 {
1438 t0 = 0.0;
1439 for(jj=0;jj<bnc;jj++)
1440 {
1441 t0 += A->WD->value[i*bs + jj*bnr+ii] * w[jj];
1442 }
1443 x[i*bnr+ii] = t0;
1444 }
1445 }
1446 lis_free(w);
1447 break;
1448 }
1449 break;
1450 case LIS_MATRIX_UPPER:
1451 switch(bnr)
1452 {
1453 case 1:
1454 for(i=nr-1;i>=0;i--)
1455 {
1456 t0 = b[i];
1457 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1458 {
1459 jj = A->U->bindex[j];
1460 t0 -= A->U->value[j] * x[jj];
1461 }
1462 x[i] = A->WD->value[i] * t0;
1463 }
1464 break;
1465 case 2:
1466 for(i=nr-1;i>=0;i--)
1467 {
1468 t0 = b[i*2];
1469 t1 = b[i*2+1];
1470 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1471 {
1472 jj = A->U->bindex[j];
1473 t0 -= A->U->value[j*4+0] * x[jj*2+0];
1474 t1 -= A->U->value[j*4+1] * x[jj*2+0];
1475 t0 -= A->U->value[j*4+2] * x[jj*2+1];
1476 t1 -= A->U->value[j*4+3] * x[jj*2+1];
1477 }
1478 x[i*2+0] = A->WD->value[4*i+0] * t0 + A->WD->value[4*i+2] * t1;
1479 x[i*2+1] = A->WD->value[4*i+1] * t0 + A->WD->value[4*i+3] * t1;
1480 }
1481 break;
1482 case 3:
1483 for(i=nr-1;i>=0;i--)
1484 {
1485 t0 = b[i*3];
1486 t1 = b[i*3+1];
1487 t2 = b[i*3+2];
1488 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1489 {
1490 jj = A->U->bindex[j];
1491 t0 -= A->U->value[j*9+0] * x[jj*3+0];
1492 t1 -= A->U->value[j*9+1] * x[jj*3+0];
1493 t2 -= A->U->value[j*9+2] * x[jj*3+0];
1494 t0 -= A->U->value[j*9+3] * x[jj*3+1];
1495 t1 -= A->U->value[j*9+4] * x[jj*3+1];
1496 t2 -= A->U->value[j*9+5] * x[jj*3+1];
1497 t0 -= A->U->value[j*9+6] * x[jj*3+2];
1498 t1 -= A->U->value[j*9+7] * x[jj*3+2];
1499 t2 -= A->U->value[j*9+8] * x[jj*3+2];
1500 }
1501 x[i*3+0] = A->WD->value[9*i+0] * t0 + A->WD->value[9*i+3] * t1 + A->WD->value[9*i+6] * t2;
1502 x[i*3+1] = A->WD->value[9*i+1] * t0 + A->WD->value[9*i+4] * t1 + A->WD->value[9*i+7] * t2;
1503 x[i*3+2] = A->WD->value[9*i+2] * t0 + A->WD->value[9*i+5] * t1 + A->WD->value[9*i+8] * t2;
1504 }
1505 break;
1506 default:
1507 w = (LIS_SCALAR *)lis_malloc(bnr*sizeof(LIS_SCALAR),"lis_matrix_solve_bsc::w");
1508 for(i=nr-1;i>=0;i--)
1509 {
1510 for(j=0;j<bnr;j++)
1511 {
1512 w[j] = b[i*bnr+j];
1513 }
1514 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1515 {
1516 k = A->U->bindex[j] * bnc;
1517 for(ii=0;ii<bnr;ii++)
1518 {
1519 t0 = w[ii];
1520 for(jj=0;jj<bnc;jj++)
1521 {
1522 t0 -= A->U->value[j*bs + jj*bnr+ii] * x[k + jj];
1523 }
1524 w[ii] = t0;
1525 }
1526 }
1527 for(ii=0;ii<bnr;ii++)
1528 {
1529 t0 = 0.0;
1530 for(jj=0;jj<bnc;jj++)
1531 {
1532 t0 += A->WD->value[i*bs + jj*bnr+ii] * w[jj];
1533 }
1534 x[i*bnr+ii] = t0;
1535 }
1536 }
1537 lis_free(w);
1538 break;
1539 }
1540 break;
1541 case LIS_MATRIX_SSOR:
1542 switch(bnr)
1543 {
1544 case 1:
1545 for(i=0;i<nr;i++)
1546 {
1547 t0 = b[i];
1548 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1549 {
1550 jj = A->L->bindex[j];
1551 t0 -= A->L->value[j] * x[jj];
1552 }
1553 x[i] = A->WD->value[i] * t0;
1554 }
1555 for(i=nr-1;i>=0;i--)
1556 {
1557 t0 = 0.0;
1558 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1559 {
1560 jj = A->U->bindex[j];
1561 t0 += A->U->value[j] * x[jj];
1562 }
1563 x[i] -= A->WD->value[i] * t0;
1564 }
1565 break;
1566 case 2:
1567 for(i=0;i<nr;i++)
1568 {
1569 t0 = b[i*2];
1570 t1 = b[i*2+1];
1571 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1572 {
1573 jj = A->L->bindex[j];
1574 t0 -= A->L->value[j*4+0] * x[jj*2+0];
1575 t1 -= A->L->value[j*4+1] * x[jj*2+0];
1576 t0 -= A->L->value[j*4+2] * x[jj*2+1];
1577 t1 -= A->L->value[j*4+3] * x[jj*2+1];
1578 }
1579 x[i*2+0] = A->WD->value[4*i+0] * t0 + A->WD->value[4*i+2] * t1;
1580 x[i*2+1] = A->WD->value[4*i+1] * t0 + A->WD->value[4*i+3] * t1;
1581 }
1582 for(i=nr-1;i>=0;i--)
1583 {
1584 t0 = 0.0;
1585 t1 = 0.0;
1586 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1587 {
1588 jj = A->U->bindex[j];
1589 t0 += A->U->value[j*4+0] * x[jj*2+0];
1590 t1 += A->U->value[j*4+1] * x[jj*2+0];
1591 t0 += A->U->value[j*4+2] * x[jj*2+1];
1592 t1 += A->U->value[j*4+3] * x[jj*2+1];
1593 }
1594 x[i*2+0] -= A->WD->value[4*i+0] * t0 + A->WD->value[4*i+2] * t1;
1595 x[i*2+1] -= A->WD->value[4*i+1] * t0 + A->WD->value[4*i+3] * t1;
1596 }
1597 break;
1598 case 3:
1599 for(i=0;i<nr;i++)
1600 {
1601 t0 = b[i*bnr];
1602 t1 = b[i*bnr+1];
1603 t2 = b[i*bnr+2];
1604 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1605 {
1606 jj = A->L->bindex[j];
1607 t0 -= A->L->value[j*9+0] * x[jj*3+0];
1608 t1 -= A->L->value[j*9+1] * x[jj*3+0];
1609 t2 -= A->L->value[j*9+2] * x[jj*3+0];
1610 t0 -= A->L->value[j*9+3] * x[jj*3+1];
1611 t1 -= A->L->value[j*9+4] * x[jj*3+1];
1612 t2 -= A->L->value[j*9+5] * x[jj*3+1];
1613 t0 -= A->L->value[j*9+6] * x[jj*3+2];
1614 t1 -= A->L->value[j*9+7] * x[jj*3+2];
1615 t2 -= A->L->value[j*9+8] * x[jj*3+2];
1616 }
1617 x[i*bnr+0] = A->WD->value[9*i+0] * t0 + A->WD->value[9*i+3] * t1 + A->WD->value[9*i+6] * t2;
1618 x[i*bnr+1] = A->WD->value[9*i+1] * t0 + A->WD->value[9*i+4] * t1 + A->WD->value[9*i+7] * t2;
1619 x[i*bnr+2] = A->WD->value[9*i+2] * t0 + A->WD->value[9*i+5] * t1 + A->WD->value[9*i+8] * t2;
1620 }
1621 for(i=nr-1;i>=0;i--)
1622 {
1623 t0 = 0.0;
1624 t1 = 0.0;
1625 t2 = 0.0;
1626 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1627 {
1628 jj = A->U->bindex[j];
1629 t0 += A->U->value[j*9+0] * x[jj*3+0];
1630 t1 += A->U->value[j*9+1] * x[jj*3+0];
1631 t2 += A->U->value[j*9+2] * x[jj*3+0];
1632 t0 += A->U->value[j*9+3] * x[jj*3+1];
1633 t1 += A->U->value[j*9+4] * x[jj*3+1];
1634 t2 += A->U->value[j*9+5] * x[jj*3+1];
1635 t0 += A->U->value[j*9+6] * x[jj*3+2];
1636 t1 += A->U->value[j*9+7] * x[jj*3+2];
1637 t2 += A->U->value[j*9+8] * x[jj*3+2];
1638 }
1639 x[i*3+0] -= A->WD->value[9*i+0] * t0 + A->WD->value[9*i+3] * t1 + A->WD->value[9*i+6] * t2;
1640 x[i*3+1] -= A->WD->value[9*i+1] * t0 + A->WD->value[9*i+4] * t1 + A->WD->value[9*i+7] * t2;
1641 x[i*3+2] -= A->WD->value[9*i+2] * t0 + A->WD->value[9*i+5] * t1 + A->WD->value[9*i+8] * t2;
1642 }
1643 break;
1644 default:
1645 w = (LIS_SCALAR *)lis_malloc(bnr*sizeof(LIS_SCALAR),"lis_matrix_solve_bsc::w");
1646 for(i=0;i<nr;i++)
1647 {
1648 for(j=0;j<bnr;j++)
1649 {
1650 w[j] = b[i*bnr+j];
1651 }
1652 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1653 {
1654 k = A->L->bindex[j] * bnc;
1655 for(ii=0;ii<bnr;ii++)
1656 {
1657 t0 = w[ii];
1658 for(jj=0;jj<bnc;jj++)
1659 {
1660 t0 -= A->L->value[j*bs + jj*bnr+ii] * x[k + jj];
1661 }
1662 w[ii] = t0;
1663 }
1664 }
1665 for(ii=0;ii<bnr;ii++)
1666 {
1667 t0 = 0.0;
1668 for(jj=0;jj<bnc;jj++)
1669 {
1670 t0 += A->WD->value[i*bs + jj*bnr+ii] * w[jj];
1671 }
1672 x[i*bnr+ii] = t0;
1673 }
1674 }
1675 for(i=nr-1;i>=0;i--)
1676 {
1677 for(j=0;j<bnr;j++)
1678 {
1679 w[j] = 0.0;
1680 }
1681 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1682 {
1683 k = A->U->bindex[j] * bnc;
1684 for(ii=0;ii<bnr;ii++)
1685 {
1686 t0 = w[ii];
1687 for(jj=0;jj<bnc;jj++)
1688 {
1689 t0 += A->U->value[j*bs + jj*bnr+ii] * x[k + jj];
1690 }
1691 w[ii] = t0;
1692 }
1693 }
1694 for(ii=0;ii<bnr;ii++)
1695 {
1696 t0 = 0.0;
1697 for(jj=0;jj<bnc;jj++)
1698 {
1699 t0 += A->WD->value[i*bs + jj*bnr+ii] * w[jj];
1700 }
1701 x[i*bnr+ii] -= t0;
1702 }
1703 }
1704 lis_free(w);
1705 break;
1706 }
1707 break;
1708 }
1709
1710 LIS_DEBUG_FUNC_OUT;
1711 return LIS_SUCCESS;
1712 }
1713
1714 #undef __FUNC__
1715 #define __FUNC__ "lis_matrix_solveh_bsc"
lis_matrix_solveh_bsc(LIS_MATRIX A,LIS_VECTOR B,LIS_VECTOR X,LIS_INT flag)1716 LIS_INT lis_matrix_solveh_bsc(LIS_MATRIX A, LIS_VECTOR B, LIS_VECTOR X, LIS_INT flag)
1717 {
1718 LIS_INT i,j,k,ii,jj,nr,bnr,bnc,bs;
1719 LIS_SCALAR t0,t1,t2;
1720 LIS_SCALAR *x,*w;
1721
1722 LIS_DEBUG_FUNC_IN;
1723
1724 nr = A->nr;
1725 bnr = A->bnr;
1726 bnc = A->bnc;
1727 bs = A->bnr*A->bnc;
1728 x = X->value;
1729
1730 lis_vector_copy(B,X);
1731 switch(flag)
1732 {
1733 case LIS_MATRIX_LOWER:
1734 switch(bnr)
1735 {
1736 case 1:
1737 for(i=0;i<nr;i++)
1738 {
1739 x[i] = x[i] * A->WD->value[i];
1740 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1741 {
1742 jj = A->U->bindex[j];
1743 x[jj] -= conj(A->U->value[j]) * x[i];
1744 }
1745 }
1746 break;
1747 case 2:
1748 for(i=0;i<nr;i++)
1749 {
1750 t0 = conj(A->WD->value[4*i+0]) * x[i*2] + conj(A->WD->value[4*i+1]) * x[i*2+1];
1751 t1 = conj(A->WD->value[4*i+2]) * x[i*2] + conj(A->WD->value[4*i+3]) * x[i*2+1];
1752 x[i*2+0] = t0;
1753 x[i*2+1] = t1;
1754 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1755 {
1756 jj = A->U->bindex[j];
1757 x[jj*2+0] -= conj(A->U->value[j*4+0]) * t0 + A->U->value[j*4+1] * t1;
1758 x[jj*2+1] -= conj(A->U->value[j*4+2]) * t0 + A->U->value[j*4+3] * t1;
1759 }
1760 }
1761 break;
1762 case 3:
1763 for(i=0;i<nr;i++)
1764 {
1765 t0 = conj(A->WD->value[9*i+0]) * x[i*3] + conj(A->WD->value[9*i+1]) * x[i*3+1] + conj(A->WD->value[9*i+2]) * x[i*3+2];
1766 t1 = conj(A->WD->value[9*i+3]) * x[i*3] + conj(A->WD->value[9*i+4]) * x[i*3+1] + conj(A->WD->value[9*i+5]) * x[i*3+2];
1767 t2 = conj(A->WD->value[9*i+6]) * x[i*3] + conj(A->WD->value[9*i+7]) * x[i*3+1] + conj(A->WD->value[9*i+8]) * x[i*3+2];
1768 x[i*3] = t0;
1769 x[i*3+1] = t1;
1770 x[i*3+2] = t2;
1771 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1772 {
1773 jj = A->U->bindex[j];
1774 x[jj*3+0] -= conj(A->U->value[j*9+0]) * t0 + A->U->value[j*9+1] * t1 + A->U->value[j*9+2] * t2;
1775 x[jj*3+1] -= conj(A->U->value[j*9+3]) * t0 + A->U->value[j*9+4] * t1 + A->U->value[j*9+5] * t2;
1776 x[jj*3+2] -= conj(A->U->value[j*9+6]) * t0 + A->U->value[j*9+7] * t1 + A->U->value[j*9+8] * t2;
1777 }
1778 }
1779 break;
1780 default:
1781 w = (LIS_SCALAR *)lis_malloc(bnc*sizeof(LIS_SCALAR),"lis_matrix_solveh_bsc::w");
1782 for(i=0;i<nr;i++)
1783 {
1784 for(jj=0;jj<bnc;jj++)
1785 {
1786 t0 = 0.0;
1787 for(ii=0;ii<bnr;ii++)
1788 {
1789 t0 += conj(A->WD->value[i*bs + jj*bnr+ii]) * x[i*bnr + ii];
1790 }
1791 w[jj] = t0;
1792 }
1793 memcpy(&x[i*bnr],w,bnr*sizeof(LIS_SCALAR));
1794 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1795 {
1796 k = A->U->bindex[j] * bnc;
1797 for(jj=0;jj<bnc;jj++)
1798 {
1799 t0 = 0.0;
1800 for(ii=0;ii<bnr;ii++)
1801 {
1802 t0 += conj(A->U->value[j*bs + jj*bnr+ii]) * w[ii];
1803 }
1804 x[k + jj] -= t0;
1805 }
1806 }
1807 }
1808 lis_free(w);
1809 break;
1810 }
1811 break;
1812 case LIS_MATRIX_UPPER:
1813 switch(bnr)
1814 {
1815 case 1:
1816 for(i=nr-1;i>=0;i--)
1817 {
1818 x[i] = x[i] * A->WD->value[i];
1819 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1820 {
1821 jj = A->L->bindex[j];
1822 x[jj] -= conj(A->L->value[j]) * x[i];
1823 }
1824 }
1825 break;
1826 case 2:
1827 for(i=nr-1;i>=0;i--)
1828 {
1829 t0 = conj(A->WD->value[4*i+0]) * x[i*2] + conj(A->WD->value[4*i+1]) * x[i*2+1];
1830 t1 = conj(A->WD->value[4*i+2]) * x[i*2] + conj(A->WD->value[4*i+3]) * x[i*2+1];
1831 x[i*2+0] = t0;
1832 x[i*2+1] = t1;
1833 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1834 {
1835 jj = A->L->bindex[j];
1836 x[jj*2+0] -= conj(A->L->value[j*4+0]) * t0 + conj(A->L->value[j*4+1]) * t1;
1837 x[jj*2+1] -= conj(A->L->value[j*4+2]) * t0 + conj(A->L->value[j*4+3]) * t1;
1838 }
1839 }
1840 break;
1841 case 3:
1842 for(i=nr-1;i>=0;i--)
1843 {
1844 t0 = conj(A->WD->value[9*i+0]) * x[i*3] + conj(A->WD->value[9*i+1]) * x[i*3+1] + conj(A->WD->value[9*i+2]) * x[i*3+2];
1845 t1 = conj(A->WD->value[9*i+3]) * x[i*3] + conj(A->WD->value[9*i+4]) * x[i*3+1] + conj(A->WD->value[9*i+5]) * x[i*3+2];
1846 t2 = conj(A->WD->value[9*i+6]) * x[i*3] + conj(A->WD->value[9*i+7]) * x[i*3+1] + conj(A->WD->value[9*i+8]) * x[i*3+2];
1847 x[i*3] = t0;
1848 x[i*3+1] = t1;
1849 x[i*3+2] = t2;
1850 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1851 {
1852 jj = A->L->bindex[j];
1853 x[jj*3+0] -= conj(A->L->value[j*9+0]) * t0 + conj(A->L->value[j*9+1]) * t1 + conj(A->L->value[j*9+2]) * t2;
1854 x[jj*3+1] -= conj(A->L->value[j*9+3]) * t0 + conj(A->L->value[j*9+4]) * t1 + conj(A->L->value[j*9+5]) * t2;
1855 x[jj*3+2] -= conj(A->L->value[j*9+6]) * t0 + conj(A->L->value[j*9+7]) * t1 + conj(A->L->value[j*9+8]) * t2;
1856 }
1857 }
1858 break;
1859 default:
1860 w = (LIS_SCALAR *)lis_malloc(bnr*sizeof(LIS_SCALAR),"lis_matrix_solveh_bsc::w");
1861 for(i=nr-1;i>=0;i--)
1862 {
1863 for(jj=0;jj<bnc;jj++)
1864 {
1865 t0 = 0.0;
1866 for(ii=0;ii<bnr;ii++)
1867 {
1868 t0 += conj(A->WD->value[i*bs + jj*bnr+ii]) * x[i*bnr + ii];
1869 }
1870 w[jj] = t0;
1871 }
1872 memcpy(&x[i*bnr],w,bnr*sizeof(LIS_SCALAR));
1873 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1874 {
1875 k = A->L->bindex[j] * bnc;
1876 for(jj=0;jj<bnc;jj++)
1877 {
1878 t0 = 0.0;
1879 for(ii=0;ii<bnr;ii++)
1880 {
1881 t0 += conj(A->L->value[j*bs + jj*bnr+ii]) * w[ii];
1882 }
1883 x[k + jj] -= t0;
1884 }
1885 }
1886 }
1887 lis_free(w);
1888 break;
1889 }
1890 break;
1891 case LIS_MATRIX_SSOR:
1892 switch(bnr)
1893 {
1894 case 1:
1895 for(i=0;i<nr;i++)
1896 {
1897 t0 = x[i] * A->WD->value[i];
1898 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1899 {
1900 jj = A->U->bindex[j];
1901 x[jj] -= conj(A->U->value[j]) * t0;
1902 }
1903 }
1904 for(i=nr-1;i>=0;i--)
1905 {
1906 t0 = x[i] * A->WD->value[i];
1907 x[i] = t0;
1908 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1909 {
1910 jj = A->L->bindex[j];
1911 x[jj] -= conj(A->L->value[j]) * t0;
1912 }
1913 }
1914 break;
1915 case 2:
1916 for(i=0;i<nr;i++)
1917 {
1918 t0 = conj(A->WD->value[4*i+0]) * x[i*2] + conj(A->WD->value[4*i+1]) * x[i*2+1];
1919 t1 = conj(A->WD->value[4*i+2]) * x[i*2] + conj(A->WD->value[4*i+3]) * x[i*2+1];
1920 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1921 {
1922 jj = A->U->bindex[j];
1923 x[jj*2+0] -= conj(A->U->value[j*4+0]) * t0 + A->U->value[j*4+1] * t1;
1924 x[jj*2+1] -= conj(A->U->value[j*4+2]) * t0 + A->U->value[j*4+3] * t1;
1925 }
1926 }
1927 for(i=nr-1;i>=0;i--)
1928 {
1929 t0 = conj(A->WD->value[4*i+0]) * x[i*2] + conj(A->WD->value[4*i+1]) * x[i*2+1];
1930 t1 = conj(A->WD->value[4*i+2]) * x[i*2] + conj(A->WD->value[4*i+3]) * x[i*2+1];
1931 x[i*2+0] = t0;
1932 x[i*2+1] = t1;
1933 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1934 {
1935 jj = A->L->bindex[j];
1936 x[jj*2+0] -= conj(A->L->value[j*4+0]) * t0 + A->L->value[j*4+1] * t1;
1937 x[jj*2+1] -= conj(A->L->value[j*4+2]) * t0 + A->L->value[j*4+3] * t1;
1938 }
1939 }
1940 break;
1941 case 3:
1942 for(i=0;i<nr;i++)
1943 {
1944 t0 = conj(A->WD->value[9*i+0]) * x[i*3] + conj(A->WD->value[9*i+1]) * x[i*3+1] + conj(A->WD->value[9*i+2]) * x[i*3+2];
1945 t1 = conj(A->WD->value[9*i+3]) * x[i*3] + conj(A->WD->value[9*i+4]) * x[i*3+1] + conj(A->WD->value[9*i+5]) * x[i*3+2];
1946 t2 = conj(A->WD->value[9*i+6]) * x[i*3] + conj(A->WD->value[9*i+7]) * x[i*3+1] + conj(A->WD->value[9*i+8]) * x[i*3+2];
1947 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1948 {
1949 jj = A->U->bindex[j];
1950 x[jj*3+0] -= conj(A->U->value[j*9+0]) * t0 + conj(A->U->value[j*9+1]) * t1 + conj(A->U->value[j*9+2]) * t2;
1951 x[jj*3+1] -= conj(A->U->value[j*9+3]) * t0 + conj(A->U->value[j*9+4]) * t1 + conj(A->U->value[j*9+5]) * t2;
1952 x[jj*3+2] -= conj(A->U->value[j*9+6]) * t0 + conj(A->U->value[j*9+7]) * t1 + conj(A->U->value[j*9+8]) * t2;
1953 }
1954 }
1955 for(i=nr-1;i>=0;i--)
1956 {
1957 t0 = conj(A->WD->value[9*i+0]) * x[i*3] + conj(A->WD->value[9*i+1]) * x[i*3+1] + conj(A->WD->value[9*i+2]) * x[i*3+2];
1958 t1 = conj(A->WD->value[9*i+3]) * x[i*3] + conj(A->WD->value[9*i+4]) * x[i*3+1] + conj(A->WD->value[9*i+5]) * x[i*3+2];
1959 t2 = conj(A->WD->value[9*i+6]) * x[i*3] + conj(A->WD->value[9*i+7]) * x[i*3+1] + conj(A->WD->value[9*i+8]) * x[i*3+2];
1960 x[i*3] = t0;
1961 x[i*3+1] = t1;
1962 x[i*3+2] = t2;
1963 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
1964 {
1965 jj = A->L->bindex[j];
1966 x[jj*3+0] -= conj(A->L->value[j*9+0]) * t0 + conj(A->L->value[j*9+1]) * t1 + conj(A->L->value[j*9+2]) * t2;
1967 x[jj*3+1] -= conj(A->L->value[j*9+3]) * t0 + conj(A->L->value[j*9+4]) * t1 + conj(A->L->value[j*9+5]) * t2;
1968 x[jj*3+2] -= conj(A->L->value[j*9+6]) * t0 + conj(A->L->value[j*9+7]) * t1 + conj(A->L->value[j*9+8]) * t2;
1969 }
1970 }
1971 break;
1972 default:
1973 w = (LIS_SCALAR *)lis_malloc(bnc*sizeof(LIS_SCALAR),"lis_matrix_solveh_bsc::w");
1974 for(i=0;i<nr;i++)
1975 {
1976 for(jj=0;jj<bnc;jj++)
1977 {
1978 t0 = 0.0;
1979 for(ii=0;ii<bnr;ii++)
1980 {
1981 t0 += conj(A->WD->value[i*bs + jj*bnr+ii]) * x[i*bnr + ii];
1982 }
1983 w[jj] = t0;
1984 }
1985 for(j=A->U->bptr[i];j<A->U->bptr[i+1];j++)
1986 {
1987 k = A->U->bindex[j] * bnc;
1988 for(jj=0;jj<bnc;jj++)
1989 {
1990 t0 = 0.0;
1991 for(ii=0;ii<bnr;ii++)
1992 {
1993 t0 += conj(A->U->value[j*bs + jj*bnr+ii]) * w[ii];
1994 }
1995 x[k + jj] -= t0;
1996 }
1997 }
1998 }
1999 for(i=nr-1;i>=0;i--)
2000 {
2001 for(jj=0;jj<bnc;jj++)
2002 {
2003 t0 = 0.0;
2004 for(ii=0;ii<bnr;ii++)
2005 {
2006 t0 += conj(A->WD->value[i*bs + jj*bnr+ii]) * x[i*bnr + ii];
2007 }
2008 w[jj] = t0;
2009 }
2010 memcpy(&x[i*bnr],w,bnr*sizeof(LIS_SCALAR));
2011 for(j=A->L->bptr[i];j<A->L->bptr[i+1];j++)
2012 {
2013 k = A->L->bindex[j] * bnc;
2014 for(jj=0;jj<bnc;jj++)
2015 {
2016 t0 = 0.0;
2017 for(ii=0;ii<bnr;ii++)
2018 {
2019 t0 += conj(A->L->value[j*bs + jj*bnr+ii]) * w[ii];
2020 }
2021 x[k + jj] -= t0;
2022 }
2023 }
2024 }
2025 lis_free(w);
2026 break;
2027 }
2028 break;
2029 }
2030
2031 LIS_DEBUG_FUNC_OUT;
2032 return LIS_SUCCESS;
2033 }
2034