1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
5 1. Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7 2. Redistributions in binary form must reproduce the above copyright
8 notice, this list of conditions and the following disclaimer in the
9 documentation and/or other materials provided with the distribution.
10 3. Neither the name of the project nor the names of its contributors
11 may be used to endorse or promote products derived from this software
12 without specific prior written permission.
13
14 THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18 PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 #include "lis_config_win.h"
32 #endif
33 #endif
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #ifdef HAVE_MALLOC_H
38 #include <malloc.h>
39 #endif
40 #include <string.h>
41 #include <stdarg.h>
42 #include <math.h>
43 #ifdef _OPENMP
44 #include <omp.h>
45 #endif
46 #ifdef USE_MPI
47 #include <mpi.h>
48 #endif
49 #include "lislib.h"
50
51 /************************************************
52 * function | SOM |
53 *-----------------------------+-----+
54 * lis_matrix_set | o |
55 * lis_matrix_setDLU | o |
56 * lis_matrix_malloc | o |
57 * lis_matrix_elements_copy | o |
58 * lis_matrix_transpose | o |
59 * lis_matrix_split | o |
60 * lis_matrix_merge | o |
61 *-----------------------------+-----+-----+
62 * function |merge|split|
63 *-----------------------------+-----+-----|
64 * lis_matrix_convert | o | |
65 * lis_matrix_copy | o | o |
66 * lis_matrix_get_diagonal | o | o |
67 * lis_matrix_shift_diagonal | o | o |
68 * lis_matrix_scale | o | o |
69 * lis_matrix_scale_symm | o | o |
70 * lis_matrix_normf | o | o |
71 * lis_matrix_sort | o | o |
72 * lis_matrix_solve | xxx | o |
73 * lis_matrix_solveh | xxx | o |
74 ************************************************/
75
76 #undef __FUNC__
77 #define __FUNC__ "lis_matrix_set_ell"
lis_matrix_set_ell(LIS_INT maxnzr,LIS_INT * index,LIS_SCALAR * value,LIS_MATRIX A)78 LIS_INT lis_matrix_set_ell(LIS_INT maxnzr, LIS_INT *index, LIS_SCALAR *value, LIS_MATRIX A)
79 {
80 LIS_INT err;
81
82 LIS_DEBUG_FUNC_IN;
83
84 #if 0
85 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
86 if( err ) return err;
87 #else
88 if(lis_matrix_is_assembled(A)) {
89 LIS_DEBUG_FUNC_OUT;
90 return LIS_SUCCESS;
91 }
92 else {
93 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
94 if( err ) return err;
95 }
96 #endif
97
98 A->index = index;
99 A->value = value;
100 A->status = -LIS_MATRIX_ELL;
101 A->maxnzr = maxnzr;
102
103 LIS_DEBUG_FUNC_OUT;
104 return LIS_SUCCESS;
105 }
106
107 #undef __FUNC__
108 #define __FUNC__ "lis_matrix_setDLU_ell"
lis_matrix_setDLU_ell(LIS_INT lmaxnzr,LIS_INT umaxnzr,LIS_SCALAR * diag,LIS_INT * lindex,LIS_SCALAR * lvalue,LIS_INT * uindex,LIS_SCALAR * uvalue,LIS_MATRIX A)109 LIS_INT lis_matrix_setDLU_ell(LIS_INT lmaxnzr, LIS_INT umaxnzr, LIS_SCALAR *diag, LIS_INT *lindex, LIS_SCALAR *lvalue, LIS_INT *uindex, LIS_SCALAR *uvalue, LIS_MATRIX A)
110 {
111 LIS_INT err;
112 LIS_MATRIX_DIAG D;
113
114 LIS_DEBUG_FUNC_IN;
115
116 #if 0
117 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
118 if( err ) return err;
119 #else
120 if(lis_matrix_is_assembled(A)) return LIS_SUCCESS;
121 else {
122 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
123 if( err ) return err;
124 }
125 #endif
126
127 A->L = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT),"lis_matrix_setDLU_ell::A->L");
128 if( A->L==NULL )
129 {
130 LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT));
131 return LIS_OUT_OF_MEMORY;
132 }
133 A->U = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT),"lis_matrix_setDLU_ell::A->U");
134 if( A->U==NULL )
135 {
136 LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT));
137 lis_matrix_DLU_destroy(A);
138 return LIS_OUT_OF_MEMORY;
139 }
140 err = lis_matrix_diag_create(A->n,0,A->comm,&D);
141 if( err )
142 {
143 lis_matrix_DLU_destroy(A);
144 return err;
145 }
146
147 lis_free(D->value);
148 D->value = diag;
149 A->D = D;
150 A->L->maxnzr = lmaxnzr;
151 A->L->index = lindex;
152 A->L->value = lvalue;
153 A->U->maxnzr = umaxnzr;
154 A->U->index = uindex;
155 A->U->value = uvalue;
156 A->is_copy = LIS_FALSE;
157 A->status = -LIS_MATRIX_ELL;
158 A->is_splited = LIS_TRUE;
159
160 LIS_DEBUG_FUNC_OUT;
161 return LIS_SUCCESS;
162 }
163
164 #undef __FUNC__
165 #define __FUNC__ "lis_matrix_malloc_ell"
lis_matrix_malloc_ell(LIS_INT n,LIS_INT maxnzr,LIS_INT ** index,LIS_SCALAR ** value)166 LIS_INT lis_matrix_malloc_ell(LIS_INT n, LIS_INT maxnzr, LIS_INT **index, LIS_SCALAR **value)
167 {
168 LIS_DEBUG_FUNC_IN;
169
170 *index = NULL;
171 *value = NULL;
172
173 *index = (LIS_INT *)lis_malloc( n*maxnzr*sizeof(LIS_INT),"lis_matrix_malloc_ell::index" );
174 if( *index==NULL )
175 {
176 LIS_SETERR_MEM(n*maxnzr*sizeof(LIS_INT));
177 lis_free2(2,*index,*value);
178 return LIS_OUT_OF_MEMORY;
179 }
180 *value = (LIS_SCALAR *)lis_malloc( n*maxnzr*sizeof(LIS_SCALAR),"lis_matrix_malloc_ell::value" );
181 if( *value==NULL )
182 {
183 LIS_SETERR_MEM(n*maxnzr*sizeof(LIS_SCALAR));
184 lis_free2(2,*index,*value);
185 return LIS_OUT_OF_MEMORY;
186 }
187 LIS_DEBUG_FUNC_OUT;
188 return LIS_SUCCESS;
189 }
190
191 #undef __FUNC__
192 #define __FUNC__ "lis_matrix_elements_copy_ell"
lis_matrix_elements_copy_ell(LIS_INT n,LIS_INT maxnzr,LIS_INT * index,LIS_SCALAR * value,LIS_INT * o_index,LIS_SCALAR * o_value)193 LIS_INT lis_matrix_elements_copy_ell(LIS_INT n, LIS_INT maxnzr, LIS_INT *index, LIS_SCALAR *value, LIS_INT *o_index, LIS_SCALAR *o_value)
194 {
195 LIS_INT i,j;
196 #ifdef _OPENMP
197 LIS_INT is,ie,my_rank,nprocs;
198 #endif
199
200 LIS_DEBUG_FUNC_IN;
201
202 #ifdef _OPENMP
203 nprocs = omp_get_max_threads();
204 #pragma omp parallel private(i,j,is,ie,my_rank)
205 {
206 my_rank = omp_get_thread_num();
207 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
208 for(j=0;j<maxnzr;j++)
209 {
210 for(i=is;i<ie;i++)
211 {
212 o_value[j*n+i] = value[j*n+i];
213 o_index[j*n+i] = index[j*n+i];
214 }
215 }
216 }
217 #else
218 for(j=0;j<maxnzr;j++)
219 {
220 for(i=0;i<n;i++)
221 {
222 o_value[j*n+i] = value[j*n+i];
223 o_index[j*n+i] = index[j*n+i];
224 }
225 }
226 #endif
227
228 LIS_DEBUG_FUNC_OUT;
229 return LIS_SUCCESS;
230 }
231
232 #undef __FUNC__
233 #define __FUNC__ "lis_matrix_copy_ell"
lis_matrix_copy_ell(LIS_MATRIX Ain,LIS_MATRIX Aout)234 LIS_INT lis_matrix_copy_ell(LIS_MATRIX Ain, LIS_MATRIX Aout)
235 {
236 LIS_INT err;
237 LIS_INT i,n,maxnzr,lmaxnzr,umaxnzr;
238 LIS_INT *index;
239 LIS_INT *lindex;
240 LIS_INT *uindex;
241 LIS_SCALAR *value,*lvalue,*uvalue,*diag;
242
243 LIS_DEBUG_FUNC_IN;
244
245 n = Ain->n;
246
247 if( Ain->is_splited )
248 {
249 lmaxnzr = Ain->L->maxnzr;
250 umaxnzr = Ain->U->maxnzr;
251 lindex = NULL;
252 uindex = NULL;
253 diag = NULL;
254
255 err = lis_matrix_malloc_ell(n,lmaxnzr,&lindex,&lvalue);
256 if( err )
257 {
258 return err;
259 }
260 err = lis_matrix_malloc_ell(n,umaxnzr,&uindex,&uvalue);
261 if( err )
262 {
263 lis_free2(5,diag,uindex,lindex,uvalue,lvalue);
264 return err;
265 }
266 diag = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_matrix_copy_ell::diag");
267 if( diag==NULL )
268 {
269 lis_free2(5,diag,uindex,lindex,uvalue,lvalue);
270 return err;
271 }
272
273 #ifdef _OPENMP
274 #pragma omp parallel for private(i)
275 #endif
276 for(i=0;i<n;i++)
277 {
278 diag[i] = Ain->D->value[i];
279 }
280 lis_matrix_elements_copy_ell(n,lmaxnzr,Ain->L->index,Ain->L->value,lindex,lvalue);
281 lis_matrix_elements_copy_ell(n,umaxnzr,Ain->U->index,Ain->U->value,uindex,uvalue);
282
283 err = lis_matrix_setDLU_ell(lmaxnzr,umaxnzr,diag,lindex,lvalue,uindex,uvalue,Aout);
284 if( err )
285 {
286 lis_free2(5,diag,uindex,lindex,uvalue,lvalue);
287 return err;
288 }
289 }
290 if( !Ain->is_splited || (Ain->is_splited && Ain->is_save) )
291 {
292 index = NULL;
293 value = NULL;
294 maxnzr = Ain->maxnzr;
295
296 err = lis_matrix_malloc_ell(n,maxnzr,&index,&value);
297 if( err )
298 {
299 return err;
300 }
301
302 lis_matrix_elements_copy_ell(n,maxnzr,Ain->index,Ain->value,index,value);
303
304 err = lis_matrix_set_ell(maxnzr,index,value,Aout);
305 if( err )
306 {
307 lis_free2(2,index,value);
308 return err;
309 }
310 }
311
312 err = lis_matrix_assemble(Aout);
313 if( err )
314 {
315 lis_matrix_storage_destroy(Aout);
316 return err;
317 }
318 LIS_DEBUG_FUNC_OUT;
319 return LIS_SUCCESS;
320 }
321
322 #undef __FUNC__
323 #define __FUNC__ "lis_matrix_split_ell"
lis_matrix_split_ell(LIS_MATRIX A)324 LIS_INT lis_matrix_split_ell(LIS_MATRIX A)
325 {
326 LIS_INT i,j,n,maxnzr,my_rank,nprocs,is,ie;
327 LIS_INT lmaxnzr,umaxnzr,lcount,ucount;
328 LIS_INT err;
329 #ifdef _OPENMP
330 LIS_INT *iw;
331 #endif
332 LIS_INT *lindex,*uindex;
333 LIS_SCALAR *lvalue,*uvalue;
334 LIS_MATRIX_DIAG D;
335
336 LIS_DEBUG_FUNC_IN;
337
338 n = A->n;
339 maxnzr = A->maxnzr;
340 lmaxnzr = 0;
341 umaxnzr = 0;
342 D = NULL;
343 lindex = NULL;
344 lvalue = NULL;
345 uindex = NULL;
346 uvalue = NULL;
347
348 #ifdef _OPENMP
349 nprocs = omp_get_max_threads();
350 #else
351 nprocs = 1;
352 #endif
353 #ifdef _OPENMP
354 iw = (LIS_INT *)lis_malloc(nprocs*LIS_VEC_TMP_PADD*sizeof(LIS_INT),"lis_matrix_split_ell::iw");
355 if( iw==NULL )
356 {
357 LIS_SETERR_MEM(nprocs*LIS_VEC_TMP_PADD*sizeof(LIS_INT));
358 return LIS_OUT_OF_MEMORY;
359 }
360 #pragma omp parallel private(i,j,is,ie,lcount,ucount,my_rank)
361 {
362 my_rank = omp_get_thread_num();
363 iw[my_rank*LIS_VEC_TMP_PADD] = 0;
364 iw[my_rank*LIS_VEC_TMP_PADD+1] = 0;
365 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
366 for(i=is;i<ie;i++)
367 {
368 lcount = 0;
369 ucount = 0;
370 for(j=0;j<maxnzr;j++)
371 {
372 if( A->index[j*n+i]<i )
373 {
374 lcount++;
375 }
376 else if( A->index[j*n+i]>i )
377 {
378 ucount++;
379 }
380 }
381 if( lcount>iw[my_rank*LIS_VEC_TMP_PADD] ) iw[my_rank*LIS_VEC_TMP_PADD] = lcount;
382 if( ucount>iw[my_rank*LIS_VEC_TMP_PADD+1] ) iw[my_rank*LIS_VEC_TMP_PADD+1] = ucount;
383 }
384 }
385 for(i=0;i<nprocs;i++)
386 {
387 if( iw[i*LIS_VEC_TMP_PADD]>lmaxnzr ) lmaxnzr = iw[i*LIS_VEC_TMP_PADD];
388 if( iw[i*LIS_VEC_TMP_PADD+1]>umaxnzr ) umaxnzr = iw[i*LIS_VEC_TMP_PADD+1];
389 }
390 lis_free(iw);
391 #else
392 for(i=0;i<n;i++)
393 {
394 lcount = 0;
395 ucount = 0;
396 for(j=0;j<maxnzr;j++)
397 {
398 if( A->index[j*n+i]<i )
399 {
400 lcount++;
401 }
402 else if( A->index[j*n+i]>i )
403 {
404 ucount++;
405 }
406 }
407 if( lcount>lmaxnzr ) lmaxnzr = lcount;
408 if( ucount>umaxnzr ) umaxnzr = ucount;
409 }
410 #endif
411
412 err = lis_matrix_LU_create(A);
413 if( err )
414 {
415 return err;
416 }
417 err = lis_matrix_malloc_ell(n,lmaxnzr,&lindex,&lvalue);
418 if( err )
419 {
420 return err;
421 }
422 err = lis_matrix_malloc_ell(n,umaxnzr,&uindex,&uvalue);
423 if( err )
424 {
425 lis_free2(4,lindex,lvalue,uindex,uvalue);
426 return err;
427 }
428 err = lis_matrix_diag_duplicateM(A,&D);
429 if( err )
430 {
431 lis_free2(4,lindex,lvalue,uindex,uvalue);
432 return err;
433 }
434
435 #ifdef _OPENMP
436 #pragma omp parallel private(i,j,is,ie,lcount,ucount,my_rank)
437 #endif
438 {
439 #ifdef _OPENMP
440 my_rank = omp_get_thread_num();
441 #else
442 my_rank = 0;
443 #endif
444 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
445 for(j=0;j<lmaxnzr;j++)
446 {
447 for(i=is;i<ie;i++)
448 {
449 lvalue[j*n + i] = 0.0;
450 lindex[j*n + i] = i;
451 D->value[i] = 0.0;
452 }
453 }
454 for(j=0;j<umaxnzr;j++)
455 {
456 for(i=is;i<ie;i++)
457 {
458 uvalue[j*n + i] = 0.0;
459 uindex[j*n + i] = i;
460 }
461 }
462 for(i=is;i<ie;i++)
463 {
464 lcount = 0;
465 ucount = 0;
466 for(j=0;j<maxnzr;j++)
467 {
468 if( A->index[j*n+i]<i )
469 {
470 lindex[lcount*n+i] = A->index[j*n+i];
471 lvalue[lcount*n+i] = A->value[j*n+i];
472 lcount++;
473 }
474 else if( A->index[j*n+i]>i )
475 {
476 uindex[ucount*n+i] = A->index[j*n+i];
477 uvalue[ucount*n+i] = A->value[j*n+i];
478 ucount++;
479 }
480 else
481 {
482 if( A->value[j*n+i]!=0.0 ) D->value[i] = A->value[j*n+i];
483 }
484 }
485 }
486 }
487 A->L->maxnzr = lmaxnzr;
488 A->L->index = lindex;
489 A->L->value = lvalue;
490 A->U->maxnzr = umaxnzr;
491 A->U->index = uindex;
492 A->U->value = uvalue;
493 A->D = D;
494 A->is_splited = LIS_TRUE;
495
496 LIS_DEBUG_FUNC_OUT;
497 return LIS_SUCCESS;
498 }
499
500 #undef __FUNC__
501 #define __FUNC__ "lis_matrix_merge_ell"
lis_matrix_merge_ell(LIS_MATRIX A)502 LIS_INT lis_matrix_merge_ell(LIS_MATRIX A)
503 {
504 LIS_INT i,j,n;
505 LIS_INT maxnzr,lmaxnzr,umaxnzr,count;
506 LIS_INT err;
507 LIS_INT *index;
508 LIS_SCALAR *value;
509
510 LIS_DEBUG_FUNC_IN;
511
512
513 n = A->n;
514 maxnzr = 0;
515 lmaxnzr = A->L->maxnzr;
516 umaxnzr = A->U->maxnzr;
517 index = NULL;
518 value = NULL;
519
520 for(i=0;i<n;i++)
521 {
522 count = 0;
523 for(j=0;j<lmaxnzr;j++)
524 {
525 if( A->L->index[j*n+i]<i )
526 {
527 count++;
528 }
529 }
530 for(j=0;j<umaxnzr;j++)
531 {
532 if( A->U->index[j*n+i]>i )
533 {
534 count++;
535 }
536 }
537 count++;
538 if( count>maxnzr ) maxnzr = count;
539 }
540
541 err = lis_matrix_malloc_ell(n,maxnzr,&index,&value);
542 if( err )
543 {
544 return err;
545 }
546
547 for(j=0;j<maxnzr;j++)
548 {
549 for(i=0;i<n;i++)
550 {
551 value[j*n + i] = 0.0;
552 index[j*n + i] = i;
553 }
554 }
555 for(i=0;i<n;i++)
556 {
557 count = 0;
558 for(j=0;j<lmaxnzr;j++)
559 {
560 if( A->L->index[j*n+i]<i )
561 {
562 index[count*n+i] = A->L->index[j*n+i];
563 value[count*n+i] = A->L->value[j*n+i];
564 count++;
565 }
566 }
567 index[count*n+i] = i;
568 value[count*n+i] = A->D->value[i];
569 count++;
570 for(j=0;j<umaxnzr;j++)
571 {
572 if( A->U->index[j*n+i]>i )
573 {
574 index[count*n+i] = A->U->index[j*n+i];
575 value[count*n+i] = A->U->value[j*n+i];
576 count++;
577 }
578 }
579 }
580
581 A->maxnzr = maxnzr;
582 A->value = value;
583 A->index = index;
584
585 LIS_DEBUG_FUNC_OUT;
586 return LIS_SUCCESS;
587 }
588
589 #undef __FUNC__
590 #define __FUNC__ "lis_matrix_get_diagonal_ell"
lis_matrix_get_diagonal_ell(LIS_MATRIX A,LIS_SCALAR d[])591 LIS_INT lis_matrix_get_diagonal_ell(LIS_MATRIX A, LIS_SCALAR d[])
592 {
593 LIS_INT i,j;
594 LIS_INT n,maxnzr;
595
596 LIS_DEBUG_FUNC_IN;
597
598 n = A->n;
599 if( A->is_splited )
600 {
601 #ifdef _OPENMP
602 #pragma omp parallel for private(i,j)
603 #endif
604 for(i=0; i<n; i++)
605 {
606 d[i] = A->D->value[i];
607 }
608 }
609 else
610 {
611 maxnzr = A->maxnzr;
612 #ifdef _OPENMP
613 #pragma omp parallel for private(i,j)
614 #endif
615 for(i=0; i<n; i++)
616 {
617 d[i] = (LIS_SCALAR)0.0;
618 for(j=0;j<maxnzr;j++)
619 {
620 if( i==A->index[j*n+i] )
621 {
622 d[i] = A->value[j*n+i];
623 break;
624 }
625 }
626 }
627 }
628 LIS_DEBUG_FUNC_OUT;
629 return LIS_SUCCESS;
630 }
631
632 #undef __FUNC__
633 #define __FUNC__ "lis_matrix_shift_diagonal_ell"
lis_matrix_shift_diagonal_ell(LIS_MATRIX A,LIS_SCALAR sigma)634 LIS_INT lis_matrix_shift_diagonal_ell(LIS_MATRIX A, LIS_SCALAR sigma)
635 {
636 LIS_INT i,j;
637 LIS_INT n,maxnzr;
638
639 LIS_DEBUG_FUNC_IN;
640
641 n = A->n;
642 if( A->is_splited )
643 {
644 #ifdef _OPENMP
645 #pragma omp parallel for private(i,j)
646 #endif
647 for(i=0; i<n; i++)
648 {
649 A->D->value[i] -= sigma;
650 }
651 }
652 else
653 {
654 maxnzr = A->maxnzr;
655 #ifdef _OPENMP
656 #pragma omp parallel for private(i,j)
657 #endif
658 for(i=0; i<n; i++)
659 {
660 for(j=0;j<maxnzr;j++)
661 {
662 if( i==A->index[j*n+i] )
663 {
664 A->value[j*n+i] -= sigma;
665 break;
666 }
667 }
668 }
669 }
670 LIS_DEBUG_FUNC_OUT;
671 return LIS_SUCCESS;
672 }
673
674 #undef __FUNC__
675 #define __FUNC__ "lis_matrix_scale_ell"
lis_matrix_scale_ell(LIS_MATRIX A,LIS_SCALAR d[])676 LIS_INT lis_matrix_scale_ell(LIS_MATRIX A, LIS_SCALAR d[])
677 {
678 LIS_INT i,j,is,ie;
679 LIS_INT n,maxnzr,nprocs,my_rank;
680
681 LIS_DEBUG_FUNC_IN;
682
683 n = A->n;
684 if( A->is_splited )
685 {
686 #ifdef _OPENMP
687 nprocs = omp_get_max_threads();
688 #else
689 nprocs = 1;
690 #endif
691 #ifdef _OPENMP
692 #pragma omp parallel private(i,j,is,ie,my_rank)
693 #endif
694 {
695 #ifdef _OPENMP
696 my_rank = omp_get_thread_num();
697 #else
698 my_rank = 0;
699 #endif
700 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
701 for(i=is;i<ie;i++)
702 {
703 A->D->value[i] = 1.0;
704 }
705 for(j=0;j<A->L->maxnzr;j++)
706 {
707 for(i=is;i<ie;i++)
708 {
709 A->L->value[j*n + i] *= d[i];
710 }
711 }
712 for(j=0;j<A->U->maxnzr;j++)
713 {
714 for(i=is;i<ie;i++)
715 {
716 A->U->value[j*n + i] *= d[i];
717 }
718 }
719 }
720 }
721 else
722 {
723 maxnzr = A->maxnzr;
724 #ifdef _OPENMP
725 nprocs = omp_get_max_threads();
726 #else
727 nprocs = 1;
728 #endif
729 #ifdef _OPENMP
730 #pragma omp parallel private(i,j,is,ie,my_rank)
731 #endif
732 {
733 #ifdef _OPENMP
734 my_rank = omp_get_thread_num();
735 #else
736 my_rank = 0;
737 #endif
738 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
739 for(j=0;j<maxnzr;j++)
740 {
741 for(i=is;i<ie;i++)
742 {
743 A->value[j*n + i] *= d[i];
744 }
745 }
746 }
747 }
748 LIS_DEBUG_FUNC_OUT;
749 return LIS_SUCCESS;
750 }
751
752 #undef __FUNC__
753 #define __FUNC__ "lis_matrix_scale_symm_ell"
lis_matrix_scale_symm_ell(LIS_MATRIX A,LIS_SCALAR d[])754 LIS_INT lis_matrix_scale_symm_ell(LIS_MATRIX A, LIS_SCALAR d[])
755 {
756 LIS_INT i,j,is,ie;
757 LIS_INT n,maxnzr,nprocs,my_rank;
758
759 LIS_DEBUG_FUNC_IN;
760
761 n = A->n;
762 if( A->is_splited )
763 {
764 #ifdef _OPENMP
765 nprocs = omp_get_max_threads();
766 #else
767 nprocs = 1;
768 #endif
769 #ifdef _OPENMP
770 #pragma omp parallel private(i,j,is,ie,my_rank)
771 #endif
772 {
773 #ifdef _OPENMP
774 my_rank = omp_get_thread_num();
775 #else
776 my_rank = 0;
777 #endif
778 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
779 for(i=is;i<ie;i++)
780 {
781 A->D->value[i] = 1.0;
782 }
783 for(j=0;j<A->L->maxnzr;j++)
784 {
785 for(i=is;i<ie;i++)
786 {
787 A->L->value[j*n + i] *= d[i]*d[A->L->index[j*n + i]];
788 }
789 }
790 for(j=0;j<A->U->maxnzr;j++)
791 {
792 for(i=is;i<ie;i++)
793 {
794 A->U->value[j*n + i] *= d[i]*d[A->U->index[j*n + i]];
795 }
796 }
797 }
798 }
799 else
800 {
801 maxnzr = A->maxnzr;
802 #ifdef _OPENMP
803 nprocs = omp_get_max_threads();
804 #else
805 nprocs = 1;
806 #endif
807 #ifdef _OPENMP
808 #pragma omp parallel private(i,j,is,ie,my_rank)
809 #endif
810 {
811 #ifdef _OPENMP
812 my_rank = omp_get_thread_num();
813 #else
814 my_rank = 0;
815 #endif
816 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
817 for(j=0;j<maxnzr;j++)
818 {
819 for(i=is;i<ie;i++)
820 {
821 A->value[j*n + i] *= d[i]*d[A->index[j*n + i]];;
822 }
823 }
824 }
825 }
826 LIS_DEBUG_FUNC_OUT;
827 return LIS_SUCCESS;
828 }
829
830 #undef __FUNC__
831 #define __FUNC__ "lis_matrix_solve_ell"
lis_matrix_solve_ell(LIS_MATRIX A,LIS_VECTOR B,LIS_VECTOR X,LIS_INT flag)832 LIS_INT lis_matrix_solve_ell(LIS_MATRIX A, LIS_VECTOR B, LIS_VECTOR X, LIS_INT flag)
833 {
834 LIS_INT i,j,n;
835 LIS_SCALAR t;
836 LIS_SCALAR *b,*x;
837
838 LIS_DEBUG_FUNC_IN;
839
840 n = A->n;
841 b = B->value;
842 x = X->value;
843
844 switch(flag)
845 {
846 case LIS_MATRIX_LOWER:
847 for(i=0;i<n;i++)
848 {
849 t = b[i];
850 for(j=0;j<A->L->maxnzr;j++)
851 {
852 t -= A->L->value[j*n + i] * x[A->L->index[j*n + i]];
853 }
854 x[i] = t * A->WD->value[i];
855 }
856 break;
857 case LIS_MATRIX_UPPER:
858 for(i=n-1;i>=0;i--)
859 {
860 t = b[i];
861 for(j=0;j<A->U->maxnzr;j++)
862 {
863 t -= A->U->value[j*n + i] * x[A->U->index[j*n + i]];
864 }
865 x[i] = t * A->WD->value[i];
866 }
867 break;
868 case LIS_MATRIX_SSOR:
869 for(i=0;i<n;i++)
870 {
871 t = b[i];
872 for(j=0;j<A->L->maxnzr;j++)
873 {
874 t -= A->L->value[j*n + i] * x[A->L->index[j*n + i]];
875 }
876 x[i] = t * A->WD->value[i];
877 }
878 for(i=n-1;i>=0;i--)
879 {
880 t = 0.0;
881 for(j=0;j<A->U->maxnzr;j++)
882 {
883 if( A->U->index[j*n + i]>=n ) continue;
884 t += A->U->value[j*n + i] * x[A->U->index[j*n + i]];
885 }
886 x[i] -= t * A->WD->value[i];
887 }
888 break;
889 }
890
891 LIS_DEBUG_FUNC_OUT;
892 return LIS_SUCCESS;
893 }
894
895 #undef __FUNC__
896 #define __FUNC__ "lis_matrix_solveh_ell"
lis_matrix_solveh_ell(LIS_MATRIX A,LIS_VECTOR B,LIS_VECTOR X,LIS_INT flag)897 LIS_INT lis_matrix_solveh_ell(LIS_MATRIX A, LIS_VECTOR B, LIS_VECTOR X, LIS_INT flag)
898 {
899 LIS_INT i,j,n;
900 LIS_SCALAR t;
901 LIS_SCALAR *x;
902
903 LIS_DEBUG_FUNC_IN;
904
905 n = A->n;
906 x = X->value;
907
908 lis_vector_copy(B,X);
909 switch(flag)
910 {
911 case LIS_MATRIX_LOWER:
912 for(i=0;i<n;i++)
913 {
914 x[i] = x[i] * conj(A->WD->value[i]);
915 for(j=0;j<A->U->maxnzr;j++)
916 {
917 x[A->U->index[j*n + i]] -= conj(A->U->value[j*n + i]) * x[i];
918 }
919 }
920 break;
921 case LIS_MATRIX_UPPER:
922 for(i=n-1;i>=0;i--)
923 {
924 x[i] = x[i] * conj(A->WD->value[i]);
925 for(j=0;j<A->L->maxnzr;j++)
926 {
927 x[A->L->index[j*n +i]] -= conj(A->L->value[j*n + i]) * x[i];
928 }
929 }
930 break;
931 case LIS_MATRIX_SSOR:
932 for(i=0;i<n;i++)
933 {
934 t = x[i] * conj(A->WD->value[i]);
935 for(j=0;j<A->U->maxnzr;j++)
936 {
937 x[A->U->index[j*n + i]] -= conj(A->U->value[j*n + i]) * t;
938 }
939 }
940 for(i=n-1;i>=0;i--)
941 {
942 t = x[i] * conj(A->WD->value[i]);
943 x[i] = t;
944 for(j=0;j<A->L->maxnzr;j++)
945 {
946 x[A->L->index[j*n + i]] -= conj(A->L->value[j*n + i]) * t;
947 }
948 }
949 break;
950 }
951
952 LIS_DEBUG_FUNC_OUT;
953 return LIS_SUCCESS;
954 }
955
956 #undef __FUNC__
957 #define __FUNC__ "lis_matrix_convert_csr2ell"
lis_matrix_convert_csr2ell(LIS_MATRIX Ain,LIS_MATRIX Aout)958 LIS_INT lis_matrix_convert_csr2ell(LIS_MATRIX Ain, LIS_MATRIX Aout)
959 {
960 LIS_INT i,j,k;
961 LIS_INT err;
962 LIS_INT n,maxnzr,nprocs,my_rank;
963 LIS_INT is,ie,count;
964 LIS_INT *iw;
965 LIS_INT *index;
966 LIS_SCALAR *value;
967
968 LIS_DEBUG_FUNC_IN;
969
970 n = Ain->n;
971
972 index = NULL;
973 value = NULL;
974 iw = NULL;
975
976
977 /* check maxnzr */
978 #ifdef _OPENMP
979 #define PADD 32
980 nprocs = omp_get_max_threads();
981 iw = (LIS_INT *)lis_malloc( nprocs*PADD*sizeof(LIS_INT),"lis_matrix_convert_csr2ell::iw" );
982 if( iw==NULL )
983 {
984 LIS_SETERR_MEM(nprocs*PADD*sizeof(LIS_INT));
985 return LIS_OUT_OF_MEMORY;
986 }
987 #pragma omp parallel private(i,j,k,is,ie,my_rank)
988 {
989 my_rank = omp_get_thread_num();
990 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
991
992 iw[my_rank*PADD] = 0;
993 for(i=is;i<ie;i++)
994 {
995 k = Ain->ptr[i+1] - Ain->ptr[i];
996 if( k > iw[my_rank*PADD] ) iw[my_rank*PADD] = k;
997 }
998 }
999 maxnzr = 0;
1000 for(i=0;i<nprocs;i++)
1001 {
1002 if( iw[i*PADD] > maxnzr ) maxnzr = iw[i*PADD];
1003 }
1004 lis_free(iw);
1005 #else
1006 maxnzr = 0;
1007 for(i=0;i<n;i++)
1008 {
1009 count = Ain->ptr[i+1] - Ain->ptr[i];
1010 if( count > maxnzr ) maxnzr = count;
1011 }
1012 #endif
1013
1014
1015 err = lis_matrix_malloc_ell(n,maxnzr,&index,&value);
1016 if( err )
1017 {
1018 return err;
1019 }
1020
1021 /* convert ell */
1022 #ifdef _OPENMP
1023 #pragma omp parallel private(i,j,k,is,ie,my_rank)
1024 #endif
1025 {
1026 #ifdef _OPENMP
1027 my_rank = omp_get_thread_num();
1028 nprocs = omp_get_max_threads();
1029 #else
1030 my_rank = 0;
1031 nprocs = 1;
1032 #endif
1033 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
1034
1035 for(j=0;j<maxnzr;j++)
1036 {
1037 for(i=is;i<ie;i++)
1038 {
1039 value[j*n + i] = 0.0;
1040 index[j*n + i] = i;
1041 }
1042 }
1043 for(i=is;i<ie;i++)
1044 {
1045 k=0;
1046 for(j=Ain->ptr[i];j<Ain->ptr[i+1];j++)
1047 {
1048 value[k*n + i] = Ain->value[j];
1049 index[k*n + i] = Ain->index[j];
1050 k++;
1051 }
1052 }
1053 }
1054
1055 err = lis_matrix_set_ell(maxnzr,index,value,Aout);
1056 if( err )
1057 {
1058 lis_free2(2,index,value);
1059 return err;
1060 }
1061 err = lis_matrix_assemble(Aout);
1062 if( err )
1063 {
1064 lis_matrix_storage_destroy(Aout);
1065 return err;
1066 }
1067
1068 LIS_DEBUG_FUNC_OUT;
1069 return LIS_SUCCESS;
1070 }
1071
1072 #undef __FUNC__
1073 #define __FUNC__ "lis_matrix_convert_ell2csr"
lis_matrix_convert_ell2csr(LIS_MATRIX Ain,LIS_MATRIX Aout)1074 LIS_INT lis_matrix_convert_ell2csr(LIS_MATRIX Ain, LIS_MATRIX Aout)
1075 {
1076 LIS_INT i,j,k;
1077 LIS_INT err;
1078 LIS_INT n,nnz,maxnzr,is,ie,nprocs,my_rank;
1079 LIS_INT *iw;
1080 LIS_INT *ptr,*index;
1081 LIS_SCALAR *value;
1082
1083 n = Ain->n;
1084 maxnzr = Ain->maxnzr;
1085 is = Ain->is;
1086 ie = Ain->ie;
1087
1088 ptr = NULL;
1089 index = NULL;
1090 value = NULL;
1091 iw = NULL;
1092
1093 iw = (LIS_INT *)lis_malloc( n*sizeof(LIS_INT),"lis_matrix_convert_ell2csr::iw" );
1094 if( iw==NULL )
1095 {
1096 LIS_SETERR_MEM(n*sizeof(LIS_INT));
1097 return LIS_OUT_OF_MEMORY;
1098 }
1099 ptr = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_matrix_convert_ell2csr::ptr" );
1100 if( ptr==NULL )
1101 {
1102 LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
1103 lis_free2(4,ptr,index,value,iw);
1104 return LIS_OUT_OF_MEMORY;
1105 }
1106
1107 /* check nnz */
1108 #ifdef _OPENMP
1109 #pragma omp parallel private(i,j,k,is,ie,my_rank)
1110 #endif
1111 {
1112 #ifdef _OPENMP
1113 my_rank = omp_get_thread_num();
1114 nprocs = omp_get_max_threads();
1115 #else
1116 my_rank = 0;
1117 nprocs = 1;
1118 #endif
1119 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
1120 memset(&iw[is],0,(ie-is)*sizeof(LIS_INT));
1121
1122 for(j=0;j<maxnzr;j++)
1123 {
1124 for(i=is;i<ie;i++)
1125 {
1126 if( Ain->value[j*n + i]!=(LIS_SCALAR)0.0 )
1127 {
1128 iw[i]++;
1129 }
1130 }
1131 }
1132 #ifdef _OPENMP
1133 #pragma omp for
1134 #endif
1135 for(i=0;i<n+1;i++)
1136 {
1137 ptr[i] = 0;
1138 }
1139 #ifdef _OPENMP
1140 #pragma omp single
1141 #endif
1142 for(i=0;i<n;i++)
1143 {
1144 ptr[i+1] = ptr[i] + iw[i];
1145 }
1146 #ifdef _OPENMP
1147 #pragma omp for
1148 #endif
1149 for(i=0;i<n;i++)
1150 {
1151 iw[i] = ptr[i];
1152 }
1153 }
1154 nnz = ptr[n];
1155
1156 index = (LIS_INT *)lis_malloc( nnz*sizeof(LIS_INT),"lis_matrix_convert_ell2csr::index" );
1157 if( index==NULL )
1158 {
1159 LIS_SETERR_MEM(nnz*sizeof(LIS_INT));
1160 lis_free2(4,ptr,index,value,iw);
1161 return LIS_OUT_OF_MEMORY;
1162 }
1163 value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_convert_ell2csr::value" );
1164 if( value==NULL )
1165 {
1166 LIS_SETERR_MEM(nnz*sizeof(LIS_INT));
1167 lis_free2(4,ptr,index,value,iw);
1168 return LIS_OUT_OF_MEMORY;
1169 }
1170
1171 /* convert csr */
1172 #ifdef _OPENMP
1173 #pragma omp parallel private(i,j,k,is,ie,my_rank)
1174 #endif
1175 {
1176 #ifdef _OPENMP
1177 my_rank = omp_get_thread_num();
1178 nprocs = omp_get_max_threads();
1179 #else
1180 my_rank = 0;
1181 nprocs = 1;
1182 #endif
1183 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
1184
1185 for(j=0;j<maxnzr;j++)
1186 {
1187 for(i=is;i<ie;i++)
1188 {
1189 if( Ain->value[j*n + i]!=(LIS_SCALAR)0.0 )
1190 {
1191 k = iw[i]++;
1192 value[k] = Ain->value[j*n + i];
1193 index[k] = Ain->index[j*n + i];
1194 }
1195 }
1196 }
1197 }
1198
1199 err = lis_matrix_set_csr(nnz,ptr,index,value,Aout);
1200 if( err )
1201 {
1202 lis_free2(4,ptr,index,value,iw);
1203 return err;
1204 }
1205 err = lis_matrix_assemble(Aout);
1206 if( err )
1207 {
1208 lis_free(iw);
1209 lis_matrix_storage_destroy(Aout);
1210 return err;
1211 }
1212 lis_free(iw);
1213
1214 LIS_DEBUG_FUNC_OUT;
1215 return LIS_SUCCESS;
1216 }
1217
1218