1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
5 1. Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7 2. Redistributions in binary form must reproduce the above copyright
8 notice, this list of conditions and the following disclaimer in the
9 documentation and/or other materials provided with the distribution.
10 3. Neither the name of the project nor the names of its contributors
11 may be used to endorse or promote products derived from this software
12 without specific prior written permission.
13
14 THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18 PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 #include "lis_config_win.h"
32 #endif
33 #endif
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #ifdef HAVE_MALLOC_H
38 #include <malloc.h>
39 #endif
40 #include <string.h>
41 #include <stdarg.h>
42 #include <math.h>
43 #ifdef _OPENMP
44 #include <omp.h>
45 #endif
46 #ifdef USE_MPI
47 #include <mpi.h>
48 #endif
49 #include "lislib.h"
50
51 /************************************************
52 * function | SOM |
53 *-----------------------------+-----+
54 * lis_matrix_set | o |
55 * lis_matrix_setDLU | o |
56 * lis_matrix_malloc | o |
57 * lis_matrix_elements_copy | o |
58 * lis_matrix_transpose | o |
59 * lis_matrix_split | o |
60 * lis_matrix_merge | o |
61 *-----------------------------+-----+-----+
62 * function |merge|split|
63 *-----------------------------+-----+-----|
64 * lis_matrix_convert | o | |
65 * lis_matrix_copy | o | o |
66 * lis_matrix_get_diagonal | o | o |
67 * lis_matrix_shift_diagonal | o | o |
68 * lis_matrix_axpy | o | o |
69 * lis_matrix_xpay | o | o |
70 * lis_matrix_axpyz | o | o |
71 * lis_matrix_scale | o | o |
72 * lis_matrix_scale_symm | o | o |
73 * lis_matrix_normf | o | o |
74 * lis_matrix_sort | o | o |
75 * lis_matrix_solve | xxx | o |
76 * lis_matrix_solveh | xxx | o |
77 ************************************************/
78
79 #undef __FUNC__
80 #define __FUNC__ "lis_matrix_set_dns"
lis_matrix_set_dns(LIS_SCALAR * value,LIS_MATRIX A)81 LIS_INT lis_matrix_set_dns(LIS_SCALAR *value, LIS_MATRIX A)
82 {
83 LIS_INT err;
84
85 LIS_DEBUG_FUNC_IN;
86
87 #if 0
88 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
89 if( err ) return err;
90 #else
91 if(lis_matrix_is_assembled(A)) return LIS_SUCCESS;
92 else {
93 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
94 if( err ) return err;
95 }
96 #endif
97
98 A->value = value;
99 A->is_copy = LIS_FALSE;
100 A->status = -LIS_MATRIX_DNS;
101
102 LIS_DEBUG_FUNC_OUT;
103 return LIS_SUCCESS;
104 }
105
106 #undef __FUNC__
107 #define __FUNC__ "lis_matrix_malloc_dns"
lis_matrix_malloc_dns(LIS_INT n,LIS_INT np,LIS_SCALAR ** value)108 LIS_INT lis_matrix_malloc_dns(LIS_INT n, LIS_INT np, LIS_SCALAR **value)
109 {
110 LIS_DEBUG_FUNC_IN;
111
112 *value = NULL;
113
114 *value = (LIS_SCALAR *)lis_malloc( n*np*sizeof(LIS_SCALAR),"lis_matrix_malloc_dns::value" );
115 if( *value==NULL )
116 {
117 LIS_SETERR_MEM(n*np*sizeof(LIS_SCALAR));
118 return LIS_OUT_OF_MEMORY;
119 }
120 LIS_DEBUG_FUNC_OUT;
121 return LIS_SUCCESS;
122 }
123
124 #undef __FUNC__
125 #define __FUNC__ "lis_matrix_elements_copy_dns"
lis_matrix_elements_copy_dns(LIS_INT n,LIS_INT np,LIS_SCALAR * value,LIS_SCALAR * o_value)126 LIS_INT lis_matrix_elements_copy_dns(LIS_INT n, LIS_INT np, LIS_SCALAR *value, LIS_SCALAR *o_value)
127 {
128 LIS_INT i,j,is,ie;
129 LIS_INT nprocs,my_rank;
130
131 LIS_DEBUG_FUNC_IN;
132
133 #ifdef _OPENMP
134 nprocs = omp_get_max_threads();
135 #else
136 nprocs = 1;
137 #endif
138
139 #ifdef _OPENMP
140 #pragma omp parallel private(i,j,is,ie,my_rank)
141 #endif
142 {
143 #ifdef _OPENMP
144 my_rank = omp_get_thread_num();
145 #else
146 my_rank = 0;
147 #endif
148 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
149
150 for(j=0;j<np;j++)
151 {
152 for(i=is;i<ie;i++)
153 {
154 o_value[j*n + i] = value[j*n + i];
155 }
156 }
157 }
158
159 LIS_DEBUG_FUNC_OUT;
160 return LIS_SUCCESS;
161 }
162
163 #undef __FUNC__
164 #define __FUNC__ "lis_matrix_copy_dns"
lis_matrix_copy_dns(LIS_MATRIX Ain,LIS_MATRIX Aout)165 LIS_INT lis_matrix_copy_dns(LIS_MATRIX Ain, LIS_MATRIX Aout)
166 {
167 LIS_INT err;
168 LIS_INT i,n,np;
169 LIS_SCALAR *value;
170 LIS_MATRIX_DIAG D;
171
172 LIS_DEBUG_FUNC_IN;
173
174 n = Ain->n;
175 np = Ain->np;
176 value = NULL;
177
178 err = lis_matrix_malloc_dns(n,np,&value);
179 if( err )
180 {
181 return err;
182 }
183
184 lis_matrix_elements_copy_dns(n,np,Ain->value,value);
185
186 if( Ain->is_splited )
187 {
188 err = lis_matrix_diag_duplicateM(Ain,&D);
189 if( err )
190 {
191 lis_free(value);
192 return err;
193 }
194
195 #ifdef _OPENMP
196 #pragma omp parallel for private(i)
197 #endif
198 for(i=0;i<n;i++)
199 {
200 D->value[i] = Ain->value[i*n + i];
201 }
202 Aout->D = D;
203 }
204
205 err = lis_matrix_set_dns(value,Aout);
206 if( err )
207 {
208 lis_free(value);
209 return err;
210 }
211
212 err = lis_matrix_assemble(Aout);
213 if( err )
214 {
215 lis_matrix_storage_destroy(Aout);
216 return err;
217 }
218 LIS_DEBUG_FUNC_OUT;
219 return LIS_SUCCESS;
220 }
221
222 #undef __FUNC__
223 #define __FUNC__ "lis_matrix_get_diagonal_dns"
lis_matrix_get_diagonal_dns(LIS_MATRIX A,LIS_SCALAR d[])224 LIS_INT lis_matrix_get_diagonal_dns(LIS_MATRIX A, LIS_SCALAR d[])
225 {
226 LIS_INT i;
227 LIS_INT n;
228
229 LIS_DEBUG_FUNC_IN;
230
231 n = A->n;
232 #ifdef _OPENMP
233 #pragma omp parallel for private(i)
234 #endif
235 for(i=0; i<n; i++)
236 {
237 d[i] = A->value[i*n + i];
238 }
239 LIS_DEBUG_FUNC_OUT;
240 return LIS_SUCCESS;
241 }
242
243 #undef __FUNC__
244 #define __FUNC__ "lis_matrix_shift_diagonal_dns"
lis_matrix_shift_diagonal_dns(LIS_MATRIX A,LIS_SCALAR sigma)245 LIS_INT lis_matrix_shift_diagonal_dns(LIS_MATRIX A, LIS_SCALAR sigma)
246 {
247 LIS_INT i;
248 LIS_INT n;
249
250 LIS_DEBUG_FUNC_IN;
251
252 n = A->n;
253
254 if( A->is_splited )
255 {
256 #ifdef _OPENMP
257 #pragma omp parallel for private(i)
258 #endif
259 for(i=0; i<n; i++)
260 {
261 A->D->value[i] -= sigma;
262 }
263 }
264 else
265 {
266 #ifdef _OPENMP
267 #pragma omp parallel for private(i)
268 #endif
269 for(i=0; i<n; i++)
270 {
271 A->value[i*n + i] -= sigma;
272 }
273 }
274 LIS_DEBUG_FUNC_OUT;
275 return LIS_SUCCESS;
276 }
277
278 #undef __FUNC__
279 #define __FUNC__ "lis_matrix_axpy_dns"
lis_matrix_axpy_dns(LIS_SCALAR alpha,LIS_MATRIX A,LIS_MATRIX B)280 LIS_INT lis_matrix_axpy_dns(LIS_SCALAR alpha, LIS_MATRIX A, LIS_MATRIX B)
281 {
282 LIS_INT i,j,is,ie,n,np;
283 LIS_INT nprocs,my_rank;
284
285 LIS_DEBUG_FUNC_IN;
286
287 n = A->n;
288 np = A->np;
289
290 #ifdef _OPENMP
291 nprocs = omp_get_max_threads();
292 #else
293 nprocs = 1;
294 #endif
295
296 #ifdef _OPENMP
297 #pragma omp parallel private(i,j,is,ie,my_rank)
298 #endif
299 {
300 #ifdef _OPENMP
301 my_rank = omp_get_thread_num();
302 #else
303 my_rank = 0;
304 #endif
305 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
306
307 for(j=0;j<np;j++)
308 {
309 for(i=is;i<ie;i++)
310 {
311 B->value[j*n + i] += alpha * A->value[j*n + i];
312 }
313 }
314 }
315
316 if( A->is_splited && B->is_splited )
317 {
318 #ifdef _OPENMP
319 #pragma omp parallel for private(i)
320 #endif
321 for(i=0;i<n;i++)
322 {
323 B->D->value[i] += alpha * A->D->value[i];
324 }
325 }
326 else if( A->is_splited || B->is_splited )
327 {
328 LIS_SETERR(LIS_ERR_ILL_ARG,"either A or B is not splited\n");
329 return LIS_ERR_ILL_ARG;
330 }
331
332 LIS_DEBUG_FUNC_OUT;
333 return LIS_SUCCESS;
334 }
335
336 #undef __FUNC__
337 #define __FUNC__ "lis_matrix_xpay_dns"
lis_matrix_xpay_dns(LIS_SCALAR alpha,LIS_MATRIX A,LIS_MATRIX B)338 LIS_INT lis_matrix_xpay_dns(LIS_SCALAR alpha, LIS_MATRIX A, LIS_MATRIX B)
339 {
340 LIS_INT i,j,is,ie,n,np;
341 LIS_INT nprocs,my_rank;
342
343 LIS_DEBUG_FUNC_IN;
344
345 n = A->n;
346 np = A->np;
347
348 #ifdef _OPENMP
349 nprocs = omp_get_max_threads();
350 #else
351 nprocs = 1;
352 #endif
353
354 #ifdef _OPENMP
355 #pragma omp parallel private(i,j,is,ie,my_rank)
356 #endif
357 {
358 #ifdef _OPENMP
359 my_rank = omp_get_thread_num();
360 #else
361 my_rank = 0;
362 #endif
363 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
364
365 for(j=0;j<np;j++)
366 {
367 for(i=is;i<ie;i++)
368 {
369 B->value[j*n + i] = A->value[j*n + i] + alpha * B->value[j*n + i];
370 }
371 }
372 }
373
374 if( A->is_splited && B->is_splited )
375 {
376 #ifdef _OPENMP
377 #pragma omp parallel for private(i)
378 #endif
379 for(i=0;i<n;i++)
380 {
381 B->D->value[i] = A->D->value[i] + alpha * B->D->value[i];
382 }
383 }
384 else if( A->is_splited || B->is_splited )
385 {
386 LIS_SETERR(LIS_ERR_ILL_ARG,"either A or B is not splited\n");
387 return LIS_ERR_ILL_ARG;
388 }
389
390 LIS_DEBUG_FUNC_OUT;
391 return LIS_SUCCESS;
392 }
393
394 #undef __FUNC__
395 #define __FUNC__ "lis_matrix_axpyz_dns"
lis_matrix_axpyz_dns(LIS_SCALAR alpha,LIS_MATRIX A,LIS_MATRIX B,LIS_MATRIX C)396 LIS_INT lis_matrix_axpyz_dns(LIS_SCALAR alpha, LIS_MATRIX A, LIS_MATRIX B, LIS_MATRIX C)
397 {
398 LIS_INT i,j,is,ie,n,np;
399 LIS_INT nprocs,my_rank;
400
401 LIS_DEBUG_FUNC_IN;
402
403 n = A->n;
404 np = A->np;
405
406 #ifdef _OPENMP
407 nprocs = omp_get_max_threads();
408 #else
409 nprocs = 1;
410 #endif
411
412 #ifdef _OPENMP
413 #pragma omp parallel private(i,j,is,ie,my_rank)
414 #endif
415 {
416 #ifdef _OPENMP
417 my_rank = omp_get_thread_num();
418 #else
419 my_rank = 0;
420 #endif
421 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
422
423 for(j=0;j<np;j++)
424 {
425 for(i=is;i<ie;i++)
426 {
427 C->value[j*n + i] = alpha * A->value[j*n + i] + B->value[j*n + i];
428 }
429 }
430 }
431
432 if( A->is_splited && B->is_splited && C->is_splited )
433 {
434 #ifdef _OPENMP
435 #pragma omp parallel for private(i)
436 #endif
437 for(i=0;i<n;i++)
438 {
439 C->D->value[i] = alpha * A->D->value[i] + B->D->value[i];
440 }
441 }
442 else if( A->is_splited || B->is_splited || C->is_splited )
443 {
444 LIS_SETERR(LIS_ERR_ILL_ARG,"either A, B or C is not splited\n");
445 return LIS_ERR_ILL_ARG;
446 }
447
448 LIS_DEBUG_FUNC_OUT;
449 return LIS_SUCCESS;
450 }
451
452 #undef __FUNC__
453 #define __FUNC__ "lis_matrix_scale_dns"
lis_matrix_scale_dns(LIS_MATRIX A,LIS_SCALAR d[])454 LIS_INT lis_matrix_scale_dns(LIS_MATRIX A, LIS_SCALAR d[])
455 {
456 LIS_INT i,j;
457 LIS_INT n,np;
458
459 LIS_DEBUG_FUNC_IN;
460
461 n = A->n;
462 np = A->np;
463 for(j=0;j<np;j++)
464 {
465 for(i=0;i<n;i++)
466 {
467 A->value[j*n + i] *= d[i];
468 }
469 }
470 LIS_DEBUG_FUNC_OUT;
471 return LIS_SUCCESS;
472 }
473
474 #undef __FUNC__
475 #define __FUNC__ "lis_matrix_scale_symm_dns"
lis_matrix_scale_symm_dns(LIS_MATRIX A,LIS_SCALAR d[])476 LIS_INT lis_matrix_scale_symm_dns(LIS_MATRIX A, LIS_SCALAR d[])
477 {
478 LIS_INT i,j;
479 LIS_INT n,np;
480
481 LIS_DEBUG_FUNC_IN;
482
483 n = A->n;
484 np = A->np;
485 for(j=0;j<np;j++)
486 {
487 for(i=0;i<n;i++)
488 {
489 A->value[j*n + i] *= d[i]*d[j];
490 }
491 }
492 LIS_DEBUG_FUNC_OUT;
493 return LIS_SUCCESS;
494 }
495
496 #undef __FUNC__
497 #define __FUNC__ "lis_matrix_normf_dns"
lis_matrix_normf_dns(LIS_MATRIX A,LIS_SCALAR * nrm)498 LIS_INT lis_matrix_normf_dns(LIS_MATRIX A, LIS_SCALAR *nrm)
499 {
500 LIS_INT i,j;
501 LIS_INT n;
502 LIS_SCALAR sum;
503
504 LIS_DEBUG_FUNC_IN;
505
506 n = A->n;
507 sum = (LIS_SCALAR)0;
508 if( A->is_splited )
509 {
510 #ifdef _OPENMP
511 #pragma omp parallel for reduction(+:sum) private(i,j)
512 #endif
513 for(i=0; i<n; i++)
514 {
515 sum += A->D->value[i]*A->D->value[i];
516 for(j=A->L->index[i];j<A->L->index[i+1];j++)
517 {
518 sum += A->L->value[j]*A->L->value[j];
519 }
520 for(j=A->U->index[i];j<A->U->index[i+1];j++)
521 {
522 sum += A->U->value[j]*A->U->value[j];
523 }
524 }
525 }
526 else
527 {
528 #ifdef _OPENMP
529 #pragma omp parallel for reduction(+:sum) private(i,j)
530 #endif
531 for(i=0; i<n; i++)
532 {
533 sum += A->value[i]*A->value[i];
534 for(j=A->index[i];j<A->index[i+1];j++)
535 {
536 sum += A->value[j]*A->value[j];
537 }
538 }
539 }
540 *nrm = sqrt(sum);
541 LIS_DEBUG_FUNC_OUT;
542 return LIS_SUCCESS;
543 }
544
545 #undef __FUNC__
546 #define __FUNC__ "lis_matrix_transpose_dns"
lis_matrix_transpose_dns(LIS_MATRIX Ain,LIS_MATRIX * Aout)547 LIS_INT lis_matrix_transpose_dns(LIS_MATRIX Ain, LIS_MATRIX *Aout)
548 {
549
550 LIS_DEBUG_FUNC_IN;
551
552 /* err = lis_matrix_convert_dns2csc(Ain,Aout);*/
553 (*Aout)->matrix_type = LIS_MATRIX_DNS;
554 (*Aout)->status = LIS_MATRIX_DNS;
555
556 LIS_DEBUG_FUNC_OUT;
557 return LIS_SUCCESS;
558 }
559
560 #undef __FUNC__
561 #define __FUNC__ "lis_matrix_split_dns"
lis_matrix_split_dns(LIS_MATRIX A)562 LIS_INT lis_matrix_split_dns(LIS_MATRIX A)
563 {
564 LIS_INT i,n;
565 LIS_INT err;
566 LIS_MATRIX_DIAG D;
567
568 LIS_DEBUG_FUNC_IN;
569
570 n = A->n;
571
572 err = lis_matrix_diag_duplicateM(A,&D);
573 if( err )
574 {
575 return err;
576 }
577
578 for(i=0;i<n;i++)
579 {
580 D->value[i] = A->value[i*n + i];
581 }
582 A->D = D;
583 A->is_splited = LIS_TRUE;
584 A->is_save = LIS_TRUE;
585
586 LIS_DEBUG_FUNC_OUT;
587 return LIS_SUCCESS;
588 }
589
590
591 #undef __FUNC__
592 #define __FUNC__ "lis_matrix_merge_dns"
lis_matrix_merge_dns(LIS_MATRIX A)593 LIS_INT lis_matrix_merge_dns(LIS_MATRIX A)
594 {
595 LIS_DEBUG_FUNC_IN;
596
597 A->is_splited = LIS_FALSE;
598
599 LIS_DEBUG_FUNC_OUT;
600 return LIS_SUCCESS;
601 }
602
603 #undef __FUNC__
604 #define __FUNC__ "lis_matrix_sort_dns"
lis_matrix_sort_dns(LIS_MATRIX A)605 LIS_INT lis_matrix_sort_dns(LIS_MATRIX A)
606 {
607 LIS_DEBUG_FUNC_IN;
608
609 A->is_sorted = LIS_TRUE;
610
611 LIS_DEBUG_FUNC_OUT;
612 return LIS_SUCCESS;
613 }
614
615 #undef __FUNC__
616 #define __FUNC__ "lis_matrix_solve_dns"
lis_matrix_solve_dns(LIS_MATRIX A,LIS_VECTOR B,LIS_VECTOR X,LIS_INT flag)617 LIS_INT lis_matrix_solve_dns(LIS_MATRIX A, LIS_VECTOR B, LIS_VECTOR X, LIS_INT flag)
618 {
619 LIS_INT i,j,n,np;
620 LIS_SCALAR t;
621 LIS_SCALAR *b,*x;
622
623 LIS_DEBUG_FUNC_IN;
624
625 n = A->n;
626 np = A->np;
627 b = B->value;
628 x = X->value;
629
630 switch(flag)
631 {
632 case LIS_MATRIX_LOWER:
633 for(i=0;i<n;i++)
634 {
635 t = b[i];
636 for(j=0;j<i;j++)
637 {
638 t -= A->value[j*n + i] * x[j];
639 }
640 x[i] = t * A->WD->value[i];
641 }
642 break;
643 case LIS_MATRIX_UPPER:
644 for(i=n-1;i>=0;i--)
645 {
646 t = b[i];
647 for(j=i+1;j<np;j++)
648 {
649 t -= A->value[j*n + i] * x[j];
650 }
651 x[i] = t * A->WD->value[i];
652 }
653 break;
654 case LIS_MATRIX_SSOR:
655 for(i=0;i<n;i++)
656 {
657 t = b[i];
658 for(j=0;j<i;j++)
659 {
660
661 t -= A->value[j*n + i] * x[j];
662 }
663 x[i] = t * A->WD->value[i];
664 }
665 for(i=n-1;i>=0;i--)
666 {
667 t = 0.0;
668 for(j=i+1;j<n;j++)
669 {
670 t += A->value[j*n + i] * x[j];
671 }
672 x[i] -= t * A->WD->value[i];
673 }
674 break;
675 }
676
677 LIS_DEBUG_FUNC_OUT;
678 return LIS_SUCCESS;
679 }
680
681 #undef __FUNC__
682 #define __FUNC__ "lis_matrix_solveh_dns"
lis_matrix_solveh_dns(LIS_MATRIX A,LIS_VECTOR B,LIS_VECTOR X,LIS_INT flag)683 LIS_INT lis_matrix_solveh_dns(LIS_MATRIX A, LIS_VECTOR B, LIS_VECTOR X, LIS_INT flag)
684 {
685 LIS_INT i,j,n,np;
686 LIS_SCALAR t;
687 LIS_SCALAR *x;
688
689 LIS_DEBUG_FUNC_IN;
690
691 n = A->n;
692 np = A->np;
693 x = X->value;
694
695 lis_vector_copy(B,X);
696 switch(flag)
697 {
698 case LIS_MATRIX_LOWER:
699 for(i=0;i<n;i++)
700 {
701 x[i] = x[i] * conj(A->WD->value[i]);
702 for(j=i+1;j<np;j++)
703 {
704 x[j] -= conj(A->value[j*n + i]) * x[i];
705 }
706 }
707 break;
708 case LIS_MATRIX_UPPER:
709 for(i=n-1;i>=0;i--)
710 {
711 x[i] = x[i] * conj(A->WD->value[i]);
712 for(j=0;j<i;j++)
713 {
714 x[j] -= conj(A->value[j*n + i]) * x[i];
715 }
716 }
717 break;
718 case LIS_MATRIX_SSOR:
719 for(i=0;i<n;i++)
720 {
721 t = x[i] * conj(A->WD->value[i]);
722 for(j=i+1;j<np;j++)
723 {
724 x[j] -= conj(A->value[j*n + i]) * t;
725 }
726 }
727 for(i=n-1;i>=0;i--)
728 {
729 t = x[i] * conj(A->WD->value[i]);
730 x[i] = t;
731 for(j=0;j<i;j++)
732 {
733 x[j] -= conj(A->value[j*n + i]) * t;
734 }
735 }
736 break;
737 }
738
739 LIS_DEBUG_FUNC_OUT;
740 return LIS_SUCCESS;
741 }
742
743 #undef __FUNC__
744 #define __FUNC__ "lis_matrix_convert_csr2dns"
lis_matrix_convert_csr2dns(LIS_MATRIX Ain,LIS_MATRIX Aout)745 LIS_INT lis_matrix_convert_csr2dns(LIS_MATRIX Ain, LIS_MATRIX Aout)
746 {
747 LIS_INT i,j;
748 LIS_INT err;
749 LIS_INT n,np,nprocs,my_rank;
750 LIS_INT is,ie;
751 LIS_SCALAR *value;
752
753 LIS_DEBUG_FUNC_IN;
754
755
756 n = Ain->n;
757 np = Ain->np;
758 #ifdef _OPENMP
759 nprocs = omp_get_max_threads();
760 #else
761 nprocs = 1;
762 #endif
763
764 value = NULL;
765
766 err = lis_matrix_malloc_dns(n,np,&value);
767 if( err )
768 {
769 return err;
770 }
771
772 /* convert dns */
773 #ifdef _OPENMP
774 #pragma omp parallel private(i,j,is,ie,my_rank)
775 #endif
776 {
777 #ifdef _OPENMP
778 my_rank = omp_get_thread_num();
779 #else
780 my_rank = 0;
781 #endif
782 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
783
784 for(j=0;j<np;j++)
785 {
786 for(i=is;i<ie;i++)
787 {
788 value[j*n+i] = (LIS_SCALAR)0.0;
789 }
790 }
791 for(i=is;i<ie;i++)
792 {
793 for(j=Ain->ptr[i];j<Ain->ptr[i+1];j++)
794 {
795 value[i + n*Ain->index[j]] = Ain->value[j];
796 }
797 }
798 }
799
800 err = lis_matrix_set_dns(value,Aout);
801 if( err )
802 {
803 lis_free(value);
804 return err;
805 }
806 err = lis_matrix_assemble(Aout);
807 if( err )
808 {
809 lis_matrix_storage_destroy(Aout);
810 return err;
811 }
812
813 LIS_DEBUG_FUNC_OUT;
814 return LIS_SUCCESS;
815 }
816
817 #undef __FUNC__
818 #define __FUNC__ "lis_matrix_convert_dns2csr"
lis_matrix_convert_dns2csr(LIS_MATRIX Ain,LIS_MATRIX Aout)819 LIS_INT lis_matrix_convert_dns2csr(LIS_MATRIX Ain, LIS_MATRIX Aout)
820 {
821 LIS_INT i,j,k;
822 LIS_INT err;
823 LIS_INT n,np,nnz;
824 LIS_INT *ptr,*index;
825 LIS_SCALAR *value;
826
827 LIS_DEBUG_FUNC_IN;
828
829 n = Ain->n;
830 np = Ain->np;
831
832 ptr = NULL;
833 index = NULL;
834 value = NULL;
835
836 ptr = (LIS_INT *)lis_malloc( (n+1)*sizeof(LIS_INT),"lis_matrix_convert_dns2csr::ptr" );
837 if( ptr==NULL )
838 {
839 LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
840 return LIS_OUT_OF_MEMORY;
841 }
842
843 #ifdef _OPENMP
844 #pragma omp parallel for private(i,j)
845 #endif
846 for(i=0;i<n;i++)
847 {
848 ptr[i+1] = 0;
849 for(j=0;j<np;j++)
850 {
851 if( Ain->value[j*n+i]!=(LIS_SCALAR)0.0 )
852 {
853 ptr[i+1]++;
854 }
855 }
856 }
857 ptr[0] = 0;
858 for(i=0;i<n;i++)
859 {
860 ptr[i+1] += ptr[i];
861 }
862 nnz = ptr[n];
863
864 index = (LIS_INT *)lis_malloc( nnz*sizeof(LIS_INT),"lis_matrix_convert_dns2csr::index" );
865 if( index==NULL )
866 {
867 lis_free2(3,ptr,index,value);
868 LIS_SETERR_MEM(nnz*sizeof(LIS_INT));
869 return LIS_OUT_OF_MEMORY;
870 }
871 value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_convert_dns2csr::value" );
872 if( value==NULL )
873 {
874 lis_free2(3,ptr,index,value);
875 LIS_SETERR_MEM(nnz*sizeof(LIS_INT));
876 return LIS_OUT_OF_MEMORY;
877 }
878
879 /* convert csr */
880 #ifdef _OPENMP
881 #pragma omp parallel for private(i,j,k)
882 #endif
883 for(i=0;i<n;i++)
884 {
885 k = ptr[i];
886 for(j=0;j<np;j++)
887 {
888 if( Ain->value[j*n + i]!=(LIS_SCALAR)0.0 )
889 {
890 value[k] = Ain->value[j*n + i];
891 index[k] = j;
892 k++;
893 }
894 }
895 }
896
897 err = lis_matrix_set_csr(nnz,ptr,index,value,Aout);
898 if( err )
899 {
900 lis_free2(3,ptr,index,value);
901 return err;
902 }
903 err = lis_matrix_assemble(Aout);
904 if( err )
905 {
906 lis_matrix_storage_destroy(Aout);
907 return err;
908 }
909
910 LIS_DEBUG_FUNC_OUT;
911 return LIS_SUCCESS;
912 }
913
914