1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
5 1. Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7 2. Redistributions in binary form must reproduce the above copyright
8 notice, this list of conditions and the following disclaimer in the
9 documentation and/or other materials provided with the distribution.
10 3. Neither the name of the project nor the names of its contributors
11 may be used to endorse or promote products derived from this software
12 without specific prior written permission.
13
14 THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18 PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 #include "lis_config_win.h"
32 #endif
33 #endif
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #ifdef HAVE_MALLOC_H
38 #include <malloc.h>
39 #endif
40 #include <string.h>
41 #include <stdarg.h>
42 #include <math.h>
43 #ifdef _OPENMP
44 #include <omp.h>
45 #endif
46 #ifdef USE_MPI
47 #include <mpi.h>
48 #endif
49 #include "lislib.h"
50
51 /************************************************
52 * function | SOM |
53 *-----------------------------+-----+
54 * lis_matrix_set | o |
55 * lis_matrix_setDLU | o |
56 * lis_matrix_malloc | o |
57 * lis_matrix_elements_copy | o |
58 * lis_matrix_transpose | o |
59 * lis_matrix_split | o |
60 * lis_matrix_merge | o |
61 *-----------------------------+-----+-----+
62 * function |merge|split|
63 *-----------------------------+-----+-----|
64 * lis_matrix_convert | o | |
65 * lis_matrix_copy | o | o |
66 * lis_matrix_get_diagonal | o | o |
67 * lis_matrix_shift_diagonal | o | o |
68 * lis_matrix_scale | o | o |
69 * lis_matrix_scale_symm | o | o |
70 * lis_matrix_normf | o | o |
71 * lis_matrix_sort | o | o |
72 * lis_matrix_solve | xxx | o |
73 * lis_matrix_solveh | xxx | o |
74 ************************************************/
75
76 #undef __FUNC__
77 #define __FUNC__ "lis_matrix_set_jad"
lis_matrix_set_jad(LIS_INT nnz,LIS_INT maxnzr,LIS_INT * perm,LIS_INT * ptr,LIS_INT * index,LIS_SCALAR * value,LIS_MATRIX A)78 LIS_INT lis_matrix_set_jad(LIS_INT nnz, LIS_INT maxnzr, LIS_INT *perm, LIS_INT *ptr, LIS_INT *index, LIS_SCALAR *value, LIS_MATRIX A)
79 {
80 LIS_INT i,n;
81 LIS_INT *col;
82 LIS_INT err;
83
84 LIS_DEBUG_FUNC_IN;
85
86 #if 0
87 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
88 if( err ) return err;
89 #else
90 if(lis_matrix_is_assembled(A)) {
91 LIS_DEBUG_FUNC_OUT;
92 return LIS_SUCCESS;
93 }
94 else {
95 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
96 if( err ) return err;
97 }
98 #endif
99
100 n = A->n;
101 col = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_set_jad::col");
102 if( col==NULL )
103 {
104 LIS_SETERR_MEM(n*sizeof(LIS_INT));
105 return LIS_OUT_OF_MEMORY;
106 }
107
108 for(i=0;i<n;i++)
109 {
110 col[perm[i]] = i;
111 }
112
113 A->col = col;
114 A->row = perm;
115 A->ptr = ptr;
116 A->index = index;
117 A->value = value;
118 A->is_copy = LIS_FALSE;
119 A->status = -LIS_MATRIX_JAD;
120 A->nnz = nnz;
121 A->maxnzr = maxnzr;
122
123 LIS_DEBUG_FUNC_OUT;
124 return LIS_SUCCESS;
125 }
126
127 #undef __FUNC__
128 #define __FUNC__ "lis_matrix_setDLU_jad"
lis_matrix_setDLU_jad(LIS_INT lnnz,LIS_INT unnz,LIS_INT lmaxnzr,LIS_INT umaxnzr,LIS_SCALAR * diag,LIS_INT * lperm,LIS_INT * lptr,LIS_INT * lindex,LIS_SCALAR * lvalue,LIS_INT * uperm,LIS_INT * uptr,LIS_INT * uindex,LIS_SCALAR * uvalue,LIS_MATRIX A)129 LIS_INT lis_matrix_setDLU_jad(LIS_INT lnnz, LIS_INT unnz, LIS_INT lmaxnzr, LIS_INT umaxnzr, LIS_SCALAR *diag, LIS_INT *lperm, LIS_INT *lptr, LIS_INT *lindex, LIS_SCALAR *lvalue, LIS_INT *uperm, LIS_INT *uptr, LIS_INT *uindex, LIS_SCALAR *uvalue, LIS_MATRIX A)
130 {
131 LIS_INT n,i,err;
132 LIS_INT *lcol,*ucol;
133 LIS_MATRIX_DIAG D;
134
135 LIS_DEBUG_FUNC_IN;
136
137 n = A->n;
138
139 #if 0
140 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
141 if( err ) return err;
142 #else
143 if(lis_matrix_is_assembled(A)) return LIS_SUCCESS;
144 else {
145 err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
146 if( err ) return err;
147 }
148 #endif
149
150 A->L = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT),"lis_matrix_setDLU_jad::A->L");
151 if( A->L==NULL )
152 {
153 LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT));
154 return LIS_OUT_OF_MEMORY;
155 }
156 A->U = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT),"lis_matrix_setDLU_jad::A->U");
157 if( A->U==NULL )
158 {
159 LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT));
160 lis_matrix_DLU_destroy(A);
161 return LIS_OUT_OF_MEMORY;
162 }
163 err = lis_matrix_diag_create(A->n,0,A->comm,&D);
164 if( err )
165 {
166 lis_matrix_DLU_destroy(A);
167 return err;
168 }
169 lcol = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_setDLU_jad::lcol");
170 if( lcol==NULL )
171 {
172 LIS_SETERR_MEM(n*sizeof(LIS_INT));
173 lis_matrix_DLU_destroy(A);
174 return LIS_OUT_OF_MEMORY;
175 }
176 ucol = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_setDLU_jad::ucol");
177 if( ucol==NULL )
178 {
179 LIS_SETERR_MEM(n*sizeof(LIS_INT));
180 lis_matrix_DLU_destroy(A);
181 lis_free(lcol);
182 return LIS_OUT_OF_MEMORY;
183 }
184
185 for(i=0;i<n;i++)
186 {
187 lcol[lperm[i]] = i;
188 ucol[uperm[i]] = i;
189 }
190
191 lis_free(D->value);
192 D->value = diag;
193 A->D = D;
194 A->L->nnz = lnnz;
195 A->L->maxnzr = lmaxnzr;
196 A->L->col = lcol;
197 A->L->row = lperm;
198 A->L->ptr = lptr;
199 A->L->index = lindex;
200 A->L->value = lvalue;
201 A->U->nnz = unnz;
202 A->U->maxnzr = umaxnzr;
203 A->U->col = ucol;
204 A->U->row = uperm;
205 A->U->ptr = uptr;
206 A->U->index = uindex;
207 A->U->value = uvalue;
208 A->is_copy = LIS_FALSE;
209 A->status = -LIS_MATRIX_JAD;
210 A->is_splited = LIS_TRUE;
211
212 LIS_DEBUG_FUNC_OUT;
213 return LIS_SUCCESS;
214 }
215
216 #undef __FUNC__
217 #define __FUNC__ "lis_matrix_malloc_jad"
lis_matrix_malloc_jad(LIS_INT n,LIS_INT nnz,LIS_INT maxnzr,LIS_INT ** perm,LIS_INT ** ptr,LIS_INT ** index,LIS_SCALAR ** value)218 LIS_INT lis_matrix_malloc_jad(LIS_INT n, LIS_INT nnz, LIS_INT maxnzr, LIS_INT **perm, LIS_INT **ptr, LIS_INT **index, LIS_SCALAR **value)
219 {
220 LIS_INT nprocs;
221
222 LIS_DEBUG_FUNC_IN;
223
224 *perm = NULL;
225 *ptr = NULL;
226 *index = NULL;
227 *value = NULL;
228 #ifdef _OPENMP
229 nprocs = omp_get_max_threads();
230 #else
231 nprocs = 1;
232 #endif
233
234 *perm = (LIS_INT *)lis_malloc( n*sizeof(LIS_INT),"lis_matrix_malloc_jad::perm" );
235 if( *perm==NULL )
236 {
237 LIS_SETERR_MEM(n*sizeof(LIS_INT));
238 lis_free2(4,*perm,*ptr,*index,*value);
239 return LIS_OUT_OF_MEMORY;
240 }
241 *ptr = (LIS_INT *)lis_malloc( nprocs*(maxnzr+1)*sizeof(LIS_INT),"lis_matrix_malloc_jad::ptr" );
242 if( *ptr==NULL )
243 {
244 LIS_SETERR_MEM(nprocs*(maxnzr+1)*sizeof(LIS_INT));
245 lis_free2(4,*perm,*ptr,*index,*value);
246 return LIS_OUT_OF_MEMORY;
247 }
248 *index = (LIS_INT *)lis_malloc( nnz*sizeof(LIS_INT),"lis_matrix_malloc_jad::index" );
249 if( *index==NULL )
250 {
251 LIS_SETERR_MEM(nnz*sizeof(LIS_INT));
252 lis_free2(4,*perm,*ptr,*index,*value);
253 return LIS_OUT_OF_MEMORY;
254 }
255 *value = (LIS_SCALAR *)lis_malloc( nnz*sizeof(LIS_SCALAR),"lis_matrix_malloc_jad::value" );
256 if( *value==NULL )
257 {
258 LIS_SETERR_MEM(nnz*sizeof(LIS_SCALAR));
259 lis_free2(4,*perm,*ptr,*index,*value);
260 return LIS_OUT_OF_MEMORY;
261 }
262 LIS_DEBUG_FUNC_OUT;
263 return LIS_SUCCESS;
264 }
265
266
267 #undef __FUNC__
268 #define __FUNC__ "lis_matrix_elements_copy_jad"
lis_matrix_elements_copy_jad(LIS_INT n,LIS_INT maxnzr,LIS_INT * perm,LIS_INT * ptr,LIS_INT * index,LIS_SCALAR * value,LIS_INT * o_perm,LIS_INT * o_ptr,LIS_INT * o_index,LIS_SCALAR * o_value)269 LIS_INT lis_matrix_elements_copy_jad(LIS_INT n, LIS_INT maxnzr, LIS_INT *perm, LIS_INT *ptr, LIS_INT *index, LIS_SCALAR *value, LIS_INT *o_perm, LIS_INT *o_ptr, LIS_INT *o_index, LIS_SCALAR *o_value)
270 {
271 LIS_INT i,j,is,ie;
272 LIS_INT nprocs,my_rank;
273
274 LIS_DEBUG_FUNC_IN;
275
276 #ifdef _OPENMP
277 nprocs = omp_get_max_threads();
278 #else
279 nprocs = 1;
280 #endif
281
282 #ifdef _OPENMP
283 #pragma omp parallel private(i,j,is,ie,my_rank)
284 #endif
285 {
286 #ifdef _OPENMP
287 my_rank = omp_get_thread_num();
288 #else
289 my_rank = 0;
290 #endif
291 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
292
293 for(j=0;j<maxnzr+1;j++)
294 {
295 o_ptr[my_rank*(maxnzr+1) + j] = ptr[my_rank*(maxnzr+1) + j];
296 }
297 for(i=is;i<ie;i++)
298 {
299 o_perm[i] = perm[i];
300 }
301 for(j=0;j<maxnzr;j++)
302 {
303 for(i=ptr[my_rank*(maxnzr+1) + j];i<ptr[my_rank*(maxnzr+1) + j+1];i++)
304 {
305 o_value[i] = value[i];
306 o_index[i] = index[i];
307 }
308 }
309 }
310
311 LIS_DEBUG_FUNC_OUT;
312 return LIS_SUCCESS;
313 }
314
315 #undef __FUNC__
316 #define __FUNC__ "lis_matrix_copy_jad"
lis_matrix_copy_jad(LIS_MATRIX Ain,LIS_MATRIX Aout)317 LIS_INT lis_matrix_copy_jad(LIS_MATRIX Ain, LIS_MATRIX Aout)
318 {
319 LIS_INT err;
320 LIS_INT i,n,nnz,lnnz,unnz,maxnzr,lmaxnzr,umaxnzr;
321 LIS_INT *perm,*ptr,*index;
322 LIS_INT *lperm,*lptr,*lindex;
323 LIS_INT *uperm,*uptr,*uindex;
324 LIS_SCALAR *value,*lvalue,*uvalue,*diag;
325
326 LIS_DEBUG_FUNC_IN;
327
328 n = Ain->n;
329
330 if( Ain->is_splited )
331 {
332 lmaxnzr = Ain->L->maxnzr;
333 umaxnzr = Ain->U->maxnzr;
334 lnnz = Ain->L->nnz;
335 unnz = Ain->U->nnz;
336 lperm = NULL;
337 lptr = NULL;
338 lindex = NULL;
339 uperm = NULL;
340 uptr = NULL;
341 uindex = NULL;
342 diag = NULL;
343
344 err = lis_matrix_malloc_jad(n,lnnz,lmaxnzr,&lperm,&lptr,&lindex,&lvalue);
345 if( err )
346 {
347 return err;
348 }
349 err = lis_matrix_malloc_jad(n,unnz,umaxnzr,&uperm,&uptr,&uindex,&uvalue);
350 if( err )
351 {
352 lis_free2(9,diag,uperm,lperm,uptr,lptr,uindex,lindex,uvalue,lvalue);
353 return err;
354 }
355 diag = (LIS_SCALAR *)lis_malloc(n*sizeof(LIS_SCALAR),"lis_matrix_copy_jad::diag");
356 if( diag==NULL )
357 {
358 lis_free2(9,diag,uperm,lperm,uptr,lptr,uindex,lindex,uvalue,lvalue);
359 return err;
360 }
361
362 #ifdef _OPENMP
363 #pragma omp parallel for private(i)
364 #endif
365 for(i=0;i<n;i++)
366 {
367 diag[i] = Ain->D->value[i];
368 }
369 lis_matrix_elements_copy_jad(n,lmaxnzr,Ain->L->row,Ain->L->ptr,Ain->L->index,Ain->L->value,lperm,lptr,lindex,lvalue);
370 lis_matrix_elements_copy_jad(n,umaxnzr,Ain->U->row,Ain->U->ptr,Ain->U->index,Ain->U->value,uperm,uptr,uindex,uvalue);
371
372 err = lis_matrix_setDLU_jad(lnnz,unnz,lmaxnzr,umaxnzr,diag,lperm,lptr,lindex,lvalue,uperm,uptr,uindex,uvalue,Aout);
373 if( err )
374 {
375 lis_free2(9,diag,uperm,lperm,uptr,lptr,uindex,lindex,uvalue,lvalue);
376 return err;
377 }
378 }
379 if( !Ain->is_splited || (Ain->is_splited && Ain->is_save) )
380 {
381 perm = NULL;
382 ptr = NULL;
383 index = NULL;
384 value = NULL;
385 maxnzr = Ain->maxnzr;
386 nnz = Ain->nnz;
387 err = lis_matrix_malloc_jad(n,nnz,maxnzr,&perm,&ptr,&index,&value);
388 if( err )
389 {
390 return err;
391 }
392
393 lis_matrix_elements_copy_jad(n,maxnzr,Ain->row,Ain->ptr,Ain->index,Ain->value,perm,ptr,index,value);
394
395 err = lis_matrix_set_jad(nnz,maxnzr,perm,ptr,index,value,Aout);
396 if( err )
397 {
398 lis_free2(4,perm,ptr,index,value);
399 return err;
400 }
401 }
402
403 err = lis_matrix_assemble(Aout);
404 if( err )
405 {
406 lis_matrix_storage_destroy(Aout);
407 return err;
408 }
409 LIS_DEBUG_FUNC_OUT;
410 return LIS_SUCCESS;
411 }
412
413 #undef __FUNC__
414 #define __FUNC__ "lis_matrix_get_diagonal_jad"
lis_matrix_get_diagonal_jad(LIS_MATRIX A,LIS_SCALAR d[])415 LIS_INT lis_matrix_get_diagonal_jad(LIS_MATRIX A, LIS_SCALAR d[])
416 {
417 LIS_INT i,j,k,l;
418 LIS_INT n,maxnzr;
419 #ifdef _OPENMP
420 LIS_INT is,ie,js,je;
421 LIS_INT nprocs,my_rank;
422 #endif
423
424 LIS_DEBUG_FUNC_IN;
425
426 n = A->n;
427 maxnzr = A->maxnzr;
428 k = n;
429 if( A->is_splited )
430 {
431 #ifdef _OPENMP
432 #pragma omp parallel for private(i)
433 #endif
434 for(i=0; i<n; i++)
435 {
436 d[i] = A->D->value[i];
437 }
438 }
439 else
440 {
441 #ifdef _OPENMP
442 nprocs = omp_get_max_threads();
443 #pragma omp parallel private(i,j,k,l,is,ie,js,je,my_rank)
444 {
445 my_rank = omp_get_thread_num();
446 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
447 k = ie-is;
448 for(j=0;j<maxnzr;j++)
449 {
450 l = is;
451 js = A->ptr[my_rank*(maxnzr+1) + j];
452 je = A->ptr[my_rank*(maxnzr+1) + j+1];
453 for(i=js;i<je;i++)
454 {
455 if( A->row[l]==A->index[i] )
456 {
457 d[A->row[l]] = A->value[i];
458 k--;
459 if( k==0 ) goto get_diag_end;
460 }
461 l++;
462 }
463 }
464 get_diag_end:
465 ;
466 }
467 #else
468 for(j=0;j<maxnzr;j++)
469 {
470 l = 0;
471 for(i=A->ptr[j];i<A->ptr[j+1];i++)
472 {
473 if( A->row[l]==A->index[i] )
474 {
475 d[A->row[l]] = A->value[i];
476 k--;
477 if( k==0 ) return LIS_SUCCESS;
478 }
479 l++;
480 }
481 }
482 #endif
483 }
484 LIS_DEBUG_FUNC_OUT;
485 return LIS_SUCCESS;
486 }
487
488 #undef __FUNC__
489 #define __FUNC__ "lis_matrix_shift_diagonal_jad"
lis_matrix_shift_diagonal_jad(LIS_MATRIX A,LIS_SCALAR sigma)490 LIS_INT lis_matrix_shift_diagonal_jad(LIS_MATRIX A, LIS_SCALAR sigma)
491 {
492 LIS_INT i,j,k,l;
493 LIS_INT n,maxnzr;
494 #ifdef _OPENMP
495 LIS_INT is,ie,js,je;
496 LIS_INT nprocs,my_rank;
497 #endif
498
499 LIS_DEBUG_FUNC_IN;
500
501 n = A->n;
502 maxnzr = A->maxnzr;
503 k = n;
504 if( A->is_splited )
505 {
506 #ifdef _OPENMP
507 #pragma omp parallel for private(i)
508 #endif
509 for(i=0; i<n; i++)
510 {
511 A->D->value[i] -= sigma;
512 }
513 }
514 else
515 {
516 #ifdef _OPENMP
517 nprocs = omp_get_max_threads();
518 #pragma omp parallel private(i,j,k,l,is,ie,js,je,my_rank)
519 {
520 my_rank = omp_get_thread_num();
521 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
522 k = ie-is;
523 for(j=0;j<maxnzr;j++)
524 {
525 l = is;
526 js = A->ptr[my_rank*(maxnzr+1) + j];
527 je = A->ptr[my_rank*(maxnzr+1) + j+1];
528 for(i=js;i<je;i++)
529 {
530 if( A->row[l]==A->index[i] )
531 {
532 A->value[i] -= sigma;
533 k--;
534 if( k==0 ) goto get_diag_end;
535 }
536 l++;
537 }
538 }
539 get_diag_end:
540 ;
541 }
542 #else
543 for(j=0;j<maxnzr;j++)
544 {
545 l = 0;
546 for(i=A->ptr[j];i<A->ptr[j+1];i++)
547 {
548 if( A->row[l]==A->index[i] )
549 {
550 A->value[i] -= sigma;
551 k--;
552 if( k==0 ) return LIS_SUCCESS;
553 }
554 l++;
555 }
556 }
557 #endif
558 }
559 LIS_DEBUG_FUNC_OUT;
560 return LIS_SUCCESS;
561 }
562
563 #undef __FUNC__
564 #define __FUNC__ "lis_matrix_scale_jad"
lis_matrix_scale_jad(LIS_MATRIX A,LIS_SCALAR d[])565 LIS_INT lis_matrix_scale_jad(LIS_MATRIX A, LIS_SCALAR d[])
566 {
567 LIS_INT i,j,k,is,ie,js,je;
568 LIS_INT n,maxnzr;
569 LIS_INT nprocs,my_rank;
570
571 LIS_DEBUG_FUNC_IN;
572
573 n = A->n;
574 if( A->is_splited )
575 {
576 #ifdef _OPENMP
577 nprocs = omp_get_max_threads();
578 #else
579 nprocs = 1;
580 #endif
581
582 #ifdef _OPENMP
583 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
584 #endif
585 {
586 #ifdef _OPENMP
587 my_rank = omp_get_thread_num();
588 #else
589 my_rank = 0;
590 #endif
591 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
592 for(i=is;i<ie;i++)
593 {
594 A->D->value[i] = 1.0;
595 }
596 for(j=0;j<A->L->maxnzr;j++)
597 {
598 k = is;
599 js = A->L->ptr[my_rank*(A->L->maxnzr+1) + j];
600 je = A->L->ptr[my_rank*(A->L->maxnzr+1) + j+1];
601 for(i=js;i<je;i++)
602 {
603 A->L->value[i] *= d[A->L->row[k]];
604 k++;
605 }
606 }
607 for(j=0;j<A->U->maxnzr;j++)
608 {
609 k = is;
610 js = A->U->ptr[my_rank*(A->U->maxnzr+1) + j];
611 je = A->U->ptr[my_rank*(A->U->maxnzr+1) + j+1];
612 for(i=js;i<je;i++)
613 {
614 A->U->value[i] *= d[A->U->row[k]];
615 k++;
616 }
617 }
618 }
619 }
620 else
621 {
622 maxnzr = A->maxnzr;
623 #ifdef _OPENMP
624 nprocs = omp_get_max_threads();
625 #else
626 nprocs = 1;
627 #endif
628
629 #ifdef _OPENMP
630 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
631 #endif
632 {
633 #ifdef _OPENMP
634 my_rank = omp_get_thread_num();
635 #else
636 my_rank = 0;
637 #endif
638 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
639 for(j=0;j<maxnzr;j++)
640 {
641 k = is;
642 js = A->ptr[my_rank*(maxnzr+1) + j];
643 je = A->ptr[my_rank*(maxnzr+1) + j+1];
644 for(i=js;i<je;i++)
645 {
646 A->value[i] *= d[A->row[k]];
647 k++;
648 }
649 }
650 }
651 }
652 LIS_DEBUG_FUNC_OUT;
653 return LIS_SUCCESS;
654 }
655
656 #undef __FUNC__
657 #define __FUNC__ "lis_matrix_scale_symm_jad"
lis_matrix_scale_symm_jad(LIS_MATRIX A,LIS_SCALAR d[])658 LIS_INT lis_matrix_scale_symm_jad(LIS_MATRIX A, LIS_SCALAR d[])
659 {
660 LIS_INT i,j,k,is,ie,js,je;
661 LIS_INT n,maxnzr;
662 LIS_INT nprocs,my_rank;
663
664 LIS_DEBUG_FUNC_IN;
665
666 n = A->n;
667 if( A->is_splited )
668 {
669 #ifdef _OPENMP
670 nprocs = omp_get_max_threads();
671 #else
672 nprocs = 1;
673 #endif
674
675 #ifdef _OPENMP
676 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
677 #endif
678 {
679 #ifdef _OPENMP
680 my_rank = omp_get_thread_num();
681 #else
682 my_rank = 0;
683 #endif
684 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
685 for(i=is;i<ie;i++)
686 {
687 A->D->value[i] = 1.0;
688 }
689 for(j=0;j<A->L->maxnzr;j++)
690 {
691 k = is;
692 js = A->L->ptr[my_rank*(A->L->maxnzr+1) + j];
693 je = A->L->ptr[my_rank*(A->L->maxnzr+1) + j+1];
694 for(i=js;i<je;i++)
695 {
696 A->L->value[i] *= d[A->L->row[k]]*d[A->L->index[i]];
697 k++;
698 }
699 }
700 for(j=0;j<A->U->maxnzr;j++)
701 {
702 k = is;
703 js = A->U->ptr[my_rank*(A->U->maxnzr+1) + j];
704 je = A->U->ptr[my_rank*(A->U->maxnzr+1) + j+1];
705 for(i=js;i<je;i++)
706 {
707 A->U->value[i] *= d[A->U->row[k]]*d[A->U->index[i]];
708 k++;
709 }
710 }
711 }
712 }
713 else
714 {
715 maxnzr = A->maxnzr;
716 #ifdef _OPENMP
717 nprocs = omp_get_max_threads();
718 #else
719 nprocs = 1;
720 #endif
721
722 #ifdef _OPENMP
723 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
724 #endif
725 {
726 #ifdef _OPENMP
727 my_rank = omp_get_thread_num();
728 #else
729 my_rank = 0;
730 #endif
731 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
732 for(j=0;j<maxnzr;j++)
733 {
734 k = is;
735 js = A->ptr[my_rank*(maxnzr+1) + j];
736 je = A->ptr[my_rank*(maxnzr+1) + j+1];
737 for(i=js;i<je;i++)
738 {
739 A->value[i] *= d[A->row[k]]*d[A->index[i]];
740 k++;
741 }
742 }
743 }
744 }
745 LIS_DEBUG_FUNC_OUT;
746 return LIS_SUCCESS;
747 }
748
749 #undef __FUNC__
750 #define __FUNC__ "lis_matrix_normf_jad"
lis_matrix_normf_jad(LIS_MATRIX A,LIS_SCALAR * nrm)751 LIS_INT lis_matrix_normf_jad(LIS_MATRIX A, LIS_SCALAR *nrm)
752 {
753 LIS_INT i,j;
754 LIS_INT n;
755 LIS_SCALAR sum;
756
757 LIS_DEBUG_FUNC_IN;
758
759 n = A->n;
760 sum = (LIS_SCALAR)0;
761 if( A->is_splited )
762 {
763 #ifdef _OPENMP
764 #pragma omp parallel for reduction(+:sum) private(i,j)
765 #endif
766 for(i=0; i<n; i++)
767 {
768 sum += A->D->value[i]*A->D->value[i];
769 for(j=A->L->index[i];j<A->L->index[i+1];j++)
770 {
771 sum += A->L->value[j]*A->L->value[j];
772 }
773 for(j=A->U->index[i];j<A->U->index[i+1];j++)
774 {
775 sum += A->U->value[j]*A->U->value[j];
776 }
777 }
778 }
779 else
780 {
781 #ifdef _OPENMP
782 #pragma omp parallel for reduction(+:sum) private(i,j)
783 #endif
784 for(i=0; i<n; i++)
785 {
786 sum += A->value[i]*A->value[i];
787 for(j=A->index[i];j<A->index[i+1];j++)
788 {
789 sum += A->value[j]*A->value[j];
790 }
791 }
792 }
793 *nrm = sqrt(sum);
794 LIS_DEBUG_FUNC_OUT;
795 return LIS_SUCCESS;
796 }
797
798 #undef __FUNC__
799 #define __FUNC__ "lis_matrix_transpose_jad"
lis_matrix_transpose_jad(LIS_MATRIX Ain,LIS_MATRIX * Aout)800 LIS_INT lis_matrix_transpose_jad(LIS_MATRIX Ain, LIS_MATRIX *Aout)
801 {
802
803 LIS_DEBUG_FUNC_IN;
804
805 /* err = lis_matrix_convert_jad2csc(Ain,Aout);*/
806 (*Aout)->matrix_type = LIS_MATRIX_JAD;
807 (*Aout)->status = LIS_MATRIX_JAD;
808
809 LIS_DEBUG_FUNC_OUT;
810 return LIS_SUCCESS;
811 }
812
813 #undef __FUNC__
814 #define __FUNC__ "lis_matrix_split_jad"
lis_matrix_split_jad(LIS_MATRIX A)815 LIS_INT lis_matrix_split_jad(LIS_MATRIX A)
816 {
817 LIS_INT i,j,k,kk,n,maxnzr;
818 LIS_INT lnnz,unnz,lmaxnzr,umaxnzr;
819 LIS_INT err;
820 LIS_INT *liw,*uiw,*liw2,*uiw2;
821 LIS_INT *lperm,*lptr,*lindex;
822 LIS_INT *uperm,*uptr,*uindex;
823 #ifdef _OPENMP
824 LIS_INT *iw;
825 LIS_INT my_rank,nprocs,is,ie,js,je;
826 #endif
827 LIS_SCALAR *lvalue,*uvalue;
828 LIS_MATRIX_DIAG D;
829
830 LIS_DEBUG_FUNC_IN;
831
832 n = A->n;
833 maxnzr = A->maxnzr;
834 lmaxnzr = 0;
835 umaxnzr = 0;
836 lnnz = 0;
837 unnz = 0;
838 liw = NULL;
839 uiw = NULL;
840 liw2 = NULL;
841 uiw2 = NULL;
842 D = NULL;
843 lperm = NULL;
844 lptr = NULL;
845 lindex = NULL;
846 lvalue = NULL;
847 uperm = NULL;
848 uptr = NULL;
849 uindex = NULL;
850 uvalue = NULL;
851
852 liw = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_split_jad::liw");
853 if( liw==NULL )
854 {
855 LIS_SETERR_MEM(n*sizeof(LIS_INT));
856 return LIS_ERR_OUT_OF_MEMORY;
857 }
858 uiw = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_split_jad::uiw");
859 if( uiw==NULL )
860 {
861 LIS_SETERR_MEM(n*sizeof(LIS_INT));
862 lis_free2(12,liw,uiw,liw2,uiw2,lperm,lptr,lindex,lvalue,uperm,uptr,uindex,uvalue);
863 return LIS_ERR_OUT_OF_MEMORY;
864 }
865 liw2 = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_split_jad::liw2");
866 if( liw2==NULL )
867 {
868 LIS_SETERR_MEM(n*sizeof(LIS_INT));
869 lis_free2(12,liw,uiw,liw2,uiw2,lperm,lptr,lindex,lvalue,uperm,uptr,uindex,uvalue);
870 return LIS_ERR_OUT_OF_MEMORY;
871 }
872 uiw2 = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_split_jad::uiw2");
873 if( uiw2==NULL )
874 {
875 LIS_SETERR_MEM(n*sizeof(LIS_INT));
876 lis_free2(12,liw,uiw,liw2,uiw2,lperm,lptr,lindex,lvalue,uperm,uptr,uindex,uvalue);
877 return LIS_ERR_OUT_OF_MEMORY;
878 }
879
880 #ifdef _OPENMP
881 nprocs = omp_get_max_threads();
882 iw = (LIS_INT *)lis_malloc((nprocs+1)*LIS_VEC_TMP_PADD*sizeof(LIS_INT),"lis_matrix_split_jad::iw");
883 if( iw==NULL )
884 {
885 LIS_SETERR_MEM((nprocs+1)*LIS_VEC_TMP_PADD*sizeof(LIS_INT));
886 return LIS_OUT_OF_MEMORY;
887 }
888 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
889 {
890 my_rank = omp_get_thread_num();
891 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
892 memset(&liw[is],0,(ie-is)*sizeof(LIS_INT));
893 memset(&uiw[is],0,(ie-is)*sizeof(LIS_INT));
894 iw[my_rank*LIS_VEC_TMP_PADD] = 0;
895 iw[my_rank*LIS_VEC_TMP_PADD+1] = 0;
896 iw[my_rank*LIS_VEC_TMP_PADD+2] = 0;
897 iw[my_rank*LIS_VEC_TMP_PADD+3] = 0;
898 for(j=0;j<maxnzr;j++)
899 {
900 k = is;
901 js = A->ptr[my_rank*(maxnzr+1) + j];
902 je = A->ptr[my_rank*(maxnzr+1) + j+1];
903 for(i=js;i<je;i++)
904 {
905 if( A->index[i]<A->row[k] )
906 {
907 iw[my_rank*LIS_VEC_TMP_PADD+2]++;
908 liw[k]++;
909 }
910 else if( A->index[i]>A->row[k] )
911 {
912 iw[my_rank*LIS_VEC_TMP_PADD+3]++;
913 uiw[k]++;
914 }
915 k++;
916 }
917 }
918 for(i=is;i<ie;i++)
919 {
920 if( iw[my_rank*LIS_VEC_TMP_PADD]<liw[i] ) iw[my_rank*LIS_VEC_TMP_PADD] = liw[i];
921 if( iw[my_rank*LIS_VEC_TMP_PADD+1]<uiw[i] ) iw[my_rank*LIS_VEC_TMP_PADD+1] = uiw[i];
922 }
923 }
924 iw[4] = 0;
925 iw[5] = 0;
926 for(i=0;i<nprocs;i++)
927 {
928 if( iw[i*LIS_VEC_TMP_PADD]>lmaxnzr ) lmaxnzr = iw[i*LIS_VEC_TMP_PADD];
929 if( iw[i*LIS_VEC_TMP_PADD+1]>umaxnzr ) umaxnzr = iw[i*LIS_VEC_TMP_PADD+1];
930 iw[(i+1)*LIS_VEC_TMP_PADD+4] = iw[i*LIS_VEC_TMP_PADD+4] + iw[i*LIS_VEC_TMP_PADD+2];
931 iw[(i+1)*LIS_VEC_TMP_PADD+5] = iw[i*LIS_VEC_TMP_PADD+5] + iw[i*LIS_VEC_TMP_PADD+3];
932 }
933 lnnz = iw[nprocs*LIS_VEC_TMP_PADD+4];
934 unnz = iw[nprocs*LIS_VEC_TMP_PADD+5];
935 #else
936 memset(liw,0,n*sizeof(LIS_INT));
937 memset(uiw,0,n*sizeof(LIS_INT));
938 for(j=0;j<maxnzr;j++)
939 {
940 k = 0;
941 for(i=A->ptr[j];i<A->ptr[j+1];i++)
942 {
943 if( A->index[i]<A->row[k] )
944 {
945 lnnz++;
946 liw[k]++;
947 }
948 else if( A->index[i]>A->row[k] )
949 {
950 unnz++;
951 uiw[k]++;
952 }
953 k++;
954 }
955 }
956 for(i=0;i<n;i++)
957 {
958 if( lmaxnzr<liw[i] ) lmaxnzr = liw[i];
959 if( umaxnzr<uiw[i] ) umaxnzr = uiw[i];
960 }
961 #endif
962
963 err = lis_matrix_LU_create(A);
964 if( err )
965 {
966 lis_free2(12,liw,uiw,liw2,uiw2,lperm,lptr,lindex,lvalue,uperm,uptr,uindex,uvalue);
967 return err;
968 }
969 err = lis_matrix_malloc_jad(n,lnnz,lmaxnzr,&lperm,&lptr,&lindex,&lvalue);
970 if( err )
971 {
972 lis_free2(12,liw,uiw,liw2,uiw2,lperm,lptr,lindex,lvalue,uperm,uptr,uindex,uvalue);
973 return err;
974 }
975 err = lis_matrix_malloc_jad(n,unnz,umaxnzr,&uperm,&uptr,&uindex,&uvalue);
976 if( err )
977 {
978 lis_free2(12,liw,uiw,liw2,uiw2,lperm,lptr,lindex,lvalue,uperm,uptr,uindex,uvalue);
979 return err;
980 }
981 err = lis_matrix_diag_duplicateM(A,&D);
982 if( err )
983 {
984 lis_free2(12,liw,uiw,liw2,uiw2,lperm,lptr,lindex,lvalue,uperm,uptr,uindex,uvalue);
985 return err;
986 }
987
988 #ifdef _OPENMP
989 nprocs = omp_get_max_threads();
990 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
991 {
992 my_rank = omp_get_thread_num();
993 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
994 memset(&lptr[my_rank*(lmaxnzr+1)],0,(lmaxnzr+1)*sizeof(LIS_INT));
995 memset(&uptr[my_rank*(umaxnzr+1)],0,(umaxnzr+1)*sizeof(LIS_INT));
996 for(i=is;i<ie;i++)
997 {
998 lperm[i] = A->row[i];
999 uperm[i] = A->row[i];
1000 for(j=0;j<liw[i];j++)
1001 {
1002 lptr[my_rank*(lmaxnzr+1) + j+1]++;
1003 }
1004 for(j=0;j<uiw[i];j++)
1005 {
1006 uptr[my_rank*(umaxnzr+1) + j+1]++;
1007 }
1008 }
1009 lis_sortr_ii(is,ie-1,liw,lperm);
1010 lis_sortr_ii(is,ie-1,uiw,uperm);
1011 lptr[my_rank*(lmaxnzr+1)] = iw[my_rank*LIS_VEC_TMP_PADD+4];
1012 uptr[my_rank*(umaxnzr+1)] = iw[my_rank*LIS_VEC_TMP_PADD+5];
1013 for(j=0;j<lmaxnzr;j++)
1014 {
1015 lptr[my_rank*(lmaxnzr+1) + j+1] += lptr[my_rank*(lmaxnzr+1) + j];
1016 }
1017 for(j=0;j<umaxnzr;j++)
1018 {
1019 uptr[my_rank*(umaxnzr+1) + j+1] += uptr[my_rank*(umaxnzr+1) + j];
1020 }
1021
1022 for(i=is;i<ie;i++)
1023 {
1024 liw[i] = 0;
1025 uiw[i] = 0;
1026 liw2[lperm[i]] = i;
1027 uiw2[uperm[i]] = i;
1028 }
1029 for(j=0;j<maxnzr;j++)
1030 {
1031 k = is;
1032 for(i=A->ptr[my_rank*(maxnzr+1) + j];i<A->ptr[my_rank*(maxnzr+1) + j+1];i++)
1033 {
1034 if( A->index[i]<A->row[k] )
1035 {
1036 kk = lptr[my_rank*(lmaxnzr+1) + liw[A->row[k]]] + liw2[A->row[k]] - is;
1037 liw[A->row[k]]++;
1038 lindex[kk] = A->index[i];
1039 lvalue[kk] = A->value[i];
1040 }
1041 else if( A->index[i]>A->row[k] )
1042 {
1043 kk = uptr[my_rank*(umaxnzr+1) + uiw[A->row[k]]] + uiw2[A->row[k]] - is;
1044 uiw[A->row[k]]++;
1045 uindex[kk] = A->index[i];
1046 uvalue[kk] = A->value[i];
1047 }
1048 else
1049 {
1050 D->value[A->row[k]] = A->value[i];
1051 }
1052 k++;
1053 }
1054 }
1055 }
1056 lis_free(iw);
1057 #else
1058 memset(lptr,0,(lmaxnzr+1)*sizeof(LIS_INT));
1059 memset(uptr,0,(umaxnzr+1)*sizeof(LIS_INT));
1060 for(i=0;i<n;i++)
1061 {
1062 lperm[i] = A->row[i];
1063 uperm[i] = A->row[i];
1064 for(j=0;j<liw[i];j++)
1065 {
1066 lptr[j+1]++;
1067 }
1068 for(j=0;j<uiw[i];j++)
1069 {
1070 uptr[j+1]++;
1071 }
1072 }
1073 lis_sortr_ii(0,n-1,liw,lperm);
1074 lis_sortr_ii(0,n-1,uiw,uperm);
1075 for(j=0;j<lmaxnzr;j++)
1076 {
1077 lptr[j+1] += lptr[j];
1078 }
1079 for(j=0;j<umaxnzr;j++)
1080 {
1081 uptr[j+1] += uptr[j];
1082 }
1083
1084 for(i=0;i<n;i++)
1085 {
1086 liw[i] = 0;
1087 uiw[i] = 0;
1088 liw2[lperm[i]] = i;
1089 uiw2[uperm[i]] = i;
1090 }
1091 for(j=0;j<maxnzr;j++)
1092 {
1093 k = 0;
1094 for(i=A->ptr[j];i<A->ptr[j+1];i++)
1095 {
1096 if( A->index[i]<A->row[k] )
1097 {
1098 kk = lptr[liw[A->row[k]]] + liw2[A->row[k]];
1099 liw[A->row[k]]++;
1100 lindex[kk] = A->index[i];
1101 lvalue[kk] = A->value[i];
1102 }
1103 else if( A->index[i]>A->row[k] )
1104 {
1105 kk = uptr[uiw[A->row[k]]] + uiw2[A->row[k]];
1106 uiw[A->row[k]]++;
1107 uindex[kk] = A->index[i];
1108 uvalue[kk] = A->value[i];
1109 }
1110 else
1111 {
1112 D->value[A->row[k]] = A->value[i];
1113 }
1114 k++;
1115 }
1116 }
1117 #endif
1118 A->L->nnz = lnnz;
1119 A->L->maxnzr = lmaxnzr;
1120 A->L->col = liw2;
1121 A->L->row = lperm;
1122 A->L->ptr = lptr;
1123 A->L->index = lindex;
1124 A->L->value = lvalue;
1125 A->U->nnz = unnz;
1126 A->U->maxnzr = umaxnzr;
1127 A->U->col = uiw2;
1128 A->U->row = uperm;
1129 A->U->ptr = uptr;
1130 A->U->index = uindex;
1131 A->U->value = uvalue;
1132 A->D = D;
1133 A->is_splited = LIS_TRUE;
1134
1135 lis_free2(2,liw,uiw);
1136 LIS_DEBUG_FUNC_OUT;
1137 return LIS_SUCCESS;
1138 }
1139
1140
1141 #undef __FUNC__
1142 #define __FUNC__ "lis_matrix_merge_jad"
lis_matrix_merge_jad(LIS_MATRIX A)1143 LIS_INT lis_matrix_merge_jad(LIS_MATRIX A)
1144 {
1145 LIS_INT i,j,k,kk,ie;
1146 LIS_INT n,nnz,maxnzr;
1147 LIS_INT err;
1148 LIS_INT *perm,*ptr,*index,*iw,*iw2;
1149 LIS_SCALAR *value;
1150 #ifdef _OPENMP
1151 LIS_INT is,js,je,nprocs,my_rank;
1152 LIS_INT *iw3;
1153 #endif
1154
1155 LIS_DEBUG_FUNC_IN;
1156
1157
1158 n = A->n;
1159 perm = NULL;
1160 ptr = NULL;
1161 index = NULL;
1162 value = NULL;
1163 iw = NULL;
1164 iw2 = NULL;
1165 nnz = A->L->nnz + A->U->nnz + n;
1166
1167 iw = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_merge_jad::iw");
1168 if( iw==NULL )
1169 {
1170 LIS_SETERR_MEM(n*sizeof(LIS_INT));
1171 return LIS_OUT_OF_MEMORY;
1172 }
1173 iw2 = (LIS_INT *)lis_malloc(n*sizeof(LIS_INT),"lis_matrix_merge_jad::iw2");
1174 if( iw2==NULL )
1175 {
1176 LIS_SETERR_MEM(n*sizeof(LIS_INT));
1177 lis_free2(2,iw,iw2);
1178 return LIS_OUT_OF_MEMORY;
1179 }
1180 #ifdef _OPENMP
1181 nprocs = omp_get_max_threads();
1182 iw3 = (LIS_INT *)lis_malloc((nprocs+1)*LIS_VEC_TMP_PADD*sizeof(LIS_INT),"lis_matrix_merge_jad::iw3");
1183 if( iw3==NULL )
1184 {
1185 LIS_SETERR_MEM((nprocs+1)*LIS_VEC_TMP_PADD*sizeof(LIS_INT));
1186 return LIS_OUT_OF_MEMORY;
1187 }
1188 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
1189 {
1190 my_rank = omp_get_thread_num();
1191 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
1192 iw3[my_rank*LIS_VEC_TMP_PADD] = 0;
1193 iw3[my_rank*LIS_VEC_TMP_PADD+1] = (ie-is);
1194 for(i=is;i<ie;i++)
1195 {
1196 iw[i] = 1;
1197 }
1198 for(j=0;j<A->L->maxnzr;j++)
1199 {
1200 k = is;
1201 js = A->L->ptr[my_rank*(A->L->maxnzr+1) + j];
1202 je = A->L->ptr[my_rank*(A->L->maxnzr+1) + j+1];
1203 for(i=js;i<je;i++)
1204 {
1205 iw3[my_rank*LIS_VEC_TMP_PADD+1]++;
1206 iw[A->L->row[k++]]++;
1207 }
1208 }
1209 for(j=0;j<A->U->maxnzr;j++)
1210 {
1211 k = is;
1212 js = A->U->ptr[my_rank*(A->U->maxnzr+1) + j];
1213 je = A->U->ptr[my_rank*(A->U->maxnzr+1) + j+1];
1214 for(i=js;i<je;i++)
1215 {
1216 iw3[my_rank*LIS_VEC_TMP_PADD+1]++;
1217 iw[A->U->row[k++]]++;
1218 }
1219 }
1220 for(i=is;i<ie;i++)
1221 {
1222 if( iw3[my_rank*LIS_VEC_TMP_PADD]<iw[i] ) iw3[my_rank*LIS_VEC_TMP_PADD] = iw[i];
1223 }
1224 }
1225 maxnzr = 0;
1226 iw3[2] = 0;
1227 for(i=0;i<nprocs;i++)
1228 {
1229 if( iw3[i*LIS_VEC_TMP_PADD]>maxnzr ) maxnzr = iw3[i*LIS_VEC_TMP_PADD];
1230 iw3[(i+1)*LIS_VEC_TMP_PADD+2] = iw3[i*LIS_VEC_TMP_PADD+2] + iw3[i*LIS_VEC_TMP_PADD+1];
1231 }
1232 #else
1233 for(i=0;i<n;i++)
1234 {
1235 iw[i] = 1;
1236 }
1237 for(j=0;j<A->L->maxnzr;j++)
1238 {
1239 ie = A->L->ptr[j+1] - A->L->ptr[j];
1240 for(i=0;i<ie;i++)
1241 {
1242 iw[A->L->row[i]]++;
1243 }
1244 }
1245 for(j=0;j<A->U->maxnzr;j++)
1246 {
1247 ie = A->U->ptr[j+1] - A->U->ptr[j];
1248 for(i=0;i<ie;i++)
1249 {
1250 iw[A->U->row[i]]++;
1251 }
1252 }
1253 maxnzr = 0;
1254 for(i=0;i<n;i++)
1255 {
1256 if( maxnzr<iw[i] ) maxnzr = iw[i];
1257 }
1258 #endif
1259
1260 err = lis_matrix_malloc_jad(n,nnz,maxnzr,&perm,&ptr,&index,&value);
1261 if( err )
1262 {
1263 lis_free2(2,iw,iw2);
1264 return err;
1265 }
1266
1267 #ifdef _OPENMP
1268 nprocs = omp_get_max_threads();
1269 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
1270 {
1271 my_rank = omp_get_thread_num();
1272 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
1273 memset(&ptr[my_rank*(maxnzr+1)],0,(maxnzr+1)*sizeof(LIS_INT));
1274 for(i=is;i<ie;i++)
1275 {
1276 perm[i] = i;
1277 for(j=0;j<iw[i];j++)
1278 {
1279 ptr[my_rank*(maxnzr+1) + j+1]++;
1280 }
1281 }
1282 lis_sortr_ii(is,ie-1,iw,perm);
1283 ptr[my_rank*(maxnzr+1)] = iw3[my_rank*LIS_VEC_TMP_PADD+2];
1284 for(j=0;j<maxnzr;j++)
1285 {
1286 ptr[my_rank*(maxnzr+1) + j+1] += ptr[my_rank*(maxnzr+1) + j];
1287 }
1288
1289 for(i=is;i<ie;i++)
1290 {
1291 iw[i] = 0;
1292 iw2[perm[i]] = i;
1293 }
1294 for(j=0;j<A->L->maxnzr;j++)
1295 {
1296 k = is;
1297 for(i=A->L->ptr[my_rank*(A->L->maxnzr+1) + j];i<A->L->ptr[my_rank*(A->L->maxnzr+1) + j+1];i++)
1298 {
1299 kk = ptr[my_rank*(maxnzr+1) + iw[A->L->row[k]]] + iw2[A->L->row[k]] - is;
1300 iw[A->L->row[k]]++;
1301 index[kk] = A->L->index[i];
1302 value[kk] = A->L->value[i];
1303 k++;
1304 }
1305 }
1306 for(i=is;i<ie;i++)
1307 {
1308 kk = ptr[my_rank*(maxnzr+1) + iw[i]] + iw2[i] - is;
1309 iw[i]++;
1310 index[kk] = i;
1311 value[kk] = A->D->value[i];
1312 }
1313 for(j=0;j<A->U->maxnzr;j++)
1314 {
1315 k = is;
1316 for(i=A->U->ptr[my_rank*(A->U->maxnzr+1) + j];i<A->U->ptr[my_rank*(A->U->maxnzr+1) + j+1];i++)
1317 {
1318 kk = ptr[my_rank*(maxnzr+1) + iw[A->U->row[k]]] + iw2[A->U->row[k]] - is;
1319 iw[A->U->row[k]]++;
1320 index[kk] = A->U->index[i];
1321 value[kk] = A->U->value[i];
1322 k++;
1323 }
1324 }
1325 }
1326 #else
1327 memset(ptr,0,(maxnzr+1)*sizeof(LIS_INT));
1328 for(i=0;i<n;i++)
1329 {
1330 perm[i] = i;
1331 for(j=0;j<iw[i];j++)
1332 {
1333 ptr[j+1]++;
1334 }
1335 }
1336 lis_sortr_ii(0,n-1,iw,perm);
1337 for(j=0;j<maxnzr;j++)
1338 {
1339 ptr[j+1] += ptr[j];
1340 }
1341
1342 for(i=0;i<n;i++)
1343 {
1344 iw[i] = 0;
1345 iw2[perm[i]] = i;
1346 }
1347 for(j=0;j<A->L->maxnzr;j++)
1348 {
1349 k = 0;
1350 for(i=A->L->ptr[j];i<A->L->ptr[j+1];i++)
1351 {
1352 kk = ptr[iw[A->L->row[k]]] + iw2[A->L->row[k]];
1353 iw[A->L->row[k]]++;
1354 index[kk] = A->L->index[i];
1355 value[kk] = A->L->value[i];
1356 k++;
1357 }
1358 }
1359 for(i=0;i<n;i++)
1360 {
1361 kk = ptr[iw[i]] + iw2[i];
1362 iw[i]++;
1363 index[kk] = i;
1364 value[kk] = A->D->value[i];
1365 }
1366 for(j=0;j<A->U->maxnzr;j++)
1367 {
1368 k = 0;
1369 for(i=A->U->ptr[j];i<A->U->ptr[j+1];i++)
1370 {
1371 kk = ptr[iw[A->U->row[k]]] + iw2[A->U->row[k]];
1372 iw[A->U->row[k]]++;
1373 index[kk] = A->U->index[i];
1374 value[kk] = A->U->value[i];
1375 k++;
1376 }
1377 }
1378 #endif
1379
1380 A->nnz = nnz;
1381 A->row = perm;
1382 A->ptr = ptr;
1383 A->value = value;
1384 A->index = index;
1385
1386 lis_free2(2,iw,iw2);
1387
1388 LIS_DEBUG_FUNC_OUT;
1389 return LIS_SUCCESS;
1390 }
1391
1392 #undef __FUNC__
1393 #define __FUNC__ "lis_matrix_sort_jad"
lis_matrix_sort_jad(LIS_MATRIX A)1394 LIS_INT lis_matrix_sort_jad(LIS_MATRIX A)
1395 {
1396 LIS_INT i,n;
1397
1398 LIS_DEBUG_FUNC_IN;
1399
1400 if( !A->is_sorted )
1401 {
1402 n = A->n;
1403 if( A->is_splited )
1404 {
1405 #ifdef _OPENMP
1406 #pragma omp parallel for private(i)
1407 #endif
1408 for(i=0;i<n;i++)
1409 {
1410 lis_sort_id(A->L->ptr[i],A->L->ptr[i+1]-1,A->L->index,A->L->value);
1411 lis_sort_id(A->U->ptr[i],A->U->ptr[i+1]-1,A->U->index,A->U->value);
1412 }
1413 }
1414 else
1415 {
1416 #ifdef _OPENMP
1417 #pragma omp parallel for private(i)
1418 #endif
1419 for(i=0;i<n;i++)
1420 {
1421 lis_sort_id(A->ptr[i],A->ptr[i+1]-1,A->index,A->value);
1422 }
1423 }
1424 A->is_sorted = LIS_TRUE;
1425 }
1426
1427 LIS_DEBUG_FUNC_OUT;
1428 return LIS_SUCCESS;
1429 }
1430
1431 #undef __FUNC__
1432 #define __FUNC__ "lis_matrix_solve_jad"
lis_matrix_solve_jad(LIS_MATRIX A,LIS_VECTOR B,LIS_VECTOR X,LIS_INT flag)1433 LIS_INT lis_matrix_solve_jad(LIS_MATRIX A, LIS_VECTOR B, LIS_VECTOR X, LIS_INT flag)
1434 {
1435 LIS_INT i,j,k,l,n;
1436 LIS_SCALAR t;
1437 LIS_SCALAR *b,*x;
1438
1439 LIS_DEBUG_FUNC_IN;
1440
1441 n = A->n;
1442 b = B->value;
1443 x = X->value;
1444
1445 switch(flag)
1446 {
1447 case LIS_MATRIX_LOWER:
1448 for(i=0;i<n;i++)
1449 {
1450 k = A->L->col[i];
1451 l = 0;
1452 j = A->L->ptr[l++] + k;
1453 t = b[i];
1454 while( j<A->L->ptr[l] && l<=A->L->maxnzr )
1455 {
1456 t -= A->L->value[j] * x[A->L->index[j]];
1457 j = A->L->ptr[l++] + k;
1458 }
1459 x[i] = t * A->WD->value[i];
1460 }
1461 break;
1462 case LIS_MATRIX_UPPER:
1463 for(i=n-1;i>=0;i--)
1464 {
1465 k = A->U->col[i];
1466 l = 0;
1467 j = A->U->ptr[l++] + k;
1468 t = b[i];
1469 while( j<A->U->ptr[l] && l<=A->U->maxnzr )
1470 {
1471 t -= A->U->value[j] * x[A->U->index[j]];
1472 j = A->U->ptr[l++] + k;
1473 }
1474 x[i] = t * A->WD->value[i];
1475 }
1476 break;
1477 case LIS_MATRIX_SSOR:
1478 for(i=0;i<n;i++)
1479 {
1480 k = A->L->col[i];
1481 l = 0;
1482 j = A->L->ptr[l++] + k;
1483 t = b[i];
1484 while( j<A->L->ptr[l] && l<=A->L->maxnzr )
1485 {
1486 t -= A->L->value[j] * x[A->L->index[j]];
1487 j = A->L->ptr[l++] + k;
1488 }
1489 x[i] = t * A->WD->value[i];
1490 }
1491 for(i=n-1;i>=0;i--)
1492 {
1493 k = A->U->col[i];
1494 l = 0;
1495 j = A->U->ptr[l++] + k;
1496 t = 0.0;
1497 while( j<A->U->ptr[l] && l<=A->U->maxnzr )
1498 {
1499 t += A->U->value[j] * x[A->U->index[j]];
1500 j = A->U->ptr[l++] + k;
1501 }
1502 x[i] -= t * A->WD->value[i];
1503 }
1504 break;
1505 }
1506
1507 LIS_DEBUG_FUNC_OUT;
1508 return LIS_SUCCESS;
1509 }
1510
1511 #undef __FUNC__
1512 #define __FUNC__ "lis_matrix_solveh_jad"
lis_matrix_solveh_jad(LIS_MATRIX A,LIS_VECTOR B,LIS_VECTOR X,LIS_INT flag)1513 LIS_INT lis_matrix_solveh_jad(LIS_MATRIX A, LIS_VECTOR B, LIS_VECTOR X, LIS_INT flag)
1514 {
1515 LIS_INT i,j,k,l,n;
1516 LIS_SCALAR t;
1517 LIS_SCALAR *x;
1518
1519 LIS_DEBUG_FUNC_IN;
1520
1521 n = A->n;
1522 x = X->value;
1523
1524 lis_vector_copy(B,X);
1525 switch(flag)
1526 {
1527 case LIS_MATRIX_LOWER:
1528 for(i=0;i<n;i++)
1529 {
1530 k = A->U->col[i];
1531 l = 0;
1532 j = A->U->ptr[l++] + k;
1533 x[i] = x[i] * conj(A->WD->value[i]);
1534 while( j<A->U->ptr[l] && l<=A->U->maxnzr )
1535 {
1536 x[A->U->index[j]] -= conj(A->U->value[j]) * x[i];
1537 j = A->U->ptr[l++] + k;
1538 }
1539 }
1540 break;
1541 case LIS_MATRIX_UPPER:
1542 for(i=n-1;i>=0;i--)
1543 {
1544 k = A->L->col[i];
1545 l = 0;
1546 j = A->L->ptr[l++] + k;
1547 x[i] = x[i] * conj(A->WD->value[i]);
1548 while( j<A->L->ptr[l] && l<=A->L->maxnzr )
1549 {
1550 x[A->L->index[j]] -= conj(A->L->value[j]) * x[i];
1551 j = A->L->ptr[l++] + k;
1552 }
1553 }
1554 break;
1555 case LIS_MATRIX_SSOR:
1556 for(i=0;i<n;i++)
1557 {
1558 k = A->U->col[i];
1559 l = 0;
1560 j = A->U->ptr[l++] + k;
1561 t = x[i] * conj(A->WD->value[i]);
1562 while( j<A->U->ptr[l] && l<=A->U->maxnzr )
1563 {
1564 x[A->U->index[j]] -= conj(A->U->value[j]) * t;
1565 j = A->U->ptr[l++] + k;
1566 }
1567 }
1568 for(i=n-1;i>=0;i--)
1569 {
1570 k = A->L->col[i];
1571 l = 0;
1572 j = A->L->ptr[l++] + k;
1573 x[i] = x[i] * conj(A->WD->value[i]);
1574 t = x[i];
1575 while( j<A->L->ptr[l] && l<=A->L->maxnzr )
1576 {
1577 x[A->L->index[j]] -= conj(A->L->value[j]) * t;
1578 j = A->L->ptr[l++] + k;
1579 }
1580 }
1581 break;
1582 }
1583
1584 LIS_DEBUG_FUNC_OUT;
1585 return LIS_SUCCESS;
1586 }
1587
1588 #ifndef USE_OVERLAP
1589 #undef __FUNC__
1590 #define __FUNC__ "lis_matrix_convert_csr2jad"
lis_matrix_convert_csr2jad(LIS_MATRIX Ain,LIS_MATRIX Aout)1591 LIS_INT lis_matrix_convert_csr2jad(LIS_MATRIX Ain, LIS_MATRIX Aout)
1592 {
1593 LIS_INT i,j,l,js,je;
1594 LIS_INT err;
1595 LIS_INT n,nnz,maxnzr,nprocs,my_rank;
1596 LIS_INT is,ie;
1597 LIS_INT *iw,*maxnzrpe,*nnzpe;
1598 LIS_INT *perm,*ptr,*index;
1599 LIS_SCALAR *value;
1600
1601 LIS_DEBUG_FUNC_IN;
1602
1603 n = Ain->n;
1604 nnz = Ain->nnz;
1605 my_rank = Ain->my_rank;
1606 is = Ain->is;
1607 ie = Ain->ie;
1608 #ifdef _OPENMP
1609 nprocs = omp_get_max_threads();
1610 #else
1611 nprocs = 1;
1612 #endif
1613
1614 perm = NULL;
1615 ptr = NULL;
1616 index = NULL;
1617 value = NULL;
1618 iw = NULL;
1619 maxnzrpe = NULL;
1620 nnzpe = NULL;
1621
1622 iw = (LIS_INT *)lis_malloc( n*sizeof(LIS_INT),"lis_matrix_convert_csr2jad::iw" );
1623 if( iw==NULL )
1624 {
1625 LIS_SETERR_MEM(n*sizeof(LIS_INT));
1626 return LIS_OUT_OF_MEMORY;
1627 }
1628 maxnzrpe = (LIS_INT *)lis_malloc( nprocs*sizeof(LIS_INT),"lis_matrix_convert_csr2jad::maxnzrpe" );
1629 if( maxnzrpe==NULL )
1630 {
1631 LIS_SETERR_MEM(nprocs*sizeof(LIS_INT));
1632 return LIS_OUT_OF_MEMORY;
1633 }
1634 nnzpe = (LIS_INT *)lis_malloc( (nprocs+1)*sizeof(LIS_INT),"lis_matrix_convert_csr2jad::nnzpe" );
1635 if( nnzpe==NULL )
1636 {
1637 LIS_SETERR_MEM((nprocs+1)*sizeof(LIS_INT));
1638 return LIS_OUT_OF_MEMORY;
1639 }
1640
1641 #ifdef _OPENMP
1642 #pragma omp parallel private(i,is,ie,my_rank)
1643 #endif
1644 {
1645 #ifdef _OPENMP
1646 my_rank = omp_get_thread_num();
1647 #else
1648 my_rank = 0;
1649 #endif
1650 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
1651
1652 maxnzrpe[my_rank] = 0;
1653 #ifdef USE_VEC_COMP
1654 #pragma cdir nodep
1655 #pragma _NEC ivdep
1656 #endif
1657 for(i=is;i<ie;i++)
1658 {
1659 iw[i] = Ain->ptr[i+1] - Ain->ptr[i];
1660 if( iw[i] > maxnzrpe[my_rank] ) maxnzrpe[my_rank] = iw[i];
1661 }
1662 nnzpe[my_rank+1] = Ain->ptr[ie] - Ain->ptr[is];
1663 }
1664 maxnzr = 0;
1665 nnzpe[0] = 0;
1666 for(i=0;i<nprocs;i++)
1667 {
1668 if( maxnzrpe[i] > maxnzr ) maxnzr = maxnzrpe[i];
1669 nnzpe[i+1] += nnzpe[i];
1670 }
1671
1672 err = lis_matrix_malloc_jad(n,nnz,maxnzr,&perm,&ptr,&index,&value);
1673 if( err )
1674 {
1675 return err;
1676 }
1677
1678 /* convert jad */
1679 #ifdef _OPENMP
1680 #pragma omp parallel private(i,j,is,ie,js,je,l,my_rank)
1681 #endif
1682 {
1683 #ifdef _OPENMP
1684 my_rank = omp_get_thread_num();
1685 #else
1686 my_rank = 0;
1687 #endif
1688 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
1689
1690 memset(&ptr[my_rank*(maxnzr+1)],0,(maxnzr+1)*sizeof(LIS_INT));
1691 #if 1
1692 for(i=is;i<ie;i++)
1693 {
1694 perm[i] = i;
1695 for(j=0;j<iw[i];j++)
1696 {
1697 ptr[my_rank*(maxnzr+1) + j+1]++;
1698 }
1699 }
1700 lis_sortr_ii(is,ie-1,iw,perm);
1701 #else
1702 lis_sort_jad(is,ie,maxnzr,iw,perm);
1703
1704 j = 0;
1705 for(i=ie-1;i>=is;i--)
1706 {
1707 for(;j<iw[i];j++)
1708 {
1709 ptr[my_rank*(maxnzr+1) + j+1] = i-is+1;
1710 }
1711 if( iw[i]==maxnzr ) break;
1712 }
1713
1714 #endif
1715
1716 ptr[my_rank*(maxnzr+1)] = nnzpe[my_rank];
1717 for(j=0;j<maxnzr;j++)
1718 {
1719 ptr[my_rank*(maxnzr+1) + j+1] += ptr[my_rank*(maxnzr+1) + j];
1720 }
1721
1722
1723
1724 #ifndef USE_VEC_COMP
1725 for(i=is;i<ie;i++)
1726 {
1727 js = Ain->ptr[perm[i]];
1728 je = Ain->ptr[perm[i]+1];
1729 for(j=js;j<je;j++)
1730 {
1731 l = ptr[my_rank*(maxnzr+1) + j-js]+i-is;
1732 value[l] = Ain->value[j];
1733 index[l] = Ain->index[j];
1734 }
1735 }
1736 #else
1737 for(j=0;j<maxnzr;j++)
1738 {
1739 js = ptr[my_rank*(maxnzr+1) + j];
1740 je = ptr[my_rank*(maxnzr+1) + j+1];
1741 #pragma cdir nodep
1742 #pragma _NEC ivdep
1743 for(i=js;i<je;i++)
1744 {
1745 l = Ain->ptr[perm[is+(i-js)]] + j;
1746 value[i] = Ain->value[l];
1747 index[i] = Ain->index[l];
1748 }
1749 }
1750 #endif
1751 }
1752
1753 err = lis_matrix_set_jad(nnz,maxnzr,perm,ptr,index,value,Aout);
1754 if( err )
1755 {
1756 lis_free2(7,perm,ptr,index,value,iw,maxnzrpe,nnzpe);
1757 return err;
1758 }
1759 err = lis_matrix_assemble(Aout);
1760 if( err )
1761 {
1762 lis_free2(2,iw,nnzpe);
1763 lis_matrix_storage_destroy(Aout);
1764 return err;
1765 }
1766 lis_free2(3,iw,nnzpe,maxnzrpe);
1767
1768 LIS_DEBUG_FUNC_OUT;
1769 return LIS_SUCCESS;
1770 }
1771 #else
1772 #undef __FUNC__
1773 #define __FUNC__ "lis_matrix_convert_csr2jad"
lis_matrix_convert_csr2jad(LIS_MATRIX Ain,LIS_MATRIX Aout)1774 LIS_INT lis_matrix_convert_csr2jad(LIS_MATRIX Ain, LIS_MATRIX Aout)
1775 {
1776 LIS_INT i,j,jj,k,kk,l,js,je;
1777 LIS_INT err;
1778 LIS_INT np,n,nnz,nnz2,maxnzr,maxnzr2,nprocs,my_rank;
1779 LIS_INT is,ie,pe;
1780 LIS_INT *iw,*maxnzrpe,*nnzpe;
1781 LIS_INT *iw2,*maxnzrpe2,*nnzpe2;
1782 LIS_INT *perm,*ptr,*index;
1783 LIS_INT *perm2,*ptr2,*index2;
1784 LIS_SCALAR *value;
1785 LIS_SCALAR *value2;
1786
1787 LIS_DEBUG_FUNC_IN;
1788
1789 n = Ain->n;
1790 np = Ain->np;
1791 nnz = Ain->nnz;
1792 my_rank = Ain->my_rank;
1793 is = Ain->is;
1794 ie = Ain->ie;
1795 #ifdef _OPENMP
1796 nprocs = omp_get_max_threads();
1797 #else
1798 nprocs = 1;
1799 #endif
1800
1801 perm = NULL;
1802 ptr = NULL;
1803 index = NULL;
1804 value = NULL;
1805 iw = NULL;
1806 iw2 = NULL;
1807 maxnzrpe = NULL;
1808 nnzpe = NULL;
1809
1810 lis_matrix_split2_csr(Ain);
1811
1812 iw = (LIS_INT *)lis_malloc( n*sizeof(LIS_INT),"lis_matrix_convert_csr2jad::iw" );
1813 if( iw==NULL )
1814 {
1815 LIS_SETERR_MEM(n*sizeof(LIS_INT));
1816 return LIS_OUT_OF_MEMORY;
1817 }
1818 iw2 = (LIS_INT *)lis_malloc( n*sizeof(LIS_INT),"lis_matrix_convert_csr2jad::iw2" );
1819 if( iw2==NULL )
1820 {
1821 LIS_SETERR_MEM(n*sizeof(LIS_INT));
1822 return LIS_OUT_OF_MEMORY;
1823 }
1824 maxnzrpe = (LIS_INT *)lis_malloc( nprocs*sizeof(LIS_INT),"lis_matrix_convert_csr2jad::maxnzrpe" );
1825 if( maxnzrpe==NULL )
1826 {
1827 LIS_SETERR_MEM(nprocs*sizeof(LIS_INT));
1828 return LIS_OUT_OF_MEMORY;
1829 }
1830 maxnzrpe2 = (LIS_INT *)lis_malloc( nprocs*sizeof(LIS_INT),"lis_matrix_convert_csr2jad::maxnzrpe2" );
1831 if( maxnzrpe2==NULL )
1832 {
1833 LIS_SETERR_MEM(nprocs*sizeof(LIS_INT));
1834 return LIS_OUT_OF_MEMORY;
1835 }
1836 nnzpe = (LIS_INT *)lis_malloc( (nprocs+1)*sizeof(LIS_INT),"lis_matrix_convert_csr2jad::nnzpe" );
1837 if( nnzpe==NULL )
1838 {
1839 LIS_SETERR_MEM((nprocs+1)*sizeof(LIS_INT));
1840 return LIS_OUT_OF_MEMORY;
1841 }
1842 nnzpe2 = (LIS_INT *)lis_malloc( (nprocs+1)*sizeof(LIS_INT),"lis_matrix_convert_csr2jad::nnzpe2" );
1843 if( nnzpe2==NULL )
1844 {
1845 LIS_SETERR_MEM((nprocs+1)*sizeof(LIS_INT));
1846 return LIS_OUT_OF_MEMORY;
1847 }
1848
1849 #ifdef _OPENMP
1850 #pragma omp parallel private(i,j,k,is,ie,jj,my_rank)
1851 #endif
1852 {
1853 #ifdef _OPENMP
1854 my_rank = omp_get_thread_num();
1855 #else
1856 my_rank = 0;
1857 #endif
1858 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
1859
1860 maxnzrpe[my_rank] = 0;
1861 maxnzrpe2[my_rank] = 0;
1862 for(i=is;i<ie;i++)
1863 {
1864 iw[i] = Ain->L->ptr[i+1] - Ain->L->ptr[i];
1865 iw2[i] = Ain->U->ptr[i+1] - Ain->U->ptr[i];
1866 if( iw[i] > maxnzrpe[my_rank] ) maxnzrpe[my_rank] = iw[i];
1867 if( iw2[i] > maxnzrpe2[my_rank] ) maxnzrpe2[my_rank] = iw2[i];
1868 }
1869 nnzpe[my_rank+1] = Ain->L->ptr[ie] - Ain->L->ptr[is];
1870 nnzpe2[my_rank+1] = Ain->U->ptr[ie] - Ain->U->ptr[is];
1871 }
1872 maxnzr = 0;
1873 maxnzr2 = 0;
1874 nnzpe[0] = 0;
1875 nnzpe2[0] = 0;
1876 for(i=0;i<nprocs;i++)
1877 {
1878 if( maxnzrpe[i] > maxnzr ) maxnzr = maxnzrpe[i];
1879 if( maxnzrpe2[i] > maxnzr2 ) maxnzr2 = maxnzrpe2[i];
1880 nnzpe[i+1] += nnzpe[i];
1881 nnzpe2[i+1] += nnzpe2[i];
1882 }
1883 nnz = nnzpe[nprocs];
1884 nnz2 = nnzpe2[nprocs];
1885
1886 err = lis_matrix_malloc_jad(n,nnz,maxnzr,&perm,&ptr,&index,&value);
1887 if( err )
1888 {
1889 return err;
1890 }
1891 err = lis_matrix_malloc_jad(n,nnz2,maxnzr2,&perm2,&ptr2,&index2,&value2);
1892 if( err )
1893 {
1894 return err;
1895 }
1896 err = lis_matrix_LU_create(Aout);
1897 if( err )
1898 {
1899 lis_free2(7,perm,ptr,index,value,iw,maxnzrpe,nnzpe);
1900 return err;
1901 }
1902
1903 /* convert jad */
1904 #ifdef _OPENMP
1905 #pragma omp parallel private(i,j,k,is,ie,jj,kk,js,je,l,my_rank)
1906 #endif
1907 {
1908 #ifdef _OPENMP
1909 my_rank = omp_get_thread_num();
1910 #else
1911 my_rank = 0;
1912 #endif
1913 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
1914
1915 memset(&ptr[my_rank*(maxnzr+1)],0,(maxnzr+1)*sizeof(LIS_INT));
1916 memset(&ptr2[my_rank*(maxnzr2+1)],0,(maxnzr2+1)*sizeof(LIS_INT));
1917 #if 0
1918 for(i=is;i<ie;i++)
1919 {
1920 perm[i] = i;
1921 perm2[i] = i;
1922 for(j=0;j<iw[i];j++)
1923 {
1924 ptr[my_rank*(maxnzr+1) + j+1]++;
1925 }
1926 for(j=0;j<iw2[i];j++)
1927 {
1928 ptr2[my_rank*(maxnzr2+1) + j+1]++;
1929 }
1930 }
1931 lis_sortr_ii(is,ie-1,iw,perm);
1932 lis_sortr_ii(is,ie-1,iw2,perm2);
1933 #else
1934 lis_sort_jad(is,ie,maxnzr,iw,perm);
1935 lis_sort_jad(is,ie,maxnzr2,iw2,perm2);
1936
1937 j = 0;
1938 for(i=ie-1;i>=is;i--)
1939 {
1940 for(;j<iw[i];j++)
1941 {
1942 ptr[my_rank*(maxnzr+1) + j+1] = i-is+1;
1943 }
1944 if( iw[i]==maxnzr ) break;
1945 }
1946 j = 0;
1947 for(i=ie-1;i>=is;i--)
1948 {
1949 for(;j<iw2[i];j++)
1950 {
1951 ptr2[my_rank*(maxnzr2+1) + j+1] = i-is+1;
1952 }
1953 if( iw2[i]==maxnzr2 ) break;
1954 }
1955 #endif
1956
1957 ptr[my_rank*(maxnzr+1)] = nnzpe[my_rank];
1958 ptr2[my_rank*(maxnzr2+1)] = nnzpe2[my_rank];
1959 for(j=0;j<maxnzr;j++)
1960 {
1961 ptr[my_rank*(maxnzr+1) + j+1] += ptr[my_rank*(maxnzr+1) + j];
1962 }
1963 for(j=0;j<maxnzr2;j++)
1964 {
1965 ptr2[my_rank*(maxnzr2+1) + j+1] += ptr2[my_rank*(maxnzr2+1) + j];
1966 }
1967 #if 0
1968 for(i=is;i<ie;i++)
1969 {
1970 kk = 0;
1971 js = Ain->L->ptr[perm[i]];
1972 je = Ain->L->ptr[perm[i]+1];
1973 for(j=js;j<je;j++)
1974 {
1975 l = ptr[my_rank*(maxnzr+1) + kk]+i-is;
1976 value[l] = Ain->L->value[j];
1977 index[l] = Ain->L->index[j];
1978 kk++;
1979 }
1980 kk = 0;
1981 js = Ain->U->ptr[perm2[i]];
1982 je = Ain->U->ptr[perm2[i]+1];
1983 for(j=js;j<je;j++)
1984 {
1985 l = ptr2[my_rank*(maxnzr2+1) + kk]+i-is;
1986 value2[l] = Ain->U->value[j];
1987 index2[l] = Ain->U->index[j];
1988 kk++;
1989 }
1990 }
1991 #else
1992 for(j=0;j<maxnzr;j++)
1993 {
1994 js = ptr[my_rank*(maxnzr+1) + j];
1995 je = ptr[my_rank*(maxnzr+1) + j+1];
1996 #pragma cdir nodep
1997 #pragma _NEC ivdep
1998 for(i=js;i<je;i++)
1999 {
2000 l = Ain->L->ptr[perm[i-js]] + j;
2001 value[i] = Ain->L->value[l];
2002 index[i] = Ain->L->index[l];
2003 }
2004 }
2005 for(j=0;j<maxnzr2;j++)
2006 {
2007 js = ptr2[my_rank*(maxnzr2+1) + j];
2008 je = ptr2[my_rank*(maxnzr2+1) + j+1];
2009 #pragma cdir nodep
2010 #pragma _NEC ivdep
2011 for(i=js;i<je;i++)
2012 {
2013 l = Ain->U->ptr[perm2[i-js]] + j;
2014 value2[i] = Ain->U->value[l];
2015 index2[i] = Ain->U->index[l];
2016 }
2017 }
2018 #endif
2019 }
2020
2021 err = lis_matrix_set_jad(nnz,maxnzr,perm,ptr,index,value,Aout);
2022 if( err )
2023 {
2024 lis_free2(7,perm,ptr,index,value,iw,maxnzrpe,nnzpe);
2025 return err;
2026 }
2027 Aout->U->maxnzr = maxnzr2;
2028 Aout->U->row = perm2;
2029 Aout->U->ptr = ptr2;
2030 Aout->U->index = index2;
2031 Aout->U->value = value2;
2032 err = lis_matrix_assemble(Aout);
2033 if( err )
2034 {
2035 lis_free2(2,iw,nnzpe);
2036 lis_matrix_storage_destroy(Aout);
2037 return err;
2038 }
2039 Aout->work = (LIS_SCALAR *)lis_malloc(np*sizeof(LIS_SCALAR));
2040 lis_free2(6,iw,nnzpe,maxnzrpe,iw2,nnzpe2,maxnzrpe2);
2041
2042 LIS_DEBUG_FUNC_OUT;
2043 return LIS_SUCCESS;
2044 }
2045 #endif
2046
2047 #undef __FUNC__
2048 #define __FUNC__ "lis_matrix_convert_jad2csr"
lis_matrix_convert_jad2csr(LIS_MATRIX Ain,LIS_MATRIX Aout)2049 LIS_INT lis_matrix_convert_jad2csr(LIS_MATRIX Ain, LIS_MATRIX Aout)
2050 {
2051 LIS_INT i,j,jj,k,is,ie;
2052 LIS_INT err;
2053 LIS_INT n,nnz,maxnzr;
2054 LIS_INT *iw;
2055 LIS_INT *ptr,*index;
2056 LIS_SCALAR *value;
2057 #ifdef _OPENMP
2058 LIS_INT nprocs,my_rank;
2059 #endif
2060
2061 LIS_DEBUG_FUNC_IN;
2062
2063 n = Ain->n;
2064 nnz = Ain->nnz;
2065 maxnzr = Ain->maxnzr;
2066 is = Ain->is;
2067 ie = Ain->ie;
2068
2069 ptr = NULL;
2070 index = NULL;
2071 value = NULL;
2072 iw = NULL;
2073
2074 iw = (LIS_INT *)lis_malloc( n*sizeof(LIS_INT),"lis_matrix_convert_jad2csr::iw" );
2075 if( iw==NULL )
2076 {
2077 LIS_SETERR_MEM(n*sizeof(LIS_INT));
2078 return LIS_OUT_OF_MEMORY;
2079 }
2080 err = lis_matrix_malloc_csr(n,nnz,&ptr,&index,&value);
2081 if( err )
2082 {
2083 lis_free2(4,ptr,index,value,iw);
2084 return err;
2085 }
2086
2087 /* convert csr */
2088 #ifdef _OPENMP
2089 nprocs = omp_get_max_threads();
2090 ptr[0] = 0;
2091 #pragma omp parallel private(i,j,k,is,ie,jj,my_rank)
2092 {
2093 my_rank = omp_get_thread_num();
2094 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
2095
2096 for(i=is;i<ie;i++)
2097 {
2098 ptr[i+1] = 0;
2099 }
2100 for(j=0;j<maxnzr;j++)
2101 {
2102 k = is;
2103 for(i=Ain->ptr[my_rank*(maxnzr+1) + j];i<Ain->ptr[my_rank*(maxnzr+1) + j+1];i++)
2104 {
2105 ptr[Ain->row[k]+1]++;
2106 k++;
2107 }
2108 }
2109 #pragma omp barrier
2110 #pragma omp single
2111 for(i=0;i<n;i++)
2112 {
2113 ptr[i+1] += ptr[i];
2114 }
2115 for(i=is;i<ie;i++)
2116 {
2117 iw[i] = ptr[i];
2118 }
2119 for(j=0;j<maxnzr;j++)
2120 {
2121 jj = is;
2122 for(i=Ain->ptr[my_rank*(maxnzr+1) + j];i<Ain->ptr[my_rank*(maxnzr+1) + j+1];i++)
2123 {
2124 k = iw[Ain->row[jj]]++;
2125 value[k] = Ain->value[i];
2126 index[k] = Ain->index[i];
2127 jj++;
2128 }
2129 }
2130 }
2131 #else
2132 for(i=0;i<n+1;i++)
2133 {
2134 ptr[i] = 0;
2135 }
2136 for(j=0;j<maxnzr;j++)
2137 {
2138 k = 0;
2139 for(i=Ain->ptr[j];i<Ain->ptr[j+1];i++)
2140 {
2141 ptr[Ain->row[k]+1]++;
2142 k++;
2143 }
2144 }
2145 for(i=0;i<n;i++)
2146 {
2147 ptr[i+1] += ptr[i];
2148 }
2149 for(i=0;i<n;i++)
2150 {
2151 iw[i] = ptr[i];
2152 }
2153 for(j=0;j<maxnzr;j++)
2154 {
2155 jj = 0;
2156 for(i=Ain->ptr[j];i<Ain->ptr[j+1];i++)
2157 {
2158 k = iw[Ain->row[jj]]++;
2159 value[k] = Ain->value[i];
2160 index[k] = Ain->index[i];
2161 jj++;
2162 }
2163 }
2164 #endif
2165
2166 err = lis_matrix_set_csr(nnz,ptr,index,value,Aout);
2167 if( err )
2168 {
2169 lis_free2(4,ptr,index,value,iw);
2170 return err;
2171 }
2172 err = lis_matrix_assemble(Aout);
2173 if( err )
2174 {
2175 lis_free(iw);
2176 lis_matrix_storage_destroy(Aout);
2177 return err;
2178 }
2179 lis_free(iw);
2180
2181 LIS_DEBUG_FUNC_OUT;
2182 return LIS_SUCCESS;
2183 }
2184
2185
2186 #undef __FUNC__
2187 #define __FUNC__ "lis_vector_sort_jad_order"
lis_vector_sort_jad_order(LIS_MATRIX A,LIS_VECTOR v)2188 LIS_INT lis_vector_sort_jad_order(LIS_MATRIX A, LIS_VECTOR v)
2189 {
2190 LIS_INT i,n,np;
2191 LIS_SCALAR *t;
2192
2193 LIS_DEBUG_FUNC_IN;
2194
2195 n = A->n;
2196 np = A->np;
2197
2198 t = (LIS_SCALAR *)lis_malloc(np*sizeof(LIS_SCALAR),"lis_vector_sort_jad_order::t");
2199 if( t==NULL )
2200 {
2201 LIS_SETERR_MEM(np*sizeof(LIS_SCALAR));
2202 return LIS_OUT_OF_MEMORY;
2203 }
2204 #ifdef USE_VEC_COMP
2205 #pragma cdir nodep
2206 #pragma _NEC ivdep
2207 #endif
2208 #ifdef _OPENMP
2209 #pragma omp parallel for private(i)
2210 #endif
2211 for(i=0;i<n;i++)
2212 {
2213 t[i] = v->value[A->row[i]];
2214 }
2215 lis_free(v->value);
2216 v->value = t;
2217
2218 LIS_DEBUG_FUNC_OUT;
2219 return LIS_SUCCESS;
2220 }
2221
2222 #undef __FUNC__
2223 #define __FUNC__ "lis_vector_sort_ascending_order"
lis_vector_sort_ascending_order(LIS_MATRIX A,LIS_VECTOR v)2224 LIS_INT lis_vector_sort_ascending_order(LIS_MATRIX A, LIS_VECTOR v)
2225 {
2226 LIS_INT i,n,np;
2227 LIS_SCALAR *t;
2228
2229 LIS_DEBUG_FUNC_IN;
2230
2231 n = A->n;
2232 np = A->np;
2233
2234 t = (LIS_SCALAR *)lis_malloc(np*sizeof(LIS_SCALAR),"lis_vector_sort_ascending_order::t");
2235 if( t==NULL )
2236 {
2237 LIS_SETERR_MEM(np*sizeof(LIS_SCALAR));
2238 return LIS_OUT_OF_MEMORY;
2239 }
2240 #ifdef USE_VEC_COMP
2241 #pragma cdir nodep
2242 #pragma _NEC ivdep
2243 #endif
2244 #ifdef _OPENMP
2245 #pragma omp parallel for private(i)
2246 #endif
2247 for(i=0;i<n;i++)
2248 {
2249 t[A->row[i]] = v->value[i];
2250 }
2251 lis_free(v->value);
2252 v->value = t;
2253
2254 LIS_DEBUG_FUNC_OUT;
2255 return LIS_SUCCESS;
2256 }
2257
2258