1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
5 1. Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7 2. Redistributions in binary form must reproduce the above copyright
8 notice, this list of conditions and the following disclaimer in the
9 documentation and/or other materials provided with the distribution.
10 3. Neither the name of the project nor the names of its contributors
11 may be used to endorse or promote products derived from this software
12 without specific prior written permission.
13
14 THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18 PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 #include "lis_config_win.h"
32 #endif
33 #endif
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #ifdef HAVE_MALLOC_H
38 #include <malloc.h>
39 #endif
40 #include <string.h>
41 #include <stdarg.h>
42 #ifdef _OPENMP
43 #include <omp.h>
44 #endif
45 #ifdef USE_MPI
46 #include <mpi.h>
47 #endif
48 #include "lislib.h"
49
50
51 #undef __FUNC__
52 #define __FUNC__ "lis_matrix_diag_init"
lis_matrix_diag_init(LIS_MATRIX_DIAG * D)53 LIS_INT lis_matrix_diag_init(LIS_MATRIX_DIAG *D)
54 {
55 LIS_DEBUG_FUNC_IN;
56
57 memset(*D,0,sizeof(struct LIS_MATRIX_DIAG_STRUCT));
58 (*D)->bn = 1;
59
60 LIS_DEBUG_FUNC_OUT;
61 return LIS_SUCCESS;
62 }
63
64 #undef __FUNC__
65 #define __FUNC__ "lis_matrix_diag_check"
lis_matrix_diag_check(LIS_MATRIX_DIAG D,LIS_INT level)66 LIS_INT lis_matrix_diag_check(LIS_MATRIX_DIAG D, LIS_INT level)
67 {
68 LIS_DEBUG_FUNC_IN;
69
70 switch( level )
71 {
72 case LIS_MATRIX_CHECK_NULL:
73 if( D==NULL )
74 {
75 LIS_SETERR(LIS_ERR_ILL_ARG,"diagonal matrix D is undefined\n");
76 return LIS_ERR_ILL_ARG;
77 }
78 break;
79 default:
80 if( D==NULL )
81 {
82 LIS_SETERR(LIS_ERR_ILL_ARG,"diagonal matrix D is undefined\n");
83 return LIS_ERR_ILL_ARG;
84 }
85 break;
86 }
87 LIS_DEBUG_FUNC_OUT;
88 return LIS_SUCCESS;
89 }
90
91 #undef __FUNC__
92 #define __FUNC__ "lis_matrix_diag_create"
lis_matrix_diag_create(LIS_INT local_n,LIS_INT global_n,LIS_Comm comm,LIS_MATRIX_DIAG * D)93 LIS_INT lis_matrix_diag_create(LIS_INT local_n, LIS_INT global_n, LIS_Comm comm, LIS_MATRIX_DIAG *D)
94 {
95 int nprocs,my_rank;
96 LIS_INT is,ie;
97 LIS_INT *ranges;
98 #ifdef USE_MPI
99 LIS_INT i;
100 #endif
101
102 LIS_DEBUG_FUNC_IN;
103
104 *D = NULL;
105
106 if( global_n>0 && local_n>global_n )
107 {
108 LIS_SETERR2(LIS_ERR_ILL_ARG,"local n(=%D) is larger than global n(=%D)\n",local_n,global_n);
109 return LIS_ERR_ILL_ARG;
110 }
111 if( local_n<0 || global_n<0 )
112 {
113 LIS_SETERR2(LIS_ERR_ILL_ARG,"local n(=%D) or global n(=%D) are less than 0\n",local_n,global_n);
114 return LIS_ERR_ILL_ARG;
115 }
116 if( local_n==0 && global_n==0 )
117 {
118 LIS_SETERR2(LIS_ERR_ILL_ARG,"local n(=%D) and global n(=%D) are 0\n",local_n,global_n);
119 return LIS_ERR_ILL_ARG;
120 }
121
122 *D = (LIS_MATRIX_DIAG)lis_malloc( sizeof(struct LIS_MATRIX_DIAG_STRUCT),"lis_matrix_diag_create::D" );
123 if( NULL==*D )
124 {
125 LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_DIAG_STRUCT));
126 return LIS_OUT_OF_MEMORY;
127 }
128 lis_matrix_diag_init(D);
129
130 #ifdef USE_MPI
131 MPI_Comm_size(comm,&nprocs);
132 MPI_Comm_rank(comm,&my_rank);
133 ranges = (LIS_INT *)lis_malloc( (nprocs+1)*sizeof(LIS_INT),"lis_matrix_diag_create::ranges" );
134 if( ranges==NULL )
135 {
136 LIS_SETERR_MEM((nprocs+1)*sizeof(LIS_INT));
137 lis_matrix_diag_destroy(*D);
138 *D = NULL;
139 return LIS_OUT_OF_MEMORY;
140 }
141 #else
142 nprocs = 1;
143 my_rank = 0;
144 ranges = NULL;
145 #endif
146
147 #ifdef USE_MPI
148 MPI_Allreduce(&local_n,&i,1,LIS_MPI_INT,MPI_SUM,comm);
149 if( i==0 )
150 #else
151 if( local_n==0 )
152 #endif
153 {
154 #ifdef USE_MPI
155 LIS_GET_ISIE(my_rank,nprocs,global_n,is,ie);
156 local_n = ie-is;
157 MPI_Allgather(&ie,1,LIS_MPI_INT,&ranges[1],1,LIS_MPI_INT,comm);
158 ranges[0] = 0;
159 #else
160 local_n = global_n;
161 is = 0;
162 ie = global_n;
163 #endif
164 }
165 else
166 {
167 #ifdef USE_MPI
168 MPI_Allgather(&local_n,1,LIS_MPI_INT,&ranges[1],1,LIS_MPI_INT,comm);
169 ranges[0] = 0;
170 for(i=0;i<nprocs;i++)
171 {
172 ranges[i+1] += ranges[i];
173 }
174 global_n = ranges[nprocs];
175 is = ranges[my_rank];
176 ie = ranges[my_rank+1];
177 #else
178 global_n = local_n;
179 is = 0;
180 ie = local_n;
181 #endif
182 }
183 (*D)->ranges = ranges;
184
185 (*D)->value = (LIS_SCALAR *)lis_malloc( local_n*sizeof(LIS_SCALAR),"lis_matrix_diag_create::D->value" );
186 if( NULL==(*D)->value )
187 {
188 LIS_SETERR_MEM(local_n*sizeof(LIS_SCALAR));
189 lis_matrix_diag_destroy(*D);
190 *D = NULL;
191 return LIS_OUT_OF_MEMORY;
192 }
193
194 (*D)->n = local_n;
195 (*D)->nr = local_n;
196 (*D)->gn = global_n;
197 (*D)->np = local_n;
198 (*D)->comm = comm;
199 (*D)->my_rank = my_rank;
200 (*D)->nprocs = nprocs;
201 (*D)->is = is;
202 (*D)->ie = ie;
203
204 LIS_DEBUG_FUNC_OUT;
205 return LIS_SUCCESS;
206 }
207
208 #undef __FUNC__
209 #define __FUNC__ "lis_matrix_diag_destroy"
lis_matrix_diag_destroy(LIS_MATRIX_DIAG D)210 LIS_INT lis_matrix_diag_destroy(LIS_MATRIX_DIAG D)
211 {
212 LIS_INT i;
213 LIS_DEBUG_FUNC_IN;
214
215 if( D )
216 {
217 if( D->value ) lis_free( D->value );
218 if( D->work ) lis_free( D->work );
219 if( D->bns )
220 {
221 for(i=0;i<D->nr;i++)
222 {
223 if( D->v_value[i] ) free(D->v_value[i]);
224 }
225 lis_free2(2,D->bns,D->v_value);
226 }
227 if( D->ptr ) lis_free( D->ptr );
228 if( D->ranges ) lis_free( D->ranges );
229 lis_free(D);
230 }
231
232 LIS_DEBUG_FUNC_OUT;
233 return LIS_SUCCESS;
234 }
235
236 #undef __FUNC__
237 #define __FUNC__ "lis_matrix_diag_duplicate"
lis_matrix_diag_duplicate(LIS_MATRIX_DIAG Din,LIS_MATRIX_DIAG * Dout)238 LIS_INT lis_matrix_diag_duplicate(LIS_MATRIX_DIAG Din, LIS_MATRIX_DIAG *Dout)
239 {
240 LIS_INT err,nr,bnmax,t;
241 LIS_INT nprocs;
242 LIS_INT i;
243 #ifdef USE_MPI
244 LIS_INT *ranges;
245 #endif
246
247 LIS_DEBUG_FUNC_IN;
248
249 err = lis_matrix_diag_check(Din,LIS_MATRIX_CHECK_ALL);
250 if( err ) return err;
251
252 nprocs = Din->nprocs;
253 *Dout = NULL;
254
255 *Dout = (LIS_MATRIX_DIAG)lis_malloc( sizeof(struct LIS_MATRIX_DIAG_STRUCT),"lis_matrix_diag_duplicate::Dout" );
256 if( NULL==*Dout )
257 {
258 LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_DIAG_STRUCT));
259 return LIS_OUT_OF_MEMORY;
260 }
261 lis_matrix_diag_init(Dout);
262
263 #ifdef USE_MPI
264 ranges = (LIS_INT *)lis_malloc( (nprocs+1)*sizeof(LIS_INT),"lis_matrix_diag_duplicate::ranges" );
265 if( ranges==NULL )
266 {
267 LIS_SETERR_MEM((nprocs+1)*sizeof(LIS_INT));
268 lis_matrix_diag_destroy(*Dout);
269 *Dout = NULL;
270 return LIS_OUT_OF_MEMORY;
271 }
272 for(i=0;i<nprocs+1;i++) ranges[i] = Din->ranges[i];
273 (*Dout)->ranges = ranges;
274 #else
275 (*Dout)->ranges = NULL;
276 #endif
277
278 if( Din->bns==NULL )
279 {
280 (*Dout)->value = (LIS_SCALAR *)lis_malloc( Din->bn*Din->bn*Din->nr*sizeof(LIS_SCALAR),"lis_matrix_diag_duplicate::Dout->value" );
281 if( NULL==(*Dout)->value )
282 {
283 LIS_SETERR_MEM(Din->bn*Din->bn*Din->nr*sizeof(LIS_SCALAR));
284 lis_matrix_diag_destroy(*Dout);
285 *Dout = NULL;
286 return LIS_OUT_OF_MEMORY;
287 }
288 (*Dout)->bn = Din->bn;
289 }
290 else
291 {
292 nr = Din->nr;
293 (*Dout)->bns = (LIS_INT *)lis_malloc( nr*sizeof(LIS_INT),"lis_matrix_diag_duplicate::Dout->bns" );
294 if( NULL==(*Dout)->bns )
295 {
296 LIS_SETERR_MEM(nr*sizeof(LIS_INT));
297 lis_matrix_diag_destroy(*Dout);
298 *Dout = NULL;
299 return LIS_OUT_OF_MEMORY;
300 }
301 (*Dout)->v_value = (LIS_SCALAR **)lis_malloc( nr*sizeof(LIS_SCALAR *),"lis_matrix_diag_duplicate::Dout->value" );
302 if( NULL==(*Dout)->v_value )
303 {
304 LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR *));
305 lis_matrix_diag_destroy(*Dout);
306 *Dout = NULL;
307 return LIS_OUT_OF_MEMORY;
308 }
309 bnmax = 0;
310 for(i=0;i<nr;i++)
311 {
312 t = Din->bns[i];
313 bnmax = _max(bnmax,t);
314 (*Dout)->bns[i] = t;
315 (*Dout)->v_value[i] = (LIS_SCALAR *)malloc( t*t*sizeof(LIS_SCALAR));
316 }
317 (*Dout)->bn = bnmax;
318 (*Dout)->nr = nr;
319 }
320
321 (*Dout)->n = Din->n;
322 (*Dout)->nr = Din->nr;
323 (*Dout)->gn = Din->gn;
324 (*Dout)->np = Din->np;
325 (*Dout)->comm = Din->comm;
326 (*Dout)->my_rank = Din->my_rank;
327 (*Dout)->nprocs = Din->nprocs;
328 (*Dout)->is = Din->is;
329 (*Dout)->ie = Din->ie;
330 (*Dout)->origin = Din->origin;
331
332 LIS_DEBUG_FUNC_OUT;
333 return LIS_SUCCESS;
334 }
335
336 #undef __FUNC__
337 #define __FUNC__ "lis_matrix_diag_duplicate"
lis_matrix_diag_duplicateM(LIS_MATRIX Ain,LIS_MATRIX_DIAG * Dout)338 LIS_INT lis_matrix_diag_duplicateM(LIS_MATRIX Ain, LIS_MATRIX_DIAG *Dout)
339 {
340 LIS_INT nr,err;
341 LIS_INT nprocs;
342 LIS_INT i,k,t,bnmax;
343 #ifdef USE_MPI
344 LIS_INT *ranges;
345 #endif
346
347 LIS_DEBUG_FUNC_IN;
348
349 err = lis_matrix_check(Ain,LIS_MATRIX_CHECK_ALL);
350 if( err ) return err;
351
352 nprocs = Ain->nprocs;
353 *Dout = NULL;
354
355 *Dout = (LIS_MATRIX_DIAG)lis_malloc( sizeof(struct LIS_MATRIX_DIAG_STRUCT),"lis_matrix_diag_duplicateM::Dout" );
356 if( NULL==*Dout )
357 {
358 LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_DIAG_STRUCT));
359 return LIS_OUT_OF_MEMORY;
360 }
361 lis_matrix_diag_init(Dout);
362
363 #ifdef USE_MPI
364 ranges = (LIS_INT *)lis_malloc( (nprocs+1)*sizeof(LIS_INT),"lis_matrix_diag_duplicateM::ranges" );
365 if( ranges==NULL )
366 {
367 LIS_SETERR_MEM((nprocs+1)*sizeof(LIS_INT));
368 lis_matrix_diag_destroy(*Dout);
369 *Dout = NULL;
370 return LIS_OUT_OF_MEMORY;
371 }
372 for(i=0;i<nprocs+1;i++) ranges[i] = Ain->ranges[i];
373 (*Dout)->ranges = ranges;
374 #else
375 (*Dout)->ranges = NULL;
376 #endif
377
378 switch(Ain->matrix_type)
379 {
380 case LIS_MATRIX_BSR:
381 case LIS_MATRIX_BSC:
382 k = Ain->nr*Ain->bnr*Ain->bnc;
383 (*Dout)->value = (LIS_SCALAR *)lis_malloc( k*sizeof(LIS_SCALAR),"lis_matrix_diag_duplicateM::Dout->value" );
384 if( NULL==(*Dout)->value )
385 {
386 LIS_SETERR_MEM(k*sizeof(LIS_SCALAR));
387 lis_matrix_diag_destroy(*Dout);
388 *Dout = NULL;
389 return LIS_OUT_OF_MEMORY;
390 }
391 (*Dout)->bn = Ain->bnr;
392 (*Dout)->nr = Ain->nr;
393 break;
394 case LIS_MATRIX_VBR:
395 nr = Ain->nr;
396 (*Dout)->bns = (LIS_INT *)lis_malloc( nr*sizeof(LIS_INT),"lis_matrix_diag_duplicateM::Dout->bns" );
397 if( NULL==(*Dout)->bns )
398 {
399 LIS_SETERR_MEM(nr*sizeof(LIS_INT));
400 lis_matrix_diag_destroy(*Dout);
401 *Dout = NULL;
402 return LIS_OUT_OF_MEMORY;
403 }
404 (*Dout)->v_value = (LIS_SCALAR **)lis_malloc( nr*sizeof(LIS_SCALAR *),"lis_matrix_diag_duplicateM::Dout->value" );
405 if( NULL==(*Dout)->v_value )
406 {
407 LIS_SETERR_MEM(nr*sizeof(LIS_SCALAR *));
408 lis_matrix_diag_destroy(*Dout);
409 *Dout = NULL;
410 return LIS_OUT_OF_MEMORY;
411 }
412 bnmax = 0;
413 for(i=0;i<nr;i++)
414 {
415 t = Ain->row[i+1] - Ain->row[i];
416 bnmax = _max(bnmax,t);
417 (*Dout)->bns[i] = t;
418 (*Dout)->v_value[i] = (LIS_SCALAR *)malloc( t*t*sizeof(LIS_SCALAR));
419 }
420 (*Dout)->bn = bnmax;
421 (*Dout)->nr = nr;
422 break;
423 default:
424 (*Dout)->value = (LIS_SCALAR *)lis_malloc( Ain->np*sizeof(LIS_SCALAR),"lis_matrix_diag_duplicateM::Dout->value" );
425 if( NULL==(*Dout)->value )
426 {
427 LIS_SETERR_MEM(Ain->np*sizeof(LIS_SCALAR));
428 lis_matrix_diag_destroy(*Dout);
429 *Dout = NULL;
430 return LIS_OUT_OF_MEMORY;
431 }
432 (*Dout)->nr = Ain->n;
433 break;
434 }
435
436 (*Dout)->n = Ain->n;
437 (*Dout)->gn = Ain->gn;
438 (*Dout)->np = Ain->np;
439 (*Dout)->comm = Ain->comm;
440 (*Dout)->my_rank = Ain->my_rank;
441 (*Dout)->nprocs = Ain->nprocs;
442 (*Dout)->is = Ain->is;
443 (*Dout)->ie = Ain->ie;
444 (*Dout)->origin = Ain->origin;
445
446 LIS_DEBUG_FUNC_OUT;
447 return LIS_SUCCESS;
448 }
449
450 #undef __FUNC__
451 #define __FUNC__ "lis_matrix_diag_malloc"
lis_matrix_diag_mallocM(LIS_MATRIX A,LIS_SCALAR ** diag)452 LIS_INT lis_matrix_diag_mallocM(LIS_MATRIX A, LIS_SCALAR **diag)
453 {
454 LIS_INT err;
455 LIS_INT k;
456
457 LIS_DEBUG_FUNC_IN;
458
459 err = lis_matrix_check(A,LIS_MATRIX_CHECK_ALL);
460 if( err ) return err;
461
462 switch(A->matrix_type)
463 {
464 case LIS_MATRIX_BSR:
465 k = A->nr*A->bnr*A->bnc;
466 *diag = (LIS_SCALAR *)lis_malloc( k*sizeof(LIS_SCALAR),"lis_matrix_diag_mallocM::diag" );
467 if( NULL==*diag )
468 {
469 LIS_SETERR_MEM(k*sizeof(LIS_SCALAR));
470 *diag = NULL;
471 return LIS_OUT_OF_MEMORY;
472 }
473 break;
474 default:
475 *diag = (LIS_SCALAR *)lis_malloc( A->n*sizeof(LIS_SCALAR),"lis_matrix_diag_mallocM::diag" );
476 if( NULL==*diag )
477 {
478 LIS_SETERR_MEM(A->n*sizeof(LIS_SCALAR));
479 *diag = NULL;
480 return LIS_OUT_OF_MEMORY;
481 }
482 break;
483 }
484
485 LIS_DEBUG_FUNC_OUT;
486 return LIS_SUCCESS;
487 }
488
489 #undef __FUNC__
490 #define __FUNC__ "lis_matrix_diag_get_range"
lis_matrix_diag_get_range(LIS_MATRIX_DIAG D,LIS_INT * is,LIS_INT * ie)491 LIS_INT lis_matrix_diag_get_range(LIS_MATRIX_DIAG D, LIS_INT *is, LIS_INT *ie)
492 {
493 LIS_INT err;
494
495 LIS_DEBUG_FUNC_IN;
496
497 err = lis_matrix_diag_check(D,LIS_MATRIX_CHECK_NULL);
498 if( err ) return err;
499
500 *is = D->is;
501 *ie = D->ie;
502
503 LIS_DEBUG_FUNC_OUT;
504 return LIS_SUCCESS;
505 }
506
507 #undef __FUNC__
508 #define __FUNC__ "lis_matrix_diag_get_size"
lis_matrix_diag_get_size(LIS_MATRIX_DIAG D,LIS_INT * local_n,LIS_INT * global_n)509 LIS_INT lis_matrix_diag_get_size(LIS_MATRIX_DIAG D, LIS_INT *local_n, LIS_INT *global_n)
510 {
511 LIS_INT err;
512
513 LIS_DEBUG_FUNC_IN;
514
515 err = lis_matrix_diag_check(D,LIS_MATRIX_CHECK_NULL);
516 if( err ) return err;
517
518 *local_n = D->n;
519 *global_n = D->gn;
520
521 LIS_DEBUG_FUNC_OUT;
522 return LIS_SUCCESS;
523 }
524
525
526 #undef __FUNC__
527 #define __FUNC__ "lis_matrix_diag_set_blocksize"
lis_matrix_diag_set_blocksize(LIS_MATRIX_DIAG D,LIS_INT bn,LIS_INT * bns)528 LIS_INT lis_matrix_diag_set_blocksize(LIS_MATRIX_DIAG D, LIS_INT bn, LIS_INT *bns)
529 {
530 LIS_INT i,n,nr,bnmax,t;
531 LIS_INT err;
532 LIS_SCALAR *diag;
533
534 LIS_DEBUG_FUNC_IN;
535
536 err = lis_matrix_diag_check(D,LIS_MATRIX_CHECK_NULL);
537 if( err ) return err;
538
539 n = D->n;
540 if( bns==NULL )
541 {
542 nr = 1 + (n-1)/bn;
543 diag = (LIS_SCALAR *)lis_malloc( bn*bn*nr*sizeof(LIS_SCALAR),"lis_matrix_diag_set_blocksize::diag" );
544 if( NULL==diag )
545 {
546 LIS_SETERR_MEM(bn*bn*nr*sizeof(LIS_SCALAR));
547 return LIS_OUT_OF_MEMORY;
548 }
549 lis_free(D->value);
550 D->value = diag;
551 D->nr = nr;
552 D->bn = bn;
553 }
554 else
555 {
556 if( D->bns==NULL )
557 {
558 lis_free(D->value);
559 D->bns = (LIS_INT *)lis_malloc( bn*sizeof(LIS_INT),"lis_matrix_diag_duplicateM::Dout->bns" );
560 if( NULL==D->bns )
561 {
562 LIS_SETERR_MEM(bn*sizeof(LIS_INT));
563 lis_matrix_diag_destroy(D);
564 D = NULL;
565 return LIS_OUT_OF_MEMORY;
566 }
567 D->v_value = (LIS_SCALAR **)lis_malloc( bn*sizeof(LIS_SCALAR *),"lis_matrix_diag_duplicateM::Dout->value" );
568 if( NULL==D->v_value )
569 {
570 LIS_SETERR_MEM(bn*sizeof(LIS_SCALAR *));
571 lis_matrix_diag_destroy(D);
572 D = NULL;
573 return LIS_OUT_OF_MEMORY;
574 }
575 bnmax = 0;
576 for(i=0;i<bn;i++)
577 {
578 t = bns[i];
579 bnmax = _max(bnmax,t);
580 D->bns[i] = t;
581 D->v_value[i] = (LIS_SCALAR *)malloc( t*t*sizeof(LIS_SCALAR));
582 }
583 D->bn = bnmax;
584 D->nr = bn;
585 }
586 }
587 LIS_DEBUG_FUNC_OUT;
588 return LIS_SUCCESS;
589 }
590
591 #undef __FUNC__
592 #define __FUNC__ "lis_matrix_diag_copy"
lis_matrix_diag_copy(LIS_MATRIX_DIAG X,LIS_MATRIX_DIAG Y)593 LIS_INT lis_matrix_diag_copy(LIS_MATRIX_DIAG X, LIS_MATRIX_DIAG Y)
594 {
595 LIS_INT i,n,nr,bn;
596 LIS_SCALAR *x,*y;
597
598 LIS_DEBUG_FUNC_IN;
599
600 x = X->value;
601 y = Y->value;
602 n = X->n;
603 nr = X->nr;
604 if( n!=Y->n )
605 {
606 LIS_SETERR(LIS_ERR_ILL_ARG,"length of diagonal matrix X and Y is not equal\n");
607 return LIS_ERR_ILL_ARG;
608 }
609 if( X->bns )
610 {
611 #ifdef _OPENMP
612 #pragma omp parallel for private(i)
613 #endif
614 for(i=0;i<nr;i++)
615 {
616 bn = X->bns[i]*X->bns[i];
617 memcpy(Y->v_value[i],X->v_value[i],bn*sizeof(LIS_SCALAR));
618 }
619 }
620 else
621 {
622 bn = X->bn*X->bn;
623 #ifdef _OPENMP
624 #pragma omp parallel for private(i)
625 #endif
626 for(i=0; i<nr; i++)
627 {
628 memcpy(&y[i*bn],&x[i*bn],bn*sizeof(LIS_SCALAR));
629 }
630 }
631 LIS_DEBUG_FUNC_OUT;
632 return LIS_SUCCESS;
633 }
634
635 #undef __FUNC__
636 #define __FUNC__ "lis_matrix_diag_scale"
lis_matrix_diag_scale(LIS_SCALAR alpha,LIS_MATRIX_DIAG D)637 LIS_INT lis_matrix_diag_scale(LIS_SCALAR alpha, LIS_MATRIX_DIAG D)
638 {
639 LIS_INT i,j,nr,bn;
640 LIS_SCALAR *d;
641
642 LIS_DEBUG_FUNC_IN;
643
644 d = D->value;
645 nr = D->nr;
646 bn = D->bn;
647 if( D->bns )
648 {
649 #ifdef _OPENMP
650 #pragma omp parallel for private(i)
651 #endif
652 for(i=0; i<nr; i++)
653 {
654 bn = D->bns[i]*D->bns[i];
655 for(j=0;j<bn;j++)
656 {
657 D->v_value[i][j] *= alpha;
658 }
659 }
660 }
661 else
662 {
663 if( bn==1 )
664 {
665 #ifdef _OPENMP
666 #pragma omp parallel for private(i)
667 #endif
668 for(i=0; i<nr; i++)
669 {
670 d[i] = alpha * d[i];
671 }
672 }
673 else if( bn==2 )
674 {
675 #ifdef _OPENMP
676 #pragma omp parallel for private(i)
677 #endif
678 for(i=0; i<nr; i++)
679 {
680 d[4*i] = alpha * d[4*i];
681 d[4*i+1] = alpha * d[4*i+1];
682 d[4*i+2] = alpha * d[4*i+2];
683 d[4*i+3] = alpha * d[4*i+3];
684 }
685 }
686 else if( bn==3 )
687 {
688 #ifdef _OPENMP
689 #pragma omp parallel for private(i)
690 #endif
691 for(i=0; i<nr; i++)
692 {
693 d[9*i] = alpha * d[9*i];
694 d[9*i+1] = alpha * d[9*i+1];
695 d[9*i+2] = alpha * d[9*i+2];
696 d[9*i+3] = alpha * d[9*i+3];
697 d[9*i+4] = alpha * d[9*i+4];
698 d[9*i+5] = alpha * d[9*i+5];
699 d[9*i+6] = alpha * d[9*i+6];
700 d[9*i+7] = alpha * d[9*i+7];
701 d[9*i+8] = alpha * d[9*i+8];
702 }
703 }
704 else if( bn==4 )
705 {
706 #ifdef _OPENMP
707 #pragma omp parallel for private(i)
708 #endif
709 for(i=0; i<nr; i++)
710 {
711 d[16*i] = alpha * d[16*i];
712 d[16*i+1] = alpha * d[16*i+1];
713 d[16*i+2] = alpha * d[16*i+2];
714 d[16*i+3] = alpha * d[16*i+3];
715 d[16*i+4] = alpha * d[16*i+4];
716 d[16*i+5] = alpha * d[16*i+5];
717 d[16*i+6] = alpha * d[16*i+6];
718 d[16*i+7] = alpha * d[16*i+7];
719 d[16*i+8] = alpha * d[16*i+8];
720 d[16*i+9] = alpha * d[16*i+9];
721 d[16*i+10] = alpha * d[16*i+10];
722 d[16*i+11] = alpha * d[16*i+11];
723 d[16*i+12] = alpha * d[16*i+12];
724 d[16*i+13] = alpha * d[16*i+13];
725 d[16*i+14] = alpha * d[16*i+14];
726 d[16*i+15] = alpha * d[16*i+15];
727 }
728 }
729 else
730 {
731 bn = bn*bn;
732 #ifdef _OPENMP
733 #pragma omp parallel for private(i)
734 #endif
735 for(i=0; i<nr; i++)
736 {
737 for(j=0;j<bn;j++)
738 {
739 d[i*bn+j] = alpha * d[i*bn+j];
740 }
741 }
742 }
743 }
744 LIS_DEBUG_FUNC_OUT;
745 return LIS_SUCCESS;
746 }
747
748 #undef __FUNC__
749 #define __FUNC__ "lis_matrix_diag_inverse"
lis_matrix_diag_inverse(LIS_MATRIX_DIAG D)750 LIS_INT lis_matrix_diag_inverse(LIS_MATRIX_DIAG D)
751 {
752 LIS_INT i,k,nr,bn,bs,n;
753 LIS_SCALAR *d;
754
755 LIS_DEBUG_FUNC_IN;
756
757 n = D->n;
758 d = D->value;
759 nr = D->nr;
760 bn = D->bn;
761 bs = D->bn*D->bn;
762 if( D->bns )
763 {
764 #ifdef _OPENMP
765 #pragma omp parallel for private(i,bn)
766 #endif
767 for(i=0; i<nr; i++)
768 {
769 bn = D->bns[i];
770 lis_array_ge(bn,D->v_value[i]);
771 }
772 }
773 else
774 {
775 if( bn==1 )
776 {
777 #ifdef _OPENMP
778 #pragma omp parallel for private(i)
779 #endif
780 for(i=0; i<nr; i++)
781 {
782 d[i] = 1.0 / d[i];
783 }
784 }
785 else
786 {
787 k = n%bn;
788 if( k!=0 )
789 {
790 for(i=bn-1;i>=k;i--)
791 {
792 d[bs*(nr-1)+i*(bn+1)] = 1.0;
793 }
794 }
795 #ifdef _OPENMP
796 #pragma omp parallel for private(i)
797 #endif
798 for(i=0;i<nr;i++)
799 {
800 lis_array_ge(bn,&d[i*bs]);
801 }
802 }
803 }
804 LIS_DEBUG_FUNC_OUT;
805 return LIS_SUCCESS;
806 }
807
808 #undef __FUNC__
809 #define __FUNC__ "lis_matrix_diag_matvec"
lis_matrix_diag_matvec(LIS_MATRIX_DIAG D,LIS_VECTOR X,LIS_VECTOR Y)810 LIS_INT lis_matrix_diag_matvec(LIS_MATRIX_DIAG D, LIS_VECTOR X, LIS_VECTOR Y)
811 {
812 LIS_INT i,nr,bn,bs;
813 LIS_SCALAR *d,*x,*y;
814
815 LIS_DEBUG_FUNC_IN;
816
817 d = D->value;
818 x = X->value;
819 y = Y->value;
820 nr = D->nr;
821 bn = D->bn;
822 bs = D->bn*D->bn;
823 if( D->bns )
824 {
825 #ifdef _OPENMP
826 #pragma omp parallel for private(i,bn)
827 #endif
828 for(i=0; i<nr; i++)
829 {
830 bn = D->bns[i];
831 lis_array_matvec(bn,D->v_value[i],&x[i*bn],&y[i*bn],LIS_INS_VALUE);
832 }
833 }
834 else
835 {
836 if( bn==1 )
837 {
838 #ifdef _OPENMP
839 #pragma omp parallel for private(i)
840 #endif
841 for(i=0; i<nr; i++)
842 {
843 y[i] = x[i] * d[i];
844 }
845 }
846 else if( bn==2 )
847 {
848 #ifdef _OPENMP
849 #pragma omp parallel for private(i)
850 #endif
851 for(i=0; i<nr; i++)
852 {
853 y[2*i] = d[4*i] * x[2*i] + d[4*i+2] * x[2*i+1];
854 y[2*i+1] = d[4*i+1] * x[2*i] + d[4*i+3] * x[2*i+1];
855 }
856 }
857 else if( bn==3 )
858 {
859 #ifdef _OPENMP
860 #pragma omp parallel for private(i)
861 #endif
862 for(i=0; i<nr; i++)
863 {
864 y[3*i] = d[9*i] * x[3*i] + d[9*i+3] * x[3*i+1] + d[9*i+6] * x[3*i+2];
865 y[3*i+1] = d[9*i+1] * x[3*i] + d[9*i+4] * x[3*i+1] + d[9*i+7] * x[3*i+2];
866 y[3*i+2] = d[9*i+2] * x[3*i] + d[9*i+5] * x[3*i+1] + d[9*i+8] * x[3*i+2];
867 }
868 }
869 else if( bn==4 )
870 {
871 #ifdef _OPENMP
872 #pragma omp parallel for private(i)
873 #endif
874 for(i=0; i<nr; i++)
875 {
876 y[4*i] = d[16*i] * x[4*i] + d[16*i+4] * x[4*i+1] + d[16*i+8] * x[4*i+2] + d[16*i+12] * x[4*i+3];
877 y[4*i+1] = d[16*i+1] * x[4*i] + d[16*i+5] * x[4*i+1] + d[16*i+9] * x[4*i+2] + d[16*i+13] * x[4*i+3];
878 y[4*i+2] = d[16*i+2] * x[4*i] + d[16*i+6] * x[4*i+1] + d[16*i+10] * x[4*i+2] + d[16*i+14] * x[4*i+3];
879 y[4*i+3] = d[16*i+3] * x[4*i] + d[16*i+7] * x[4*i+1] + d[16*i+11] * x[4*i+2] + d[16*i+15] * x[4*i+3];
880 }
881 }
882 else
883 {
884 #ifdef _OPENMP
885 #pragma omp parallel for private(i)
886 #endif
887 for(i=0; i<nr; i++)
888 {
889 lis_array_matvec(bn,&d[i*bs],&x[i*bn],&y[i*bn],LIS_INS_VALUE);
890 }
891 }
892 }
893 LIS_DEBUG_FUNC_OUT;
894 return LIS_SUCCESS;
895 }
896
897 #undef __FUNC__
898 #define __FUNC__ "lis_matrix_diag_matvech"
lis_matrix_diag_matvech(LIS_MATRIX_DIAG D,LIS_VECTOR X,LIS_VECTOR Y)899 LIS_INT lis_matrix_diag_matvech(LIS_MATRIX_DIAG D, LIS_VECTOR X, LIS_VECTOR Y)
900 {
901 LIS_INT i,nr,bn,bs;
902 LIS_SCALAR *d,*x,*y;
903
904 LIS_DEBUG_FUNC_IN;
905
906 d = D->value;
907 x = X->value;
908 y = Y->value;
909 nr = D->nr;
910 bn = D->bn;
911 bs = D->bn*D->bn;
912 if( D->bns )
913 {
914 #ifdef _OPENMP
915 #pragma omp parallel for private(i,bn)
916 #endif
917 for(i=0; i<nr; i++)
918 {
919 bn = D->bns[i];
920 lis_array_matvech(bn,D->v_value[i],&x[i*bn],&y[i*bn],LIS_INS_VALUE);
921 }
922 }
923 else
924 {
925 #if 0
926 if( bn==1 )
927 {
928 #pragma omp parallel for private(i)
929 for(i=0; i<nr; i++)
930 {
931 y[i] = x[i] * conj(d[i]);
932 }
933 }
934 else if( bn==2 )
935 {
936 #pragma omp parallel for private(i)
937 for(i=0; i<nr; i++)
938 {
939 y[2*i] = conj(d[4*i]) * x[2*i] + conj(d[4*i+1]) * x[2*i+1];
940 y[2*i+1] = conj(d[4*i+2]) * x[2*i] + conj(d[4*i+3]) * x[2*i+1];
941 }
942 }
943 else if( bn==3 )
944 {
945 #pragma omp parallel for private(i)
946 for(i=0; i<nr; i++)
947 {
948 y[3*i] = conj(d[9*i]) * x[3*i] + conj(d[9*i+1]) * x[3*i+1] + conj(d[9*i+2]) * x[3*i+2];
949 y[3*i+1] = conj(d[9*i+3]) * x[3*i] + conj(d[9*i+4]) * x[3*i+1] + conj(d[9*i+5]) * x[3*i+2];
950 y[3*i+2] = conj(d[9*i+6]) * x[3*i] + conj(d[9*i+7]) * x[3*i+1] + conj(d[9*i+8]) * x[3*i+2];
951 }
952 }
953 else if( bn==4 )
954 {
955 #pragma omp parallel for private(i)
956 for(i=0; i<nr; i++)
957 {
958 y[4*i] = conj(d[16*i] * x[4*i] + conj(d[16*i+ 1]) * x[4*i+1] + conj(d[16*i+ 2]) * x[4*i+2] + conj(d[16*i+ 3]) * x[4*i+3];
959 y[4*i+1] = conj(d[16*i+ 4]) * x[4*i] + conj(d[16*i+ 5]) * x[4*i+1] + conj(d[16*i+ 6]) * x[4*i+2] + conj(d[16*i+ 7]) * x[4*i+3];
960 y[4*i+2] = conj(d[16*i+ 8]) * x[4*i] + conj(d[16*i+ 9]) * x[4*i+1] + conj(d[16*i+10]) * x[4*i+2] + conj(d[16*i+11]) * x[4*i+3];
961 y[4*i+3] = conj(d[16*i+12]) * x[4*i] + conj(d[16*i+13]) * x[4*i+1] + conj(d[16*i+14]) * x[4*i+2] + conj(d[16*i+15]) * x[4*i+3];
962 }
963 }
964 else
965 #endif
966 {
967 #ifdef _OPENMP
968 #pragma omp parallel for private(i)
969 #endif
970 for(i=0; i<nr; i++)
971 {
972 lis_array_matvech(bn,&d[i*bs],&x[i*bn],&y[i*bn],LIS_INS_VALUE);
973 }
974 }
975 }
976 LIS_DEBUG_FUNC_OUT;
977 return LIS_SUCCESS;
978 }
979
980 #undef __FUNC__
981 #define __FUNC__ "lis_matrix_diag_print"
lis_matrix_diag_print(LIS_MATRIX_DIAG D)982 LIS_INT lis_matrix_diag_print(LIS_MATRIX_DIAG D)
983 {
984 LIS_INT k,i,j,nr,bn,nn;
985
986 LIS_DEBUG_FUNC_IN;
987
988 nr = D->nr;
989 if( D->bns )
990 {
991 nn = 0;
992 for(k=0; k<nr; k++)
993 {
994 bn = D->bns[k];
995 for(i=0;i<bn;i++)
996 {
997 #ifdef _LONG__LONG
998 printf("%4lld (", nn+i);
999 #else
1000 printf("%4d (", nn+i);
1001 #endif
1002 for(j=0; j<bn; j++)
1003 {
1004 #ifdef _COMPLEX
1005 printf("%6.2f %6.2f ", (double)creal(D->v_value[k][j*bn+i]), (double)cimag(D->v_value[k][j*bn+i]));
1006 #else
1007 printf("%6.2f ", (double)D->v_value[k][j*bn+i]);
1008 #endif
1009 }
1010 printf(")\n");
1011 }
1012 nn += bn;
1013 }
1014 }
1015 else
1016 {
1017 bn = D->bn;
1018 nn = D->bn*D->bn;
1019 for(k=0; k<nr; k++)
1020 {
1021 for(i=0;i<bn;i++)
1022 {
1023 #ifdef _LONG__LONG
1024 printf("%4lld (", k*nn+i);
1025 #else
1026 printf("%4d (", k*nn+i);
1027 #endif
1028 for(j=0; j<bn; j++)
1029 {
1030 #ifdef _COMPLEX
1031 printf("%6.2f %6.2f ", (double)creal(D->value[k*nn + j*bn+i]), (double)cimag(D->value[k*nn + j*bn+i]));
1032 #else
1033 printf("%6.2f ", (double)D->value[k*nn + j*bn+i]);
1034 #endif
1035 }
1036 printf(")\n");
1037 }
1038 }
1039 }
1040 LIS_DEBUG_FUNC_OUT;
1041 return LIS_SUCCESS;
1042 }
1043