1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
5 1. Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7 2. Redistributions in binary form must reproduce the above copyright
8 notice, this list of conditions and the following disclaimer in the
9 documentation and/or other materials provided with the distribution.
10 3. Neither the name of the project nor the names of its contributors
11 may be used to endorse or promote products derived from this software
12 without specific prior written permission.
13
14 THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18 PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 #include "lis_config_win.h"
32 #endif
33 #endif
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #ifdef HAVE_MALLOC_H
38 #include <malloc.h>
39 #endif
40 #include <math.h>
41 #include <string.h>
42 #include <stdarg.h>
43 #ifdef USE_SSE2
44 #include <emmintrin.h>
45 #endif
46 #ifdef _OPENMP
47 #include <omp.h>
48 #endif
49 #ifdef USE_MPI
50 #include <mpi.h>
51 #endif
52 #include "lislib.h"
53
54 /***************************************
55 * Power Iteration *
56 ***************************************
57 v = (1,...,1)^T
58 ***************************************
59 for k=1,2,...
60 v = v / ||v||_2
61 y = A * v
62 theta = <v,y>
63 resid = ||y - theta * v||_2 / |theta|
64 v = y
65 if resid < tol then stop
66 lambda = theta
67 x = v / ||v||_2
68 ***************************************/
69
70 #define NWORK 2
71 #undef __FUNC__
72 #define __FUNC__ "lis_epi_check_params"
lis_epi_check_params(LIS_ESOLVER esolver)73 LIS_INT lis_epi_check_params(LIS_ESOLVER esolver)
74 {
75 LIS_DEBUG_FUNC_IN;
76 LIS_DEBUG_FUNC_OUT;
77 return LIS_SUCCESS;
78 }
79
80 #undef __FUNC__
81 #define __FUNC__ "lis_epi_malloc_work"
lis_epi_malloc_work(LIS_ESOLVER esolver)82 LIS_INT lis_epi_malloc_work(LIS_ESOLVER esolver)
83 {
84 LIS_VECTOR *work;
85 LIS_INT i,j,worklen,err;
86
87 LIS_DEBUG_FUNC_IN;
88
89 worklen = NWORK;
90 work = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_epi_malloc_work::work" );
91 if( work==NULL )
92 {
93 LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
94 return LIS_ERR_OUT_OF_MEMORY;
95 }
96 if( esolver->eprecision==LIS_PRECISION_DEFAULT )
97 {
98 for(i=0;i<worklen;i++)
99 {
100 err = lis_vector_duplicate(esolver->A,&work[i]);
101 if( err ) break;
102 }
103 }
104 else
105 {
106 for(i=0;i<worklen;i++)
107 {
108 err = lis_vector_duplicateex(LIS_PRECISION_QUAD,esolver->A,&work[i]);
109 if( err ) break;
110 }
111 }
112 if( i<worklen )
113 {
114 for(j=0;j<i;j++) lis_vector_destroy(work[j]);
115 lis_free(work);
116 return err;
117 }
118 esolver->worklen = worklen;
119 esolver->work = work;
120
121 LIS_DEBUG_FUNC_OUT;
122 return LIS_SUCCESS;
123 }
124
125 #undef __FUNC__
126 #define __FUNC__ "lis_epi"
lis_epi(LIS_ESOLVER esolver)127 LIS_INT lis_epi(LIS_ESOLVER esolver)
128 {
129 LIS_Comm comm;
130 LIS_MATRIX A;
131 LIS_VECTOR v,y,q;
132 LIS_SCALAR theta;
133 LIS_INT emaxiter;
134 LIS_REAL tol;
135 LIS_INT iter,output;
136 LIS_SCALAR oshift,ishift;
137 LIS_REAL nrm2,resid;
138
139 LIS_DEBUG_FUNC_IN;
140
141 comm = LIS_COMM_WORLD;
142
143 emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
144 tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
145 output = esolver->options[LIS_EOPTIONS_OUTPUT];
146 #ifdef _COMPLEX
147 oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN] + esolver->params[LIS_EPARAMS_SHIFT_IM - LIS_EOPTIONS_LEN] * _Complex_I;
148 #else
149 oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN];
150 #endif
151
152 A = esolver->A;
153 v = esolver->x;
154 if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
155 {
156 lis_vector_set_all(1.0,v);
157 }
158 y = esolver->work[0];
159 q = esolver->work[1];
160
161 if ( esolver->ishift != 0.0 ) oshift = ishift;
162 if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, oshift);
163
164 if( output )
165 {
166 #ifdef _COMPLEX
167 lis_printf(comm,"shift : (%e, %e)\n", (double)creal(oshift), (double)cimag(oshift));
168 #else
169 lis_printf(comm,"shift : %e\n", (double)oshift);
170 #endif
171 }
172
173 iter=0;
174 while (iter<emaxiter)
175 {
176 iter = iter+1;
177
178 /* v = v / ||v||_2 */
179 lis_vector_nrm2(v, &nrm2);
180 lis_vector_scale(1.0/nrm2, v);
181
182 /* y = A * v */
183 lis_matvec(A,v,y);
184
185 /* theta = <v,y> */
186 lis_vector_dot(v, y, &theta);
187
188 /* resid = ||y - theta * v||_2 / |theta| */
189 lis_vector_axpyz(-theta, v, y, q);
190 lis_vector_nrm2(q, &resid);
191 resid = resid / fabs(theta);
192
193 /* v = y */
194 lis_vector_copy(y, v);
195
196 /* convergence check */
197 if( output )
198 {
199 if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
200 if( output & LIS_EPRINT_OUT ) lis_print_rhistory(comm,iter,resid);
201 }
202
203 if( tol >= resid )
204 {
205 esolver->retcode = LIS_SUCCESS;
206 esolver->iter[0] = iter;
207 esolver->resid[0] = resid;
208 esolver->evalue[0] = theta + oshift;
209 lis_vector_nrm2(v, &nrm2);
210 lis_vector_scale(1.0/nrm2, v);
211 if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, -oshift);
212 LIS_DEBUG_FUNC_OUT;
213 return LIS_SUCCESS;
214 }
215 }
216
217 esolver->retcode = LIS_MAXITER;
218 esolver->iter[0] = iter;
219 esolver->resid[0] = resid;
220 esolver->evalue[0] = theta + oshift;
221 lis_vector_nrm2(v, &nrm2);
222 lis_vector_scale(1.0/nrm2, v);
223 if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, -oshift);
224 LIS_DEBUG_FUNC_OUT;
225 return LIS_MAXITER;
226 }
227
228 /***************************************
229 * Generalized Power Iteration *
230 ***************************************
231 v = (1,...,1)^T
232 ***************************************
233 for k=1,2,...
234 v = v / ||v||_2
235 y = A * v
236 v = v / <v,w>^1/2
237 w = w / <v,w>^1/2
238 y = B^-1 * w
239 theta = <w,y>
240 resid = ||y - theta * v||_2 / |theta|
241 v = y
242 if resid < tol then stop
243 lambda = theta
244 x = v / ||v||_2
245 ***************************************/
246
247 #undef NWORK
248 #define NWORK 3
249 #undef __FUNC__
250 #define __FUNC__ "lis_egpi_check_params"
lis_egpi_check_params(LIS_ESOLVER esolver)251 LIS_INT lis_egpi_check_params(LIS_ESOLVER esolver)
252 {
253 LIS_DEBUG_FUNC_IN;
254 LIS_DEBUG_FUNC_OUT;
255 return LIS_SUCCESS;
256 }
257
258 #undef __FUNC__
259 #define __FUNC__ "lis_egpi_malloc_work"
lis_egpi_malloc_work(LIS_ESOLVER esolver)260 LIS_INT lis_egpi_malloc_work(LIS_ESOLVER esolver)
261 {
262 LIS_VECTOR *work;
263 LIS_INT i,j,worklen,err;
264
265 LIS_DEBUG_FUNC_IN;
266
267 worklen = NWORK;
268 work = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_egpi_malloc_work::work" );
269 if( work==NULL )
270 {
271 LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
272 return LIS_ERR_OUT_OF_MEMORY;
273 }
274 if( esolver->eprecision==LIS_PRECISION_DEFAULT )
275 {
276 for(i=0;i<worklen;i++)
277 {
278 err = lis_vector_duplicate(esolver->A,&work[i]);
279 if( err ) break;
280 }
281 }
282 else
283 {
284 for(i=0;i<worklen;i++)
285 {
286 err = lis_vector_duplicateex(LIS_PRECISION_QUAD,esolver->A,&work[i]);
287 if( err ) break;
288 }
289 }
290 if( i<worklen )
291 {
292 for(j=0;j<i;j++) lis_vector_destroy(work[j]);
293 lis_free(work);
294 return err;
295 }
296 esolver->worklen = worklen;
297 esolver->work = work;
298
299 LIS_DEBUG_FUNC_OUT;
300 return LIS_SUCCESS;
301 }
302
303 #undef __FUNC__
304 #define __FUNC__ "lis_egpi"
lis_egpi(LIS_ESOLVER esolver)305 LIS_INT lis_egpi(LIS_ESOLVER esolver)
306 {
307 LIS_Comm comm;
308 LIS_MATRIX A,B;
309 LIS_VECTOR w,v,y,q;
310 LIS_SCALAR eta,theta;
311 LIS_INT emaxiter;
312 LIS_REAL tol;
313 LIS_INT iter,iter2,output;
314 LIS_SCALAR oshift,ishift;
315 LIS_REAL nrm2,resid;
316 LIS_SOLVER solver;
317 double time,itime,ptime,p_c_time,p_i_time;
318
319 LIS_INT err;
320 LIS_PRECON precon;
321 LIS_INT nsol, precon_type;
322 char solvername[128], preconname[128];
323
324 LIS_DEBUG_FUNC_IN;
325
326 comm = LIS_COMM_WORLD;
327
328 emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
329 tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
330 output = esolver->options[LIS_EOPTIONS_OUTPUT];
331 #ifdef _COMPLEX
332 oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN] + esolver->params[LIS_EPARAMS_SHIFT_IM - LIS_EOPTIONS_LEN] * _Complex_I;
333 #else
334 oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN];
335 #endif
336
337 A = esolver->A;
338 B = esolver->B;
339 v = esolver->x;
340 if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
341 {
342 lis_vector_set_all(1.0,v);
343 }
344 w = esolver->work[0];
345 y = esolver->work[1];
346 q = esolver->work[2];
347
348 if ( esolver->ishift != 0.0 ) oshift = ishift;
349 if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, oshift);
350
351 if( output )
352 {
353 #ifdef _COMPLEX
354 lis_printf(comm,"shift : (%e, %e)\n", (double)creal(oshift), (double)cimag(oshift));
355 #else
356 lis_printf(comm,"shift : %e\n", (double)oshift);
357 #endif
358 }
359
360 lis_solver_create(&solver);
361 lis_solver_set_option("-i bicg -p none",solver);
362 err = lis_solver_set_optionC(solver);
363 CHKERR(err);
364 lis_solver_get_solver(solver, &nsol);
365 lis_solver_get_precon(solver, &precon_type);
366 lis_solver_get_solvername(nsol, solvername);
367 lis_solver_get_preconname(precon_type, preconname);
368 if( output )
369 {
370 lis_printf(comm,"linear solver : %s\n", solvername);
371 lis_printf(comm,"preconditioner : %s\n", preconname);
372 }
373
374 /* create preconditioner */
375 solver->A = B;
376 err = lis_precon_create(solver, &precon);
377 if( err )
378 {
379 lis_solver_work_destroy(solver);
380 solver->retcode = err;
381 return err;
382 }
383
384 iter=0;
385 while (iter<emaxiter)
386 {
387 iter = iter+1;
388
389 /* v = v / ||v||_2 */
390 lis_vector_nrm2(v, &nrm2);
391 lis_vector_scale(1.0/nrm2, v);
392
393 /* w = A * v */
394 lis_matvec(A, v, w);
395
396 /* v = v / <v,w>^1/2, w = w / <v,w>^1/2 */
397 lis_vector_dot(v, w, &eta);
398 eta = sqrt(eta);
399 lis_vector_scale(1.0/eta, v);
400 lis_vector_scale(1.0/eta, w);
401
402 /* y = B^-1 * w */
403 err = lis_solve_kernel(B, w, y, solver, precon);
404 if( err )
405 {
406 lis_solver_work_destroy(solver);
407 solver->retcode = err;
408 return err;
409 }
410 lis_solver_get_iter(solver, &iter2);
411
412 /* theta = <w,y> */
413 lis_vector_dot(w, y, &theta);
414
415 /* resid = ||y - theta * v||_2 / |theta| */
416 lis_vector_axpyz(-theta, v, y, q);
417 lis_vector_nrm2(q, &resid);
418 resid = resid / fabs(theta);
419
420 /* v = y */
421 lis_vector_copy(y, v);
422
423 /* convergence check */
424 lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
425 esolver->ptime += solver->ptime;
426 esolver->itime += solver->itime;
427 esolver->p_c_time += solver->p_c_time;
428 esolver->p_i_time += solver->p_i_time;
429
430 if( output )
431 {
432 if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
433 if( output & LIS_EPRINT_OUT ) lis_print_rhistory(comm,iter,resid);
434 }
435
436 if( tol >= resid )
437 {
438 esolver->retcode = LIS_SUCCESS;
439 esolver->iter[0] = iter;
440 esolver->resid[0] = resid;
441 esolver->evalue[0] = theta + oshift;
442 lis_vector_nrm2(v, &nrm2);
443 lis_vector_scale(1.0/nrm2, v);
444 if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, -oshift);
445 lis_precon_destroy(precon);
446 lis_solver_destroy(solver);
447 LIS_DEBUG_FUNC_OUT;
448 return LIS_SUCCESS;
449 }
450 }
451
452 lis_precon_destroy(precon);
453
454 esolver->retcode = LIS_MAXITER;
455 esolver->iter[0] = iter;
456 esolver->resid[0] = resid;
457 esolver->evalue[0] = theta + oshift;
458 lis_vector_nrm2(v, &nrm2);
459 lis_vector_scale(1.0/nrm2, v);
460 if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, -oshift);
461 lis_solver_destroy(solver);
462 LIS_DEBUG_FUNC_OUT;
463 return LIS_MAXITER;
464 }
465
466