1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2 
3    Redistribution and use in source and binary forms, with or without
4    modification, are permitted provided that the following conditions are met:
5    1. Redistributions of source code must retain the above copyright
6       notice, this list of conditions and the following disclaimer.
7    2. Redistributions in binary form must reproduce the above copyright
8       notice, this list of conditions and the following disclaimer in the
9       documentation and/or other materials provided with the distribution.
10    3. Neither the name of the project nor the names of its contributors
11       may be used to endorse or promote products derived from this software
12       without specific prior written permission.
13 
14    THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18    PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24    POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28 	#include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 	#include "lis_config_win.h"
32 #endif
33 #endif
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #ifdef HAVE_MALLOC_H
38         #include <malloc.h>
39 #endif
40 #include <math.h>
41 #include <string.h>
42 #include <stdarg.h>
43 #ifdef USE_SSE2
44 	#include <emmintrin.h>
45 #endif
46 #ifdef _OPENMP
47 	#include <omp.h>
48 #endif
49 #ifdef USE_MPI
50 	#include <mpi.h>
51 #endif
52 #include "lislib.h"
53 
54 /***************************************
55  * Power Iteration                     *
56  ***************************************
57  v    = (1,...,1)^T
58  ***************************************
59  for k=1,2,...
60    v         = v / ||v||_2
61    y         = A * v
62    theta     = <v,y>
63    resid     = ||y - theta * v||_2 / |theta|
64    v         = y
65    if resid < tol then stop
66  lambda      = theta
67  x           = v / ||v||_2
68  ***************************************/
69 
70 #define NWORK 2
71 #undef __FUNC__
72 #define __FUNC__ "lis_epi_check_params"
lis_epi_check_params(LIS_ESOLVER esolver)73 LIS_INT lis_epi_check_params(LIS_ESOLVER esolver)
74 {
75 	LIS_DEBUG_FUNC_IN;
76 	LIS_DEBUG_FUNC_OUT;
77 	return LIS_SUCCESS;
78 }
79 
80 #undef __FUNC__
81 #define __FUNC__ "lis_epi_malloc_work"
lis_epi_malloc_work(LIS_ESOLVER esolver)82 LIS_INT lis_epi_malloc_work(LIS_ESOLVER esolver)
83 {
84 	LIS_VECTOR *work;
85 	LIS_INT	i,j,worklen,err;
86 
87 	LIS_DEBUG_FUNC_IN;
88 
89 	worklen = NWORK;
90 	work    = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_epi_malloc_work::work" );
91 	if( work==NULL )
92 	{
93 		LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
94 		return LIS_ERR_OUT_OF_MEMORY;
95 	}
96 	if( esolver->eprecision==LIS_PRECISION_DEFAULT )
97 	{
98 		for(i=0;i<worklen;i++)
99 		{
100 			err = lis_vector_duplicate(esolver->A,&work[i]);
101 			if( err ) break;
102 		}
103 	}
104 	else
105 	{
106 		for(i=0;i<worklen;i++)
107 		{
108 			err = lis_vector_duplicateex(LIS_PRECISION_QUAD,esolver->A,&work[i]);
109 			if( err ) break;
110 		}
111 	}
112 	if( i<worklen )
113 	{
114 		for(j=0;j<i;j++) lis_vector_destroy(work[j]);
115 		lis_free(work);
116 		return err;
117 	}
118 	esolver->worklen = worklen;
119 	esolver->work    = work;
120 
121 	LIS_DEBUG_FUNC_OUT;
122 	return LIS_SUCCESS;
123 }
124 
125 #undef __FUNC__
126 #define __FUNC__ "lis_epi"
lis_epi(LIS_ESOLVER esolver)127 LIS_INT lis_epi(LIS_ESOLVER esolver)
128 {
129   LIS_Comm comm;
130   LIS_MATRIX A;
131   LIS_VECTOR v,y,q;
132   LIS_SCALAR theta;
133   LIS_INT emaxiter;
134   LIS_REAL tol;
135   LIS_INT iter,output;
136   LIS_SCALAR oshift,ishift;
137   LIS_REAL nrm2,resid;
138 
139   LIS_DEBUG_FUNC_IN;
140 
141   comm = LIS_COMM_WORLD;
142 
143   emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
144   tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
145   output  = esolver->options[LIS_EOPTIONS_OUTPUT];
146 #ifdef _COMPLEX
147   oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN] + esolver->params[LIS_EPARAMS_SHIFT_IM - LIS_EOPTIONS_LEN] * _Complex_I;
148 #else
149   oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN];
150 #endif
151 
152   A = esolver->A;
153   v = esolver->x;
154   if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
155     {
156       lis_vector_set_all(1.0,v);
157     }
158   y = esolver->work[0];
159   q = esolver->work[1];
160 
161   if ( esolver->ishift != 0.0 ) oshift = ishift;
162   if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, oshift);
163 
164   if( output )
165     {
166 #ifdef _COMPLEX
167       lis_printf(comm,"shift                 : (%e, %e)\n", (double)creal(oshift), (double)cimag(oshift));
168 #else
169       lis_printf(comm,"shift                 : %e\n", (double)oshift);
170 #endif
171     }
172 
173   iter=0;
174   while (iter<emaxiter)
175     {
176       iter = iter+1;
177 
178       /* v = v / ||v||_2 */
179       lis_vector_nrm2(v, &nrm2);
180       lis_vector_scale(1.0/nrm2, v);
181 
182       /* y = A * v */
183       lis_matvec(A,v,y);
184 
185       /* theta = <v,y> */
186       lis_vector_dot(v, y, &theta);
187 
188       /* resid = ||y - theta * v||_2 / |theta| */
189       lis_vector_axpyz(-theta, v, y, q);
190       lis_vector_nrm2(q, &resid);
191       resid = resid / fabs(theta);
192 
193       /* v = y */
194       lis_vector_copy(y, v);
195 
196       /* convergence check */
197       if( output )
198 	{
199 	  if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
200 	  if( output & LIS_EPRINT_OUT ) lis_print_rhistory(comm,iter,resid);
201 	}
202 
203       if( tol >= resid )
204 	{
205 	  esolver->retcode    = LIS_SUCCESS;
206 	  esolver->iter[0]    = iter;
207 	  esolver->resid[0]   = resid;
208 	  esolver->evalue[0]  = theta + oshift;
209 	  lis_vector_nrm2(v, &nrm2);
210 	  lis_vector_scale(1.0/nrm2, v);
211 	  if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, -oshift);
212 	  LIS_DEBUG_FUNC_OUT;
213 	  return LIS_SUCCESS;
214 	}
215     }
216 
217   esolver->retcode   = LIS_MAXITER;
218   esolver->iter[0]   = iter;
219   esolver->resid[0]  = resid;
220   esolver->evalue[0] = theta + oshift;
221   lis_vector_nrm2(v, &nrm2);
222   lis_vector_scale(1.0/nrm2, v);
223   if ( oshift != 0.0 ) lis_matrix_shift_diagonal(A, -oshift);
224   LIS_DEBUG_FUNC_OUT;
225   return LIS_MAXITER;
226 }
227 
228 /***************************************
229  * Generalized Power Iteration         *
230  ***************************************
231  v    = (1,...,1)^T
232  ***************************************
233  for k=1,2,...
234    v         = v / ||v||_2
235    y         = A * v
236    v         = v / <v,w>^1/2
237    w         = w / <v,w>^1/2
238    y         = B^-1 * w
239    theta     = <w,y>
240    resid     = ||y - theta * v||_2 / |theta|
241    v         = y
242    if resid < tol then stop
243  lambda      = theta
244  x           = v / ||v||_2
245  ***************************************/
246 
247 #undef NWORK
248 #define NWORK 3
249 #undef __FUNC__
250 #define __FUNC__ "lis_egpi_check_params"
lis_egpi_check_params(LIS_ESOLVER esolver)251 LIS_INT lis_egpi_check_params(LIS_ESOLVER esolver)
252 {
253 	LIS_DEBUG_FUNC_IN;
254 	LIS_DEBUG_FUNC_OUT;
255 	return LIS_SUCCESS;
256 }
257 
258 #undef __FUNC__
259 #define __FUNC__ "lis_egpi_malloc_work"
lis_egpi_malloc_work(LIS_ESOLVER esolver)260 LIS_INT lis_egpi_malloc_work(LIS_ESOLVER esolver)
261 {
262 	LIS_VECTOR *work;
263 	LIS_INT	i,j,worklen,err;
264 
265 	LIS_DEBUG_FUNC_IN;
266 
267 	worklen = NWORK;
268 	work    = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_egpi_malloc_work::work" );
269 	if( work==NULL )
270 	{
271 		LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
272 		return LIS_ERR_OUT_OF_MEMORY;
273 	}
274 	if( esolver->eprecision==LIS_PRECISION_DEFAULT )
275 	{
276 		for(i=0;i<worklen;i++)
277 		{
278 			err = lis_vector_duplicate(esolver->A,&work[i]);
279 			if( err ) break;
280 		}
281 	}
282 	else
283 	{
284 		for(i=0;i<worklen;i++)
285 		{
286 			err = lis_vector_duplicateex(LIS_PRECISION_QUAD,esolver->A,&work[i]);
287 			if( err ) break;
288 		}
289 	}
290 	if( i<worklen )
291 	{
292 		for(j=0;j<i;j++) lis_vector_destroy(work[j]);
293 		lis_free(work);
294 		return err;
295 	}
296 	esolver->worklen = worklen;
297 	esolver->work    = work;
298 
299 	LIS_DEBUG_FUNC_OUT;
300 	return LIS_SUCCESS;
301 }
302 
303 #undef __FUNC__
304 #define __FUNC__ "lis_egpi"
lis_egpi(LIS_ESOLVER esolver)305 LIS_INT lis_egpi(LIS_ESOLVER esolver)
306 {
307   LIS_Comm comm;
308   LIS_MATRIX A,B;
309   LIS_VECTOR w,v,y,q;
310   LIS_SCALAR eta,theta;
311   LIS_INT emaxiter;
312   LIS_REAL tol;
313   LIS_INT iter,iter2,output;
314   LIS_SCALAR oshift,ishift;
315   LIS_REAL nrm2,resid;
316   LIS_SOLVER solver;
317   double time,itime,ptime,p_c_time,p_i_time;
318 
319   LIS_INT err;
320   LIS_PRECON precon;
321   LIS_INT nsol, precon_type;
322   char solvername[128], preconname[128];
323 
324   LIS_DEBUG_FUNC_IN;
325 
326   comm = LIS_COMM_WORLD;
327 
328   emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
329   tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
330   output  = esolver->options[LIS_EOPTIONS_OUTPUT];
331 #ifdef _COMPLEX
332   oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN] + esolver->params[LIS_EPARAMS_SHIFT_IM - LIS_EOPTIONS_LEN] * _Complex_I;
333 #else
334   oshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN];
335 #endif
336 
337   A = esolver->A;
338   B = esolver->B;
339   v = esolver->x;
340   if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
341     {
342       lis_vector_set_all(1.0,v);
343     }
344   w = esolver->work[0];
345   y = esolver->work[1];
346   q = esolver->work[2];
347 
348   if ( esolver->ishift != 0.0 ) oshift = ishift;
349   if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, oshift);
350 
351   if( output )
352     {
353 #ifdef _COMPLEX
354       lis_printf(comm,"shift                 : (%e, %e)\n", (double)creal(oshift), (double)cimag(oshift));
355 #else
356       lis_printf(comm,"shift                 : %e\n", (double)oshift);
357 #endif
358     }
359 
360   lis_solver_create(&solver);
361   lis_solver_set_option("-i bicg -p none",solver);
362   err = lis_solver_set_optionC(solver);
363   CHKERR(err);
364   lis_solver_get_solver(solver, &nsol);
365   lis_solver_get_precon(solver, &precon_type);
366   lis_solver_get_solvername(nsol, solvername);
367   lis_solver_get_preconname(precon_type, preconname);
368   if( output )
369     {
370       lis_printf(comm,"linear solver         : %s\n", solvername);
371       lis_printf(comm,"preconditioner        : %s\n", preconname);
372     }
373 
374   /* create preconditioner */
375   solver->A = B;
376   err = lis_precon_create(solver, &precon);
377   if( err )
378     {
379       lis_solver_work_destroy(solver);
380       solver->retcode = err;
381       return err;
382     }
383 
384   iter=0;
385   while (iter<emaxiter)
386     {
387       iter = iter+1;
388 
389       /* v = v / ||v||_2 */
390       lis_vector_nrm2(v, &nrm2);
391       lis_vector_scale(1.0/nrm2, v);
392 
393       /* w = A * v */
394       lis_matvec(A, v, w);
395 
396       /* v = v / <v,w>^1/2, w = w / <v,w>^1/2 */
397       lis_vector_dot(v, w, &eta);
398       eta = sqrt(eta);
399       lis_vector_scale(1.0/eta, v);
400       lis_vector_scale(1.0/eta, w);
401 
402       /* y = B^-1 * w */
403       err = lis_solve_kernel(B, w, y, solver, precon);
404       if( err )
405 	{
406 	  lis_solver_work_destroy(solver);
407 	  solver->retcode = err;
408 	  return err;
409 	}
410       lis_solver_get_iter(solver, &iter2);
411 
412       /* theta = <w,y> */
413       lis_vector_dot(w, y, &theta);
414 
415       /* resid = ||y - theta * v||_2 / |theta| */
416       lis_vector_axpyz(-theta, v, y, q);
417       lis_vector_nrm2(q, &resid);
418       resid = resid / fabs(theta);
419 
420       /* v = y */
421       lis_vector_copy(y, v);
422 
423       /* convergence check */
424       lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
425       esolver->ptime += solver->ptime;
426       esolver->itime += solver->itime;
427       esolver->p_c_time += solver->p_c_time;
428       esolver->p_i_time += solver->p_i_time;
429 
430       if( output )
431 	{
432 	  if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
433 	  if( output & LIS_EPRINT_OUT ) lis_print_rhistory(comm,iter,resid);
434 	}
435 
436       if( tol >= resid )
437 	{
438 	  esolver->retcode    = LIS_SUCCESS;
439 	  esolver->iter[0]    = iter;
440 	  esolver->resid[0]   = resid;
441 	  esolver->evalue[0]  = theta + oshift;
442 	  lis_vector_nrm2(v, &nrm2);
443 	  lis_vector_scale(1.0/nrm2, v);
444 	  if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, -oshift);
445 	  lis_precon_destroy(precon);
446 	  lis_solver_destroy(solver);
447 	  LIS_DEBUG_FUNC_OUT;
448 	  return LIS_SUCCESS;
449 	}
450     }
451 
452   lis_precon_destroy(precon);
453 
454   esolver->retcode   = LIS_MAXITER;
455   esolver->iter[0]   = iter;
456   esolver->resid[0]  = resid;
457   esolver->evalue[0] = theta + oshift;
458   lis_vector_nrm2(v, &nrm2);
459   lis_vector_scale(1.0/nrm2, v);
460   if ( oshift != 0.0 ) lis_matrix_shift_matrix(A, B, -oshift);
461   lis_solver_destroy(solver);
462   LIS_DEBUG_FUNC_OUT;
463   return LIS_MAXITER;
464 }
465 
466