1c* ///////////////////////////////////////////////////////////////////////////
2c* @file    nrchdrvd.f
3c* @author  Michael Holst
4c* @brief   Driver for nonlinear Richardson iteration.
5c* @version $Id: nrchdrvd.f,v 1.2 2010/08/12 05:45:37 fetk Exp $
6c* @attention
7c* @verbatim
8c*
9c* PMG -- Parallel algebraic MultiGrid
10c* Copyright (C) 1994-- Michael Holst.
11c*
12c* Michael Holst <mholst@math.ucsd.edu>
13c* University of California, San Diego
14c* Department of Mathematics, 5739 AP&M
15c* 9500 Gilman Drive, Dept. 0112
16c* La Jolla, CA 92093-0112 USA
17c* http://math.ucsd.edu/~mholst
18c*
19c* This file is part of PMG.
20c*
21c* PMG is free software; you can redistribute it and/or modify
22c* it under the terms of the GNU General Public License as published by
23c* the Free Software Foundation; either version 2 of the License, or
24c* (at your option) any later version.
25c*
26c* PMG is distributed in the hope that it will be useful,
27c* but WITHOUT ANY WARRANTY; without even the implied warranty of
28c* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29c* GNU General Public License for more details.
30c*
31c* You should have received a copy of the GNU General Public License
32c* along with PMG; if not, write to the Free Software
33c* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
34c*
35c* Linking PMG statically or dynamically with other modules is making a
36c* combined work based on PMG. Thus, the terms and conditions of the GNU
37c* General Public License cover the whole combination.
38c*
39c* SPECIAL GPL EXCEPTION
40c* In addition, as a special exception, the copyright holders of PMG
41c* give you permission to combine the PMG program with free software
42c* programs and libraries that are released under the GNU LGPL or with
43c* code included in releases of ISIM, PMV, PyMOL, SMOL, VMD, and Vision.
44c* Such combined software may be linked with PMG and redistributed together
45c* in original or modified form as mere aggregation without requirement that
46c* the entire work be under the scope of the GNU General Public License.
47c* This special exception permission is also extended to any software listed
48c* in the SPECIAL GPL EXCEPTION clauses by the FEtk and APBS libraries.
49c*
50c* Note that people who make modified versions of PMG are not obligated
51c* to grant this special exception for their modified versions; it is
52c* their choice whether to do so. The GNU General Public License gives
53c* permission to release a modified version without this exception; this
54c* exception also makes it possible to release a modified version which
55c* carries forward this exception.
56c*
57c* @endverbatim
58c* ///////////////////////////////////////////////////////////////////////////
59
60      subroutine nrichdriv(iparm,rparm,iwork,rwork,u,
61     2   xf,yf,zf,gxcf,gycf,gzcf,a1cf,a2cf,a3cf,ccf,fcf,tcf)
62c* *********************************************************************
63c* purpose:
64c*
65c*    driver for linear/nonlinear richardson's iteration.
66c*
67c* author:  michael holst
68c* *********************************************************************
69      implicit         none
70c*
71c*    *** input parameters ***
72      integer          iparm(*),iwork(*)
73      double precision rparm(*),rwork(*),u(*)
74      double precision xf(*),yf(*),zf(*),gxcf(*),gycf(*),gzcf(*)
75      double precision a1cf(*),a2cf(*),a3cf(*),ccf(*),fcf(*),tcf(*)
76c*
77c*    *** variables returned from mgsz ***
78      integer          iretot,iintot
79c*
80c*    *** misc variables ***
81      integer          nrwk,niwk,nx,ny,nz,nlev,ierror,n
82      integer          k_iz,k_w0
83      integer          k_ipc,k_rpc,k_ac,k_cc,k_fc
84      integer          n_iz,n_ipc,n_rpc
85c*
86c*    *** decode some parameters ***
87      nrwk    = iparm(1)
88      niwk    = iparm(2)
89      nx      = iparm(3)
90      ny      = iparm(4)
91      nz      = iparm(5)
92      nlev    = iparm(6)
93      n       = nx * ny * nz
94      n_iz    = 10*(nlev+1)
95      n_ipc   = 100*(nlev+1)
96      n_rpc   = 100*(nlev+1)
97c*
98c*    *** compute required work array sizes ***
99      iintot  = n_iz + n_ipc
100      iretot  = n_rpc + (3*n) + (4*n)
101c*
102c*    *** some more checks on input ***
103      if ((nrwk.lt.iretot) .or. (niwk.lt.iintot)) then
104         print*,'% NRICHDRIV: real    work space must be: ',iretot
105         print*,'% NRICHDRIV: integer work space must be: ',iintot
106         ierror = -3
107         iparm(51) = ierror
108         return
109      endif
110c*
111c*    *** iwork offsets ***
112      k_iz  = 1
113      k_ipc = k_iz  + n_iz
114c*
115c*    *** rwork offsets ***
116      k_rpc = 1
117      k_cc  = k_rpc + n_rpc
118      k_fc  = k_cc  + n
119      k_w0  = k_fc  + n
120      k_ac  = k_w0  + n
121c* ***k_ac_after  = k_ac + 4*n
122c*
123c*    *** call the multigrid driver ***
124      call nrichdriv2(iparm,rparm,nx,ny,nz,u,iwork(k_iz),
125     2   rwork(k_w0),
126     3   iwork(k_ipc),rwork(k_rpc),
127     4   rwork(k_ac),rwork(k_cc),rwork(k_fc),
128     5   xf,yf,zf,gxcf,gycf,gzcf,a1cf,a2cf,a3cf,ccf,fcf,tcf)
129c*
130c*    *** return and end ***
131      return
132      end
133      subroutine nrichdriv2(iparm,rparm,nx,ny,nz,u,iz,
134     2   w0,
135     3   ipc,rpc,ac,cc,fc,
136     4   xf,yf,zf,gxcf,gycf,gzcf,a1cf,a2cf,a3cf,ccf,fcf,tcf)
137c* *********************************************************************
138c* purpose:
139c*
140c*    driver for linear/nonlinear richardson's iteration.
141c*
142c* author:  michael holst
143c* *********************************************************************
144      implicit         none
145c*
146c*    *** input parameters ***
147      integer          iparm(*),ipc(*),iz(*)
148      double precision rparm(*),rpc(*)
149      double precision u(*),w0(*),ac(*),cc(*),fc(*)
150      double precision xf(*),yf(*),zf(*),gxcf(*),gycf(*),gzcf(*)
151      double precision a1cf(*),a2cf(*),a3cf(*),ccf(*),fcf(*),tcf(*)
152c*
153c*    *** misc variables ***
154      integer          mgkey,nlev,itmax,iok,iinfo,istop,ipkey,nu1,nu2
155      integer          nx,ny,nz,ilev,ido,iters,ierror,ibound,mode
156      integer          mgprol,mgcoar,mgsolv,mgdisc
157      double precision epsiln,epsmac,errtol,omegal,omegan
158      double precision pc_dumm
159      double precision bf,oh,tsetupf,tsetupc,tsolve
160c*
161c*    *** decode the iparm array ***
162      nlev   = iparm(6)
163      nu1    = iparm(7)
164      nu2    = iparm(8)
165      mgkey  = iparm(9)
166      itmax  = iparm(10)
167      istop  = iparm(11)
168      iinfo  = iparm(12)
169      ipkey  = iparm(14)
170      mode   = iparm(16)
171      mgprol = iparm(17)
172      mgcoar = iparm(18)
173      mgdisc = iparm(19)
174      mgsolv = iparm(21)
175      errtol = rparm(1)
176      omegal = rparm(9)
177      omegan = rparm(10)
178c*
179c*    *** intitialize the iteration timer ***
180      call prtstp(0,-99,0.0d0,0.0d0,0.0d0)
181c*
182c*    *** build the multigrid data structure in iz ***
183      call buildstr (nx,ny,nz,nlev,iz)
184c*
185c*    *** start timer ***
186      call tstart(bf,oh)
187c*
188c*    *** build op and rhs on fine grid ***
189      ido = 0
190      call buildops (nx,ny,nz,nlev,ipkey,iinfo,ido,iz,
191     2   mgprol,mgcoar,mgsolv,mgdisc,
192     3   ipc,rpc,pc_dumm,ac,cc,fc,
193     4   xf,yf,zf,gxcf,gycf,gzcf,a1cf,a2cf,a3cf,ccf,fcf,tcf)
194c*
195c*    *** stop timer ***
196      call tstop(bf,oh,tsetupf)
197      print*,'% NRICHDRIV2: fine problem setup time: ',tsetupf
198      tsetupc = 0.0d0
199c*
200c* ******************************************************************
201c* *** this overwrites the rhs array provided by pde specification
202c* ****** compute an algebraically produced rhs for the given tcf ***
203      if ((istop .eq. 4) .or. (istop .eq. 5)) then
204         if ((mode .eq. 1) .or. (mode .eq. 2)) then
205            call nmatvec(nx,ny,nz,ipc,rpc,ac,cc,tcf,fc,w0)
206         else
207            call matvec(nx,ny,nz,ipc,rpc,ac,cc,tcf,fc)
208         endif
209      endif
210c* ******************************************************************
211c*
212c*    *** determine machine epsilon ***
213      epsiln = epsmac(0)
214c*
215c*    *** impose zero dirichlet boundary conditions (now in source fcn) ***
216      call fbound00(nx,ny,nz,u)
217c*
218c*    *** MATLAB ***
219      print*,' rich = [ '
220c*
221c*    *** start timer ***
222      call tstart(bf,oh)
223c*
224c*    *** call specified multigrid method ***
225      if ((mode .eq. 0) .or. (mode .eq. 2)) then
226         print*,'% NRICHDRIV2: linear mode...'
227         iok  = 1
228         ilev = 1
229         call richgo(nx,ny,nz,u,w0,a1cf,a2cf,
230     2      istop,itmax,iters,ierror,
231     3      iok,iinfo,epsiln,errtol,omegal,
232     4      ipc,rpc,ac,cc,fc,tcf)
233      endif
234      if ((mode .eq. 1) .or. (mode .eq. 2)) then
235         print*,'% NRICHDRIV2: nonlinear mode...'
236         iok  = 1
237         ilev = 1
238         call nrichgo(nx,ny,nz,u,w0,a1cf,a2cf,
239     2      istop,itmax,iters,ierror,
240     3      iok,iinfo,epsiln,errtol,omegan,
241     4      ipc,rpc,ac,cc,fc,tcf)
242      endif
243c*
244c*    *** stop timer ***
245      call tstop(bf,oh,tsolve)
246      print*,'% NRICHDRIV2: solve time: ',tsolve
247c*
248c*    *** MATLAB ***
249      write(*,100) 'rich_sf',tsetupf,'rich_sc',tsetupc,
250     2   'rich_st',(tsetupf+tsetupc),'rich_so',tsolve
251 100  format(' ];',4(' ',a7,'=',1pe9.3,';'))
252c*
253c*    *** restore boundary conditions ***
254      ibound = 1
255      call fbound(ibound,nx,ny,nz,u,gxcf,gycf,gzcf)
256c*
257c*    *** return and end ***
258      return
259      end
260      subroutine nrichgo(nx,ny,nz,x,r,w1,w2,
261     2   istop,itmax,iters,ierror,
262     3   iok,iinfo,epsiln,errtol,omega,
263     4   ipc,rpc,ac,cc,fc,tru)
264c* *********************************************************************
265c* purpose:
266c*
267c*    nonlinear richardson's iteration.
268c*
269c* author:  michael holst
270c* *********************************************************************
271      implicit         none
272c*
273c*    *** other declarations ***
274      integer          ipc(*),iok,iinfo
275      integer          itmax,iters,ierror
276      integer          iresid,iadjoint,istop,itmax_s,iters_s
277      integer          nx,ny,nz
278      double precision omega,errtol,epsiln,errtol_s,omega_s
279      double precision rsden,rsnrm,orsnrm,xnrm1,xnrm2,xdot
280      double precision x(*),r(*),w1(*),w2(*)
281      double precision rpc(*),ac(*),cc(*),fc(*),tru(*)
282c*
283c*    *** do some i/o if requested ***
284      if (iinfo.ne.0) then
285         write(6,100)'% NRICHGO: starting:',nx,ny,nz
286 100     format(a,(2x,' [',i3,',',i3,',',i3,'] '))
287      endif
288c*
289c*    *** initial wall clock ***
290      call prtini(istop)
291      call prtstp(iok,-1,0.0d0,0.0d0,0.0d0)
292c*
293c*    **************************************************************
294c*    *** note: if (iok.ne.0) then:  use a stopping test.        ***
295c*    ***       else:  use just the itmax to stop iteration.     ***
296c*    **************************************************************
297c*    *** istop=0 most efficient (whatever it is)                ***
298c*    *** istop=1 relative residual                              ***
299c*    *** istop=2 rms difference of successive iterates          ***
300c*    *** istop=3 relative true error (provided for testing)     ***
301c*    **************************************************************
302c*
303c*    *** compute denominator for stopping criterion ***
304      if (istop .eq. 0) then
305         rsden = 1.0d0
306      elseif (istop .eq. 1) then
307c*       *** compute initial residual with zero initial guess ***
308c*       *** this is analogous to the linear case where one can ***
309c*       *** simply take norm of rhs for a zero initial guess ***
310         call azeros(nx,ny,nz,w1)
311         call nmresid(nx,ny,nz,ipc,rpc,ac,cc,fc,w1,r,w2)
312         rsden = xnrm1(nx,ny,nz,r)
313      elseif (istop .eq. 2) then
314         rsden = dsqrt(dble(nx*ny*nz))
315      elseif (istop .eq. 3) then
316         rsden = xnrm2(nx,ny,nz,tru)
317      elseif (istop .eq. 4) then
318         rsden = xnrm2(nx,ny,nz,tru)
319      elseif (istop .eq. 5) then
320         call nmatvec(nx,ny,nz,ipc,rpc,ac,cc,tru,w1,w2)
321         rsden = dsqrt(xdot(nx,ny,nz,tru,w1))
322      else
323         print*,'% NRICHGO: bad istop value... '
324      endif
325      if (rsden.eq.0.0d0) then
326         rsden = 1.0d0
327         print*,'% NRICHGO: rhs is zero '
328      endif
329      rsnrm = rsden
330      orsnrm = rsnrm
331      call prtstp (iok,0,rsnrm,rsden,orsnrm)
332c*
333c*    *** setup for the looping ***
334      iters  = 0
335 30   continue
336         iters  = iters  + 10
337c*
338c*       *** save iterate if stop test will use it on next iter ***
339         if (istop .eq. 2) call xcopy(nx,ny,nz,x,tru)
340c*
341c*       *** do 10 iterations ***
342         iresid = 1
343         iadjoint = 0
344         errtol_s = 0.0d0
345         itmax_s  = 10
346         omega_s  = omega
347         call nrich(nx,ny,nz,ipc,rpc,ac,cc,fc,x,w1,w2,r,
348     2      itmax_s,iters_s,errtol_s,omega_s,iresid,iadjoint)
349c*
350c*       *** compute/check the current stopping test ***
351         orsnrm = rsnrm
352         if (istop .eq. 0) then
353            rsnrm = xnrm1(nx,ny,nz,r)
354         elseif (istop .eq. 1) then
355            rsnrm = xnrm1(nx,ny,nz,r)
356         elseif (istop .eq. 2) then
357            call xcopy(nx,ny,nz,tru,w1)
358            call xaxpy(nx,ny,nz,(-1.0d0),x,w1)
359            rsnrm = xnrm1(nx,ny,nz,w1)
360         elseif (istop .eq. 3) then
361            call xcopy(nx,ny,nz,tru,w1)
362            call xaxpy(nx,ny,nz,(-1.0d0),x,w1)
363            rsnrm = xnrm2(nx,ny,nz,w1)
364         elseif (istop .eq. 4) then
365            call xcopy(nx,ny,nz,tru,w1)
366            call xaxpy(nx,ny,nz,(-1.0d0),x,w1)
367            rsnrm = xnrm2(nx,ny,nz,w1)
368         elseif (istop .eq. 5) then
369            call xcopy(nx,ny,nz,tru,w1)
370            call xaxpy(nx,ny,nz,(-1.0d0),x,w1)
371            call nmatvec(nx,ny,nz,ipc,rpc,ac,cc,w1,w2,r)
372            rsnrm = dsqrt(xdot(nx,ny,nz,w1,w2))
373         else
374            print*,'% NRICHGO: bad istop value... '
375         endif
376         call prtstp (iok,iters,rsnrm,rsden,orsnrm)
377         if ((rsnrm/rsden) .le. errtol) goto 99
378         if (iters .ge. itmax) goto 99
379      goto 30
380c*
381c*    *** return and end ***
382 99   continue
383      return
384      end
385      subroutine richgo(nx,ny,nz,x,r,w1,w2,
386     2   istop,itmax,iters,ierror,
387     3   iok,iinfo,epsiln,errtol,omega,
388     4   ipc,rpc,ac,cc,fc,tru)
389c* *********************************************************************
390c* purpose:
391c*
392c*    linear richardson's iteration.
393c*
394c* author:  michael holst
395c* *********************************************************************
396      implicit         none
397c*
398c*    *** other declarations ***
399      integer          ipc(*),iok,iinfo
400      integer          itmax,iters,ierror
401      integer          iresid,iadjoint,istop,itmax_s,iters_s
402      integer          nx,ny,nz
403      double precision omega,errtol,epsiln,errtol_s,omega_s
404      double precision rsden,rsnrm,orsnrm,xnrm1,xnrm2,xdot
405      double precision x(*),r(*),w1(*),w2(*)
406      double precision rpc(*),ac(*),cc(*),fc(*),tru(*)
407c*
408c*    *** do some i/o if requested ***
409      if (iinfo.ne.0) then
410         write(6,100)'% RICHGO: starting: ',nx,ny,nz
411 100     format(a,(2x,' [',i3,',',i3,',',i3,'] '))
412      endif
413c*
414c*    *** initial wall clock ***
415      call prtini(istop)
416      call prtstp(iok,-1,0.0d0,0.0d0,0.0d0)
417c*
418c*    **************************************************************
419c*    *** note: if (iok.ne.0) then:  use a stopping test.        ***
420c*    ***       else:  use just the itmax to stop iteration.     ***
421c*    **************************************************************
422c*    *** istop=0 most efficient (whatever it is)                ***
423c*    *** istop=1 relative residual                              ***
424c*    *** istop=2 rms difference of successive iterates          ***
425c*    *** istop=3 relative true error (provided for testing)     ***
426c*    **************************************************************
427c*
428c*    *** compute denominator for stopping criterion ***
429      if (istop .eq. 0) then
430         rsden = 1.0d0
431      elseif (istop .eq. 1) then
432         rsden = xnrm1(nx,ny,nz,fc)
433      elseif (istop .eq. 2) then
434         rsden = dsqrt(dble(nx*ny*nz))
435      elseif (istop .eq. 3) then
436         rsden = xnrm2(nx,ny,nz,tru)
437      elseif (istop .eq. 4) then
438         rsden = xnrm2(nx,ny,nz,tru)
439      elseif (istop .eq. 5) then
440         call matvec(nx,ny,nz,ipc,rpc,ac,cc,tru,w1)
441         rsden = dsqrt(xdot(nx,ny,nz,tru,w1))
442      else
443         print*,'% RICHGO: bad istop value... '
444      endif
445      if (rsden.eq.0.0d0) then
446         rsden = 1.0d0
447         print*,'% RICHGO: rhs is zero '
448      endif
449      rsnrm = rsden
450      orsnrm = rsnrm
451      call prtstp (iok,0,rsnrm,rsden,orsnrm)
452c*
453c*    *** setup for the looping ***
454      iters  = 0
455 30   continue
456         iters  = iters  + 10
457c*
458c*       *** save iterate if stop test will use it on next iter ***
459         if (istop .eq. 2) call xcopy(nx,ny,nz,x,tru)
460c*
461c*       *** do 10 iterations ***
462         iresid = 1
463         iadjoint = 0
464         errtol_s = 0.0d0
465         itmax_s  = 10
466         omega_s  = omega
467         call rich(nx,ny,nz,ipc,rpc,ac,cc,fc,x,w1,w2,r,
468     2      itmax_s,iters_s,errtol_s,omega_s,iresid,iadjoint)
469c*
470c*       *** compute/check the current stopping test ***
471         orsnrm = rsnrm
472         if (istop .eq. 0) then
473            rsnrm = xnrm1(nx,ny,nz,r)
474         elseif (istop .eq. 1) then
475            rsnrm = xnrm1(nx,ny,nz,r)
476         elseif (istop .eq. 2) then
477            call xcopy(nx,ny,nz,tru,w1)
478            call xaxpy(nx,ny,nz,(-1.0d0),x,w1)
479            rsnrm = xnrm1(nx,ny,nz,w1)
480         elseif (istop .eq. 3) then
481            call xcopy(nx,ny,nz,tru,w1)
482            call xaxpy(nx,ny,nz,(-1.0d0),x,w1)
483            rsnrm = xnrm2(nx,ny,nz,w1)
484         elseif (istop .eq. 4) then
485            call xcopy(nx,ny,nz,tru,w1)
486            call xaxpy(nx,ny,nz,(-1.0d0),x,w1)
487            rsnrm = xnrm2(nx,ny,nz,w1)
488         elseif (istop .eq. 5) then
489            call xcopy(nx,ny,nz,tru,w1)
490            call xaxpy(nx,ny,nz,(-1.0d0),x,w1)
491            call matvec(nx,ny,nz,ipc,rpc,ac,cc,w1,w2)
492            rsnrm = dsqrt(xdot(nx,ny,nz,w1,w2))
493         else
494            print*,'% RICHGO: bad istop value... '
495         endif
496         call prtstp (iok,iters,rsnrm,rsden,orsnrm)
497         if ((rsnrm/rsden) .le. errtol) goto 99
498         if (iters .ge. itmax) goto 99
499      goto 30
500c*
501c*    *** return and end ***
502 99   continue
503      return
504      end
505