1c* /////////////////////////////////////////////////////////////////////////// 2c* @file nrchdrvd.f 3c* @author Michael Holst 4c* @brief Driver for nonlinear Richardson iteration. 5c* @version $Id: nrchdrvd.f,v 1.2 2010/08/12 05:45:37 fetk Exp $ 6c* @attention 7c* @verbatim 8c* 9c* PMG -- Parallel algebraic MultiGrid 10c* Copyright (C) 1994-- Michael Holst. 11c* 12c* Michael Holst <mholst@math.ucsd.edu> 13c* University of California, San Diego 14c* Department of Mathematics, 5739 AP&M 15c* 9500 Gilman Drive, Dept. 0112 16c* La Jolla, CA 92093-0112 USA 17c* http://math.ucsd.edu/~mholst 18c* 19c* This file is part of PMG. 20c* 21c* PMG is free software; you can redistribute it and/or modify 22c* it under the terms of the GNU General Public License as published by 23c* the Free Software Foundation; either version 2 of the License, or 24c* (at your option) any later version. 25c* 26c* PMG is distributed in the hope that it will be useful, 27c* but WITHOUT ANY WARRANTY; without even the implied warranty of 28c* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 29c* GNU General Public License for more details. 30c* 31c* You should have received a copy of the GNU General Public License 32c* along with PMG; if not, write to the Free Software 33c* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 34c* 35c* Linking PMG statically or dynamically with other modules is making a 36c* combined work based on PMG. Thus, the terms and conditions of the GNU 37c* General Public License cover the whole combination. 38c* 39c* SPECIAL GPL EXCEPTION 40c* In addition, as a special exception, the copyright holders of PMG 41c* give you permission to combine the PMG program with free software 42c* programs and libraries that are released under the GNU LGPL or with 43c* code included in releases of ISIM, PMV, PyMOL, SMOL, VMD, and Vision. 44c* Such combined software may be linked with PMG and redistributed together 45c* in original or modified form as mere aggregation without requirement that 46c* the entire work be under the scope of the GNU General Public License. 47c* This special exception permission is also extended to any software listed 48c* in the SPECIAL GPL EXCEPTION clauses by the FEtk and APBS libraries. 49c* 50c* Note that people who make modified versions of PMG are not obligated 51c* to grant this special exception for their modified versions; it is 52c* their choice whether to do so. The GNU General Public License gives 53c* permission to release a modified version without this exception; this 54c* exception also makes it possible to release a modified version which 55c* carries forward this exception. 56c* 57c* @endverbatim 58c* /////////////////////////////////////////////////////////////////////////// 59 60 subroutine nrichdriv(iparm,rparm,iwork,rwork,u, 61 2 xf,yf,zf,gxcf,gycf,gzcf,a1cf,a2cf,a3cf,ccf,fcf,tcf) 62c* ********************************************************************* 63c* purpose: 64c* 65c* driver for linear/nonlinear richardson's iteration. 66c* 67c* author: michael holst 68c* ********************************************************************* 69 implicit none 70c* 71c* *** input parameters *** 72 integer iparm(*),iwork(*) 73 double precision rparm(*),rwork(*),u(*) 74 double precision xf(*),yf(*),zf(*),gxcf(*),gycf(*),gzcf(*) 75 double precision a1cf(*),a2cf(*),a3cf(*),ccf(*),fcf(*),tcf(*) 76c* 77c* *** variables returned from mgsz *** 78 integer iretot,iintot 79c* 80c* *** misc variables *** 81 integer nrwk,niwk,nx,ny,nz,nlev,ierror,n 82 integer k_iz,k_w0 83 integer k_ipc,k_rpc,k_ac,k_cc,k_fc 84 integer n_iz,n_ipc,n_rpc 85c* 86c* *** decode some parameters *** 87 nrwk = iparm(1) 88 niwk = iparm(2) 89 nx = iparm(3) 90 ny = iparm(4) 91 nz = iparm(5) 92 nlev = iparm(6) 93 n = nx * ny * nz 94 n_iz = 10*(nlev+1) 95 n_ipc = 100*(nlev+1) 96 n_rpc = 100*(nlev+1) 97c* 98c* *** compute required work array sizes *** 99 iintot = n_iz + n_ipc 100 iretot = n_rpc + (3*n) + (4*n) 101c* 102c* *** some more checks on input *** 103 if ((nrwk.lt.iretot) .or. (niwk.lt.iintot)) then 104 print*,'% NRICHDRIV: real work space must be: ',iretot 105 print*,'% NRICHDRIV: integer work space must be: ',iintot 106 ierror = -3 107 iparm(51) = ierror 108 return 109 endif 110c* 111c* *** iwork offsets *** 112 k_iz = 1 113 k_ipc = k_iz + n_iz 114c* 115c* *** rwork offsets *** 116 k_rpc = 1 117 k_cc = k_rpc + n_rpc 118 k_fc = k_cc + n 119 k_w0 = k_fc + n 120 k_ac = k_w0 + n 121c* ***k_ac_after = k_ac + 4*n 122c* 123c* *** call the multigrid driver *** 124 call nrichdriv2(iparm,rparm,nx,ny,nz,u,iwork(k_iz), 125 2 rwork(k_w0), 126 3 iwork(k_ipc),rwork(k_rpc), 127 4 rwork(k_ac),rwork(k_cc),rwork(k_fc), 128 5 xf,yf,zf,gxcf,gycf,gzcf,a1cf,a2cf,a3cf,ccf,fcf,tcf) 129c* 130c* *** return and end *** 131 return 132 end 133 subroutine nrichdriv2(iparm,rparm,nx,ny,nz,u,iz, 134 2 w0, 135 3 ipc,rpc,ac,cc,fc, 136 4 xf,yf,zf,gxcf,gycf,gzcf,a1cf,a2cf,a3cf,ccf,fcf,tcf) 137c* ********************************************************************* 138c* purpose: 139c* 140c* driver for linear/nonlinear richardson's iteration. 141c* 142c* author: michael holst 143c* ********************************************************************* 144 implicit none 145c* 146c* *** input parameters *** 147 integer iparm(*),ipc(*),iz(*) 148 double precision rparm(*),rpc(*) 149 double precision u(*),w0(*),ac(*),cc(*),fc(*) 150 double precision xf(*),yf(*),zf(*),gxcf(*),gycf(*),gzcf(*) 151 double precision a1cf(*),a2cf(*),a3cf(*),ccf(*),fcf(*),tcf(*) 152c* 153c* *** misc variables *** 154 integer mgkey,nlev,itmax,iok,iinfo,istop,ipkey,nu1,nu2 155 integer nx,ny,nz,ilev,ido,iters,ierror,ibound,mode 156 integer mgprol,mgcoar,mgsolv,mgdisc 157 double precision epsiln,epsmac,errtol,omegal,omegan 158 double precision pc_dumm 159 double precision bf,oh,tsetupf,tsetupc,tsolve 160c* 161c* *** decode the iparm array *** 162 nlev = iparm(6) 163 nu1 = iparm(7) 164 nu2 = iparm(8) 165 mgkey = iparm(9) 166 itmax = iparm(10) 167 istop = iparm(11) 168 iinfo = iparm(12) 169 ipkey = iparm(14) 170 mode = iparm(16) 171 mgprol = iparm(17) 172 mgcoar = iparm(18) 173 mgdisc = iparm(19) 174 mgsolv = iparm(21) 175 errtol = rparm(1) 176 omegal = rparm(9) 177 omegan = rparm(10) 178c* 179c* *** intitialize the iteration timer *** 180 call prtstp(0,-99,0.0d0,0.0d0,0.0d0) 181c* 182c* *** build the multigrid data structure in iz *** 183 call buildstr (nx,ny,nz,nlev,iz) 184c* 185c* *** start timer *** 186 call tstart(bf,oh) 187c* 188c* *** build op and rhs on fine grid *** 189 ido = 0 190 call buildops (nx,ny,nz,nlev,ipkey,iinfo,ido,iz, 191 2 mgprol,mgcoar,mgsolv,mgdisc, 192 3 ipc,rpc,pc_dumm,ac,cc,fc, 193 4 xf,yf,zf,gxcf,gycf,gzcf,a1cf,a2cf,a3cf,ccf,fcf,tcf) 194c* 195c* *** stop timer *** 196 call tstop(bf,oh,tsetupf) 197 print*,'% NRICHDRIV2: fine problem setup time: ',tsetupf 198 tsetupc = 0.0d0 199c* 200c* ****************************************************************** 201c* *** this overwrites the rhs array provided by pde specification 202c* ****** compute an algebraically produced rhs for the given tcf *** 203 if ((istop .eq. 4) .or. (istop .eq. 5)) then 204 if ((mode .eq. 1) .or. (mode .eq. 2)) then 205 call nmatvec(nx,ny,nz,ipc,rpc,ac,cc,tcf,fc,w0) 206 else 207 call matvec(nx,ny,nz,ipc,rpc,ac,cc,tcf,fc) 208 endif 209 endif 210c* ****************************************************************** 211c* 212c* *** determine machine epsilon *** 213 epsiln = epsmac(0) 214c* 215c* *** impose zero dirichlet boundary conditions (now in source fcn) *** 216 call fbound00(nx,ny,nz,u) 217c* 218c* *** MATLAB *** 219 print*,' rich = [ ' 220c* 221c* *** start timer *** 222 call tstart(bf,oh) 223c* 224c* *** call specified multigrid method *** 225 if ((mode .eq. 0) .or. (mode .eq. 2)) then 226 print*,'% NRICHDRIV2: linear mode...' 227 iok = 1 228 ilev = 1 229 call richgo(nx,ny,nz,u,w0,a1cf,a2cf, 230 2 istop,itmax,iters,ierror, 231 3 iok,iinfo,epsiln,errtol,omegal, 232 4 ipc,rpc,ac,cc,fc,tcf) 233 endif 234 if ((mode .eq. 1) .or. (mode .eq. 2)) then 235 print*,'% NRICHDRIV2: nonlinear mode...' 236 iok = 1 237 ilev = 1 238 call nrichgo(nx,ny,nz,u,w0,a1cf,a2cf, 239 2 istop,itmax,iters,ierror, 240 3 iok,iinfo,epsiln,errtol,omegan, 241 4 ipc,rpc,ac,cc,fc,tcf) 242 endif 243c* 244c* *** stop timer *** 245 call tstop(bf,oh,tsolve) 246 print*,'% NRICHDRIV2: solve time: ',tsolve 247c* 248c* *** MATLAB *** 249 write(*,100) 'rich_sf',tsetupf,'rich_sc',tsetupc, 250 2 'rich_st',(tsetupf+tsetupc),'rich_so',tsolve 251 100 format(' ];',4(' ',a7,'=',1pe9.3,';')) 252c* 253c* *** restore boundary conditions *** 254 ibound = 1 255 call fbound(ibound,nx,ny,nz,u,gxcf,gycf,gzcf) 256c* 257c* *** return and end *** 258 return 259 end 260 subroutine nrichgo(nx,ny,nz,x,r,w1,w2, 261 2 istop,itmax,iters,ierror, 262 3 iok,iinfo,epsiln,errtol,omega, 263 4 ipc,rpc,ac,cc,fc,tru) 264c* ********************************************************************* 265c* purpose: 266c* 267c* nonlinear richardson's iteration. 268c* 269c* author: michael holst 270c* ********************************************************************* 271 implicit none 272c* 273c* *** other declarations *** 274 integer ipc(*),iok,iinfo 275 integer itmax,iters,ierror 276 integer iresid,iadjoint,istop,itmax_s,iters_s 277 integer nx,ny,nz 278 double precision omega,errtol,epsiln,errtol_s,omega_s 279 double precision rsden,rsnrm,orsnrm,xnrm1,xnrm2,xdot 280 double precision x(*),r(*),w1(*),w2(*) 281 double precision rpc(*),ac(*),cc(*),fc(*),tru(*) 282c* 283c* *** do some i/o if requested *** 284 if (iinfo.ne.0) then 285 write(6,100)'% NRICHGO: starting:',nx,ny,nz 286 100 format(a,(2x,' [',i3,',',i3,',',i3,'] ')) 287 endif 288c* 289c* *** initial wall clock *** 290 call prtini(istop) 291 call prtstp(iok,-1,0.0d0,0.0d0,0.0d0) 292c* 293c* ************************************************************** 294c* *** note: if (iok.ne.0) then: use a stopping test. *** 295c* *** else: use just the itmax to stop iteration. *** 296c* ************************************************************** 297c* *** istop=0 most efficient (whatever it is) *** 298c* *** istop=1 relative residual *** 299c* *** istop=2 rms difference of successive iterates *** 300c* *** istop=3 relative true error (provided for testing) *** 301c* ************************************************************** 302c* 303c* *** compute denominator for stopping criterion *** 304 if (istop .eq. 0) then 305 rsden = 1.0d0 306 elseif (istop .eq. 1) then 307c* *** compute initial residual with zero initial guess *** 308c* *** this is analogous to the linear case where one can *** 309c* *** simply take norm of rhs for a zero initial guess *** 310 call azeros(nx,ny,nz,w1) 311 call nmresid(nx,ny,nz,ipc,rpc,ac,cc,fc,w1,r,w2) 312 rsden = xnrm1(nx,ny,nz,r) 313 elseif (istop .eq. 2) then 314 rsden = dsqrt(dble(nx*ny*nz)) 315 elseif (istop .eq. 3) then 316 rsden = xnrm2(nx,ny,nz,tru) 317 elseif (istop .eq. 4) then 318 rsden = xnrm2(nx,ny,nz,tru) 319 elseif (istop .eq. 5) then 320 call nmatvec(nx,ny,nz,ipc,rpc,ac,cc,tru,w1,w2) 321 rsden = dsqrt(xdot(nx,ny,nz,tru,w1)) 322 else 323 print*,'% NRICHGO: bad istop value... ' 324 endif 325 if (rsden.eq.0.0d0) then 326 rsden = 1.0d0 327 print*,'% NRICHGO: rhs is zero ' 328 endif 329 rsnrm = rsden 330 orsnrm = rsnrm 331 call prtstp (iok,0,rsnrm,rsden,orsnrm) 332c* 333c* *** setup for the looping *** 334 iters = 0 335 30 continue 336 iters = iters + 10 337c* 338c* *** save iterate if stop test will use it on next iter *** 339 if (istop .eq. 2) call xcopy(nx,ny,nz,x,tru) 340c* 341c* *** do 10 iterations *** 342 iresid = 1 343 iadjoint = 0 344 errtol_s = 0.0d0 345 itmax_s = 10 346 omega_s = omega 347 call nrich(nx,ny,nz,ipc,rpc,ac,cc,fc,x,w1,w2,r, 348 2 itmax_s,iters_s,errtol_s,omega_s,iresid,iadjoint) 349c* 350c* *** compute/check the current stopping test *** 351 orsnrm = rsnrm 352 if (istop .eq. 0) then 353 rsnrm = xnrm1(nx,ny,nz,r) 354 elseif (istop .eq. 1) then 355 rsnrm = xnrm1(nx,ny,nz,r) 356 elseif (istop .eq. 2) then 357 call xcopy(nx,ny,nz,tru,w1) 358 call xaxpy(nx,ny,nz,(-1.0d0),x,w1) 359 rsnrm = xnrm1(nx,ny,nz,w1) 360 elseif (istop .eq. 3) then 361 call xcopy(nx,ny,nz,tru,w1) 362 call xaxpy(nx,ny,nz,(-1.0d0),x,w1) 363 rsnrm = xnrm2(nx,ny,nz,w1) 364 elseif (istop .eq. 4) then 365 call xcopy(nx,ny,nz,tru,w1) 366 call xaxpy(nx,ny,nz,(-1.0d0),x,w1) 367 rsnrm = xnrm2(nx,ny,nz,w1) 368 elseif (istop .eq. 5) then 369 call xcopy(nx,ny,nz,tru,w1) 370 call xaxpy(nx,ny,nz,(-1.0d0),x,w1) 371 call nmatvec(nx,ny,nz,ipc,rpc,ac,cc,w1,w2,r) 372 rsnrm = dsqrt(xdot(nx,ny,nz,w1,w2)) 373 else 374 print*,'% NRICHGO: bad istop value... ' 375 endif 376 call prtstp (iok,iters,rsnrm,rsden,orsnrm) 377 if ((rsnrm/rsden) .le. errtol) goto 99 378 if (iters .ge. itmax) goto 99 379 goto 30 380c* 381c* *** return and end *** 382 99 continue 383 return 384 end 385 subroutine richgo(nx,ny,nz,x,r,w1,w2, 386 2 istop,itmax,iters,ierror, 387 3 iok,iinfo,epsiln,errtol,omega, 388 4 ipc,rpc,ac,cc,fc,tru) 389c* ********************************************************************* 390c* purpose: 391c* 392c* linear richardson's iteration. 393c* 394c* author: michael holst 395c* ********************************************************************* 396 implicit none 397c* 398c* *** other declarations *** 399 integer ipc(*),iok,iinfo 400 integer itmax,iters,ierror 401 integer iresid,iadjoint,istop,itmax_s,iters_s 402 integer nx,ny,nz 403 double precision omega,errtol,epsiln,errtol_s,omega_s 404 double precision rsden,rsnrm,orsnrm,xnrm1,xnrm2,xdot 405 double precision x(*),r(*),w1(*),w2(*) 406 double precision rpc(*),ac(*),cc(*),fc(*),tru(*) 407c* 408c* *** do some i/o if requested *** 409 if (iinfo.ne.0) then 410 write(6,100)'% RICHGO: starting: ',nx,ny,nz 411 100 format(a,(2x,' [',i3,',',i3,',',i3,'] ')) 412 endif 413c* 414c* *** initial wall clock *** 415 call prtini(istop) 416 call prtstp(iok,-1,0.0d0,0.0d0,0.0d0) 417c* 418c* ************************************************************** 419c* *** note: if (iok.ne.0) then: use a stopping test. *** 420c* *** else: use just the itmax to stop iteration. *** 421c* ************************************************************** 422c* *** istop=0 most efficient (whatever it is) *** 423c* *** istop=1 relative residual *** 424c* *** istop=2 rms difference of successive iterates *** 425c* *** istop=3 relative true error (provided for testing) *** 426c* ************************************************************** 427c* 428c* *** compute denominator for stopping criterion *** 429 if (istop .eq. 0) then 430 rsden = 1.0d0 431 elseif (istop .eq. 1) then 432 rsden = xnrm1(nx,ny,nz,fc) 433 elseif (istop .eq. 2) then 434 rsden = dsqrt(dble(nx*ny*nz)) 435 elseif (istop .eq. 3) then 436 rsden = xnrm2(nx,ny,nz,tru) 437 elseif (istop .eq. 4) then 438 rsden = xnrm2(nx,ny,nz,tru) 439 elseif (istop .eq. 5) then 440 call matvec(nx,ny,nz,ipc,rpc,ac,cc,tru,w1) 441 rsden = dsqrt(xdot(nx,ny,nz,tru,w1)) 442 else 443 print*,'% RICHGO: bad istop value... ' 444 endif 445 if (rsden.eq.0.0d0) then 446 rsden = 1.0d0 447 print*,'% RICHGO: rhs is zero ' 448 endif 449 rsnrm = rsden 450 orsnrm = rsnrm 451 call prtstp (iok,0,rsnrm,rsden,orsnrm) 452c* 453c* *** setup for the looping *** 454 iters = 0 455 30 continue 456 iters = iters + 10 457c* 458c* *** save iterate if stop test will use it on next iter *** 459 if (istop .eq. 2) call xcopy(nx,ny,nz,x,tru) 460c* 461c* *** do 10 iterations *** 462 iresid = 1 463 iadjoint = 0 464 errtol_s = 0.0d0 465 itmax_s = 10 466 omega_s = omega 467 call rich(nx,ny,nz,ipc,rpc,ac,cc,fc,x,w1,w2,r, 468 2 itmax_s,iters_s,errtol_s,omega_s,iresid,iadjoint) 469c* 470c* *** compute/check the current stopping test *** 471 orsnrm = rsnrm 472 if (istop .eq. 0) then 473 rsnrm = xnrm1(nx,ny,nz,r) 474 elseif (istop .eq. 1) then 475 rsnrm = xnrm1(nx,ny,nz,r) 476 elseif (istop .eq. 2) then 477 call xcopy(nx,ny,nz,tru,w1) 478 call xaxpy(nx,ny,nz,(-1.0d0),x,w1) 479 rsnrm = xnrm1(nx,ny,nz,w1) 480 elseif (istop .eq. 3) then 481 call xcopy(nx,ny,nz,tru,w1) 482 call xaxpy(nx,ny,nz,(-1.0d0),x,w1) 483 rsnrm = xnrm2(nx,ny,nz,w1) 484 elseif (istop .eq. 4) then 485 call xcopy(nx,ny,nz,tru,w1) 486 call xaxpy(nx,ny,nz,(-1.0d0),x,w1) 487 rsnrm = xnrm2(nx,ny,nz,w1) 488 elseif (istop .eq. 5) then 489 call xcopy(nx,ny,nz,tru,w1) 490 call xaxpy(nx,ny,nz,(-1.0d0),x,w1) 491 call matvec(nx,ny,nz,ipc,rpc,ac,cc,w1,w2) 492 rsnrm = dsqrt(xdot(nx,ny,nz,w1,w2)) 493 else 494 print*,'% RICHGO: bad istop value... ' 495 endif 496 call prtstp (iok,iters,rsnrm,rsden,orsnrm) 497 if ((rsnrm/rsden) .le. errtol) goto 99 498 if (iters .ge. itmax) goto 99 499 goto 30 500c* 501c* *** return and end *** 502 99 continue 503 return 504 end 505