from array import array
from math import sqrt

def zerovector(n):
    a = array('d', range(n))
    for i in range(n):
        a[i] = 0.0
    return a

def zeromatrix(n, m):
    a = list(range(n))
    for i in range(n):
        a[i] = zerovector(m)
    return a

def copyvector(x):
    a = array('d', range(len(x)))
    for i in range(len(x)):
        a[i] = x[i]
    return a

def copymatrix(x):
    n = len(x)
    m = len(x[0])
    a = zeromatrix(n, m)
    for i in range(n):
        for j in range(m):
            a[i][j] = x[i][j]
    return a

def transpose(x):
    n = len(x)
    m = len(x[0])
    a = zeromatrix(m, n)
    for i in range(n):
        for j in range(m):
            a[j][i] = x[i][j]
    return a

def dot(a, b):
    total = 0.0
    for i in range(len(a)):
        total = total + a[i]*b[i]
    return total

def mxm(a, b):
    n = len(a)
    k = len(a[0])
    kk = len(b)
    m = len(b[0])
    if kk != k:
        raise ValueError("matrices do not conform for multiplication")
    c = zeromatrix(n, m)
    for i in range(n):
        for j in range(m):
            total = 0.0
            for l in range(k):
                total = total + a[i][l]*b[l][j]
            c[i][j] = total
    return c

def mxv(a, b):
    n = len(a)
    k = len(a[0])
    kk = len(b)
    if k != kk:
        raise ValueError("matrix and vector do not conform for multiplication")
    c = zerovector(n)
    for i in range(n):
        c[i] = dot(a[i], b)
    return c

def printvector(a):
    n = len(a)
    for i in range(n):
        print("%12.5e " % a[i], end="")
    print(" ")

def printmatrix(a):
    n = len(a)
    for i in range(n):
        printvector(a[i])

def numderiv(func, x, step, eps):
    '''
    Use central differences to compute the gradient and diagonal
    elements of the Hessian.

    func(x) = function to be differentiated
    x[]     = (array) point at which to differentiate
    step[]  = (array) remembers the finite-difference step between
              successive calls.  Set to zero on the first call,
              or set close to an appropriate value.
    eps     = expected precision in func

    Some care is taken to adjust the step so that the gradient and
    Hessian diagonal are estimated with about 4 digits of precision,
    but some noise is unavoidable due either to noise in the
    function or to cubic/higher terms in the Taylor expansion.
    '''

    n = len(x)
    g = zerovector(n)
    h = zerovector(n)
    f0 = func(x)
    for i in range(n):
        if step[i] == 0.0:
            step[i] = max(abs(x[i])*0.01, 0.0001)
        xi = x[i]
        while True:
            x[i] = xi + step[i]
            f1 = func(x)
            if abs(f1-f0) < (1e4*eps):
                #print(' Increasing step ', i, step[i], abs(f1-f0))
                step[i] = step[i]*2.0
            elif abs(f1-f0) > (1e5*eps):
                #print(' Decreasing step ', i, step[i], abs(f1-f0))
                step[i] = step[i]/3.0
            else:
                break
        x[i] = xi - step[i]
        fm1 = func(x)
        x[i] = xi
        g[i] = (f1 - fm1)/(2*step[i])
        h[i] = (f1 + fm1 - 2.0*f0)/(step[i]*step[i])

    return (f0, g, h)
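# A minimal sketch of calling numderiv directly.  The helper _demo_numderiv
# and its quadratic test function are illustrative additions, not part of
# the original interface; they are defined but never invoked by the module.
# For f(x) = x0^2 + 3*x1^2 the exact gradient at (1, 1) is (2, 6) and the
# Hessian diagonal is also (2, 6).
def _demo_numderiv():
    def f(x):
        return x[0]*x[0] + 3.0*x[1]*x[1]
    x = copyvector([1.0, 1.0])
    step = zerovector(2)     # zero entries tell numderiv to choose the step
    (f0, g, h) = numderiv(f, x, step, 1e-10)
    print(' f0 = %f (expect 4)' % f0)
    printvector(g)           # approximately (2, 6)
    printvector(h)           # approximately (2, 6)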
def quadfit(alpha0, f0, alpha1, f1, alpha2, f2):
    '''
    Given 3 points compute the gradient and hessian at point 0
    using a quadratic fit.
    '''
    delta1 = alpha1 - alpha0
    delta2 = alpha2 - alpha0
    d1 = (f1 - f0)/delta1
    d2 = (f2 - f0)/delta2
    h0 = 2.0*(d1 - d2)/(delta1 - delta2)
    g0 = d1 - 0.5*h0*delta1

    # Consistency checks: the fitted quadratic should reproduce f1 and f2.
    #test1 = f0 + g0*delta1 + 0.5*h0*delta1*delta1
    #test2 = f0 + g0*delta2 + 0.5*h0*delta2*delta2
    return (f0, g0, h0)

def takestep(x0, s, alpha):
    x = zerovector(len(x0))
    for j in range(len(x)):
        x[j] = x0[j] + s[j]*alpha
    return x

def quadratic_step(trust, g0, h0):
    if h0 > 0:
        delta2 = -g0/h0
        if abs(delta2) > trust:
            print(" Step restriction: %f %f " % (delta2, trust))
            delta2 = abs(trust*delta2)/delta2
    else:
        print(" Negative curvature ")
        delta2 = -abs(trust*g0)/g0
    return delta2
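# A minimal sanity sketch for quadfit (illustrative only, never invoked by
# the module).  Sampling the exact quadratic f(a) = 2 + 3*a + 0.5*4*a*a at
# three points should recover the gradient g0 = 3 and curvature h0 = 4 at
# alpha0 = 0.
def _demo_quadfit():
    def q(a):
        return 2.0 + 3.0*a + 0.5*4.0*a*a
    (f0, g0, h0) = quadfit(0.0, q(0.0), 0.5, q(0.5), 1.0, q(1.0))
    print(' g0 = %f (expect 3), h0 = %f (expect 4)' % (g0, h0))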
def linesearch(func, x0, s, lsgrad, eps):
    # Assume here that some halfway OK preconditioning
    # is being used, so we expect a step around unity.
    # Nevertheless, we must exercise some caution.

    # First step in small increments until we've either
    # bracketed the minimum or gone downhill with enough
    # energy difference to start fitting.

    print(" Line search: step    alpha      grad     hess        value")
    print("              ----  ---------  -------- -------- ----------------")

    trust = 0.2
    alpha0 = 0.0
    f0 = func(x0)
    print(" %9.2e %8.1e %16.8f" % (alpha0, lsgrad, f0))

    if lsgrad < 0:
        alpha1 = alpha0 + trust
    else:
        alpha1 = alpha0 - trust
    f1 = func(takestep(x0, s, alpha1))
    print(" %9.2e %16.8f" % (alpha1, f1))

    while f1 > f0:
        if trust < 0.00125:
            print(" system is too badly conditioned for initial step")
            return (alpha0, f0)    # Cannot seem to find my way
        trust = trust * 0.5
        if lsgrad < 0:
            alpha1 = alpha0 + trust
        else:
            alpha1 = alpha0 - trust
        f1 = func(takestep(x0, s, alpha1))
        print(" %9.2e %16.8f" % (alpha1, f1))

    g0 = lsgrad
    h0 = (f1 - f0 - alpha1*g0)/alpha1**2
    if f1 < f0:
        g0 = g0 + h0*(alpha1 - alpha0)
        alpha0, alpha1, f0, f1 = alpha1, alpha0, f1, f0

    alpha2 = alpha0 + quadratic_step(trust, g0, h0)

    nbackstep = 0

    for iter in range(1, 10):
        f2 = func(takestep(x0, s, alpha2))
        #print(' alphas ', alpha0, alpha1, alpha2)
        #print(' fs ', f0, f1, f2)

        if iter == 1:
            f2prev = f2

        print(" %9.2e %16.8f" % (alpha2, f2))

        # Check for convergence or insufficient precision to proceed further
        if (abs(f0-f1) < (10*eps)) and (abs(f1-f2) < (10*eps)):
            print(" ")
            print(" Insufficient precision ... terminating LS")
            break

        if (f2 - f2prev) > 0:
            # New point is higher than the previous worst
            if nbackstep < 3:
                nbackstep = nbackstep + 1
                print(" ")
                print(" Back stepping due to uphill step")
                trust = max(0.01, 0.2*abs(alpha2 - alpha0))   # Reduce trust radius
                alpha2 = alpha0 + 0.2*(alpha2 - alpha0)
                continue
        elif (f2 - f0) < 0:
            trust = min(4.0, trust*2.0)   # Seem to have narrowed the search

        nbackstep = 0

        f2prev = f2

        # Order in increasing energy
        if f1 < f0:
            alpha0, alpha1, f0, f1 = alpha1, alpha0, f1, f0
        if f2 < f0:
            alpha0, alpha2, f0, f2 = alpha2, alpha0, f2, f0
        if f2 < f1:
            alpha1, alpha2, f1, f2 = alpha2, alpha1, f2, f1

        (f0, g0, h0) = quadfit(alpha0, f0, alpha1, f1, alpha2, f2)
        print(" %4i %9.2e %8.1e %8.1e %16.8f" % (iter, alpha0, g0, h0, f0))

        if (h0 > 0.0) and (abs(g0) < 0.03*abs(lsgrad)):
            print(" ")
            print(" gradient reduced 30-fold ... terminating LS")
            break

        # Determine the next step
        delta = quadratic_step(trust, g0, h0)
        alpha2 = alpha0 + delta
        df = g0*delta + 0.5*h0*delta*delta
        if abs(df) < 10.0*eps:
            print(" ")
            print(" projected energy reduction < 10*eps ... terminating LS")
            break

    return (alpha0, f0)

def jacobi(ainput):
    '''
    Diagonalize a real symmetric matrix using the variable threshold
    cyclic Jacobi method.

    (v,e) = jacobi(a)

    Input: a[n][m] is a real symmetric matrix

    Returns: (v,e) where v is the list of eigenvectors and e is an
    array of the corresponding eigenvalues in ascending order.
    v[k] is a vector containing the kth eigenvector.  These satisfy

       A*Vt = Vt*e

    or

       V*A = e*V

    or

       sum(j) (a[i][j]*v[k][j]) = e[k]*v[k][i]
    '''
    a = copymatrix(ainput)
    n = len(a)
    m = len(a[0])
    if n != m:
        raise ValueError('Matrix must be square')
    for i in range(n):
        for j in range(m):
            if a[i][j] != a[j][i]:
                raise ValueError('Matrix must be symmetric')

    tolmin = 1e-14
    tol = 1e-4

    v = zeromatrix(n, n)
    for i in range(n):
        v[i][i] = 1.0

    maxd = 0.0
    for i in range(n):
        maxd = max(abs(a[i][i]), maxd)

    for iter in range(50):
        nrot = 0
        for i in range(n):
            for j in range(i+1, n):
                aii = a[i][i]
                ajj = a[j][j]
                daij = abs(a[i][j])
                if daij > tol*maxd:   # Screen small elements
                    nrot = nrot + 1
                    s = aii - ajj
                    ds = abs(s)
                    if daij > (tolmin*ds):   # Check for sufficient precision
                        if (tol*daij) > ds:
                            c = s = 1/sqrt(2.)
                        else:
                            t = a[i][j]/s
                            u = 0.25/sqrt(0.25 + t*t)
                            c = sqrt(0.5 + u)
                            s = 2.*t*u/c

                        for k in range(n):
                            u = a[i][k]
                            t = a[j][k]
                            a[i][k] = s*t + c*u
                            a[j][k] = c*t - s*u

                        for k in range(n):
                            u = a[k][i]
                            t = a[k][j]
                            a[k][i] = s*t + c*u
                            a[k][j] = c*t - s*u

                        for k in range(n):
                            u = v[i][k]
                            t = v[j][k]
                            v[i][k] = s*t + c*u
                            v[j][k] = c*t - s*u

                        a[j][i] = a[i][j] = 0.0
                        maxd = max(maxd, abs(a[i][i]), abs(a[j][j]))
        if nrot == 0 and tol <= tolmin:
            break
        tol = max(tolmin, tol*0.99e-2)

    if nrot != 0:
        raise RuntimeError("Jacobi iteration did not converge in 50 passes")

    # Sort eigenvectors and values into increasing order
    e = zerovector(n)
    for i in range(n):
        e[i] = a[i][i]
        for j in range(i):
            if e[j] > e[i]:
                (e[i], e[j]) = (e[j], e[i])
                (v[i], v[j]) = (v[j], v[i])

    return (v, e)
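# A minimal sketch of a jacobi() sanity check (illustrative only; the
# __main__ block below runs a larger test).  The symmetric matrix
# [[2, 1], [1, 2]] has eigenvalues 1 and 3.
def _demo_jacobi():
    a = zeromatrix(2, 2)
    a[0][0] = a[1][1] = 2.0
    a[0][1] = a[1][0] = 1.0
    (v, e) = jacobi(a)
    printvector(e)    # approximately (1, 3) in ascending order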
def hessian_update_bfgs(hp, dx, g, gp):
    '''
    Apply the BFGS update to the approximate Hessian h[][].

    hp[][] = Hessian matrix from the previous iteration
    dx[]   = step from the previous iteration
             (dx[] = x[] - xp[] where xp[] is the previous point)
    g[]    = gradient at the current point
    gp[]   = gradient at the previous point

    Returns the updated Hessian.
    '''

    n = len(hp)
    hdx = mxv(hp, dx)
    dg = zerovector(n)
    for i in range(n):
        dg[i] = g[i] - gp[i]

    dxhdx = dot(dx, hdx)
    dxdx = dot(dx, dx)
    dxdg = dot(dx, dg)
    dgdg = dot(dg, dg)
    h = copymatrix(hp)

    if (dxdx > 0.0) and (dgdg > 0.0) and (abs(dxdg/sqrt(dxdx*dgdg)) > 1.e-4):
        for i in range(n):
            for j in range(n):
                h[i][j] = h[i][j] + dg[i]*dg[j]/dxdg - hdx[i]*hdx[j]/dxhdx
    else:
        print(' BFGS not updating: dxdg (%e), dgdg (%e), dxhdx (%e), dxdx (%e)'
              % (dxdg, dgdg, dxhdx, dxdx))

    return h

def quasinr(func, guess, tol, eps, printvar=None):
    '''
    Unconstrained minimization of a function of n variables
    without analytic derivatives using quasi-Newton with BFGS update
    and numerical gradients.

    func(x) is a function that takes an array of n values and
    returns the function value

    guess[] is an array of n values for the initial guess

    tol is the convergence criterion for the maximum value
    of the gradient

    eps is the expected precision in the function value

    printvar(x) is an optional user function to print the values of
    the parameters each macro iteration
    '''

    n = len(guess)
    x = copyvector(guess)
    s = zerovector(n)
    g = zerovector(n)
    gp = zerovector(n)
    step = zerovector(n)
    hessian = zeromatrix(n, n)

    alpha = 0.0

    for iter in range(50*n):
        (value, g, h) = numderiv(func, x, step, eps)
        gmax = max(map(abs, g))

        print(' ')
        print(' iter     gmax          value')
        print(' ----  ---------  ----------------')
        print("%4i %9.2e %16.8f" % (iter, gmax, value))

        if printvar:
            printvar(x)

        if gmax < tol:
            print(' Converged!')
            break

        if iter == 0:
            for i in range(n):
                hessian[i][i] = max(abs(h[i]), 1e-4)
        else:
            hessian = hessian_update_bfgs(hessian, s, g, gp)

        (v, e) = jacobi(hessian)
        emax = max(map(abs, e))
        emin = emax*1e-4     # Control noise in small eigenvalues
        print('\n Eigenvalues of the Hessian:')
        printvector(e)

        # Transform to spectral form, take the step, transform back
        gs = mxv(v, g)
        for i in range(n):
            if e[i] < emin:
                print(' Mode %d: small/negative eigenvalue (%f).' % (i, e[i]))
                s[i] = -gs[i]/emin
            else:
                s[i] = -gs[i]/e[i]
        s = mxv(transpose(v), s)

        # Apply an overall step restriction ... a better LS obviates this
        scale = 1.0
        for i in range(n):
            trust = max(abs(x[i]), abs(x[i]/sqrt(max(1e-4, abs(hessian[i][i])))))
            if abs(s[i]) > trust:
                print(' restricting ', i, trust, abs(x[i]),
                      abs(x[i]/sqrt(abs(hessian[i][i]))), s[i])
                scale = min(scale, trust/abs(s[i]))
        if scale != 1.0:
            for i in range(n):
                s[i] = s[i]*scale

        (alpha, value) = linesearch(func, x, s, dot(s, g), eps)

        if alpha == 0.0:
            print(' Insufficient precision to proceed further')
            break

        for i in range(n):
            s[i] = s[i]*alpha
            x[i] = x[i] + s[i]
            gp[i] = g[i]
    return (value, x)
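# A minimal sketch (illustrative only, never invoked by the module) that
# checks the BFGS secant condition: after the update, the new Hessian
# applied to the step dx reproduces the gradient difference dg = g - gp.
def _demo_bfgs_secant():
    hp = zeromatrix(2, 2)
    hp[0][0] = hp[1][1] = 1.0       # start from the identity
    dx = copyvector([0.1, -0.2])
    gp = copyvector([1.0, 1.0])
    g = copyvector([1.3, 0.5])
    h = hessian_update_bfgs(hp, dx, g, gp)
    hdx = mxv(h, dx)
    printvector(hdx)                # approximately (0.3, -0.5) = g - gp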
501 ''' 502 n = len(guess) 503 x = copyvector(guess) 504 s = zerovector(n) 505 g = zerovector(n) 506 gp= zerovector(n) 507 value = func(x) 508 509 for iter in range(10*n): 510 g = dfunc(x) 511 gmax = max(map(abs,g)) 512 513 print(' ') 514 print(' iter gmax value ') 515 print(' ---- --------- ----------------') 516 print("%4i %9.2e %16.8f" % (iter,gmax,value)) 517 518 if gmax < tol: 519 print(' Converged!') 520 break 521 522 if (iter == 0) or ((iter%20) == 0): 523 beta = 0.0 524 else: 525 beta = (dot(g,g) - dot(g,gp))/(dot(s,g)-dot(s,gp)) 526 527 for i in range(n): 528 s[i] = -g[i] + beta*s[i] 529 530 (alpha,value) = linesearch(func, x, s, dot(s,g), 1e-12) 531 532 for i in range(n): 533 s[i] = s[i]*alpha 534 x[i] = x[i] + s[i] 535 gp[i]= g[i] 536 537 return (value,x) 538 539 540def cgmin(func, dfunc, guess, tol, precond=None, reset=None): 541 ''' 542 Conjugate gradient with optional preconditioning and 543 use of analytic gradients. 544 ''' 545 n = len(guess) 546 x = copyvector(guess) 547 s = zerovector(n) 548 g = zerovector(n) 549 gp= zerovector(n) 550 value = func(x) 551 552 if not reset: 553 reset = n 554 reset = min(reset,n) 555 556 for iter in range(10*n): 557 g = dfunc(x) 558 gmax = max(map(abs,g)) 559 560 print(' ') 561 print(' iter gmax value ') 562 print(' ---- --------- ----------------') 563 print("%4i %9.2e %16.8f" % (iter,gmax,value)) 564 565 if gmax < tol: 566 print(' Converged!') 567 break 568 569 if precond: 570 precondg = precond(g) 571 else: 572 precondg = g 573 574 if (iter % reset) == 0: 575 beta = 0.0 576 else: 577 beta = (dot(precondg,g) - dot(precondg,gp))/(dot(s,g)-dot(s,gp)) 578 579 for i in range(n): 580 s[i] = -precondg[i] + beta*s[i] 581 582 (alpha,value) = linesearch(func, x, s, dot(s,g), 583 max(1e-16,abs(value)*1e-12)) 584 585 for i in range(n): 586 s[i] = s[i]*alpha 587 x[i] = x[i] + s[i] 588 gp[i]= g[i] 589 590 return (value,x) 591 592def cgmin2(func, guess, tol, eps, printvar=None,reset=None): 593 ''' 594 Unconstrained minimization of a function of n variables 595 without analytic derivatives using conjugate gradient with 596 diagonal preconditioning. 597 598 func(x) is a function that takes an array of n values and 599 returns the function value 600 601 guess[] is an array of n values for the initial guess 602 603 tol is the convergence criterion for the maximum value 604 of the gradient 605 606 eps is the expected precision in the function value 607 608 printvar(x) is an optional user function to print the values of 609 parameters each iteration 610 611 reset is the number of iterations between forced resets of the 612 conjugacy. In principle this could be n but noise in the 613 numerical gradients makes a smaller number a better choice. 
614 ''' 615 616 n = len(guess) 617 x = copyvector(guess) 618 s = zerovector(n) 619 g = zerovector(n) 620 gp = zerovector(n) 621 step = zerovector(n) 622 precondg = zerovector(n) 623 624 alpha = 0.0 625 626 if not reset: 627 reset = n 628 reset = min(reset,n) 629 630 for iter in range(50*n): 631 (value,g,hh) = numderiv(func, x, step, eps) 632 gmax = max(map(abs,g)) 633 634 print(' ') 635 print(' iter gmax value ') 636 print(' ---- --------- ----------------') 637 print("%4i %9.2e %16.8f" % (iter,gmax,value)) 638 639 if (printvar): 640 printvar(x) 641 642 if gmax < tol: 643 print(' Converged!') 644 break 645 646 if (iter % reset) == 0: 647 # On the first iteration or if not applying conjugacy 648 # we can recompute the diagonal preconditioner 649 h = copyvector(hh) 650 for i in range(n): 651 h[i] = max(abs(h[i]),1e-6) 652 653 # Preconditioning with the diagonal of the Hessian 654 for i in range(n): 655 precondg[i] = g[i] / h[i] 656 657 # Should be able to reset every n steps but noisy gradients 658 # means that we don't have enough info. 659 if (iter % reset) == 0: 660 if iter != 0: 661 print(" Resetting conjugacy") 662 beta = 0.0 663 else: 664 beta = (dot(precondg,g) - dot(precondg,gp))/(dot(s,g)-dot(s,gp)) 665 666 for i in range(n): 667 s[i] = -precondg[i] + beta*s[i] 668 669 (alpha,value) = linesearch(func, x, s, dot(s,g), eps) 670 671 if alpha == 0.0: 672 # LS failed, probably due to lack of precision. 673 if beta != 0.0: 674 print("LS failed - trying preconditioned steepest descent direction") 675 for i in range(n): 676 s[i] = -g[i] 677 (alpha,value) = linesearch(func, x, s, dot(s,g), eps) 678 if alpha == 0.0: 679 print(" Insufficient precision to proceed further") 680 break 681 682 for i in range(n): 683 s[i] = s[i]*alpha 684 x[i] = x[i] + s[i] 685 gp[i]= g[i] 686 return (value,x) 687 688 689if __name__ == '__main__': 690 691 def precond(g): 692 # Used to test optional preconditioner for cgmin(). 693 precondg = copyvector(g) 694 for i in range(len(g)): 695 precondg[i] = precondg[i]/(i+2.0) 696 return precondg 697 698 def df(x): 699 d = zerovector(len(x)) 700 for i in range(len(x)): 701 d[i] = x[i]*(i+1) 702 for j in range(len(x)): 703 d[i] = d[i] + x[j] 704 return d 705 706 def f(x): 707 sum = 0.0 708 for i in range(len(x)): 709 for j in range(len(x)): 710 sum = sum + 0.5*x[i]*x[j] 711 for i in range(len(x)): 712 sum = sum + 0.5*x[i]*x[i]*(i+1) 713 return sum 714 715 716 print('\n\n TESTING QUASI-NR SOLVER \n\n') 717 quasinr(f, [1.,0.5,0.3,-0.4], 1e-4, 1e-10) 718 719 print('\n\n TESTING GC WITH NUM. GRAD. AND DIAG. PRECOND.\n\n') 720 cgmin2(f, [1.,0.5,0.3,-0.4], 1e-4, 1e-10, reset=20) 721 722 print('\n\n TESTING GC WITH ANAL. GRAD. AND WITHOUT OPTIONAL PRECOND.\n\n') 723 cgmin(f, df, [1.,0.5,0.3,-0.4], 1e-4) 724 725 print('\n\n TESTING GC WITH ANAL. GRAD. AND WITH OPTIONAL PRECOND.\n\n') 726 cgmin(f, df, [1.,0.5,0.3,-0.4], 1e-4, precond=precond) 727 728 print('\n\n TESTING GC WITH ANAL. GRAD. AND NO PRECOND.\n\n') 729 cgminold(f, df, [1.,0.5,0.3,-0.4], 1e-4) 730 731 print('\n\n TESTING JACOBI EIGENSOLVER\n\n') 732 n = 50 733 a = zeromatrix(n,n) 734 for i in range(n): 735 for j in range(i,n): 736 a[j][i] = a[i][j] = (i*j+1.)/(i+j+1.) 737 738 (v,e)= jacobi(a) 739 740 print(' eigenvalues') 741 printvector(e) 742 #print ' v ' 743 #printmatrix(v) 744 ev = mxm(v,a) 745 for i in range(n): 746 err = 0.0 747 for j in range(n): 748 err = max(err,abs(ev[i][j] - e[i]*v[i][j])) 749 err = err/(n*max(1.0,abs(e[i]))) 750 if err > 1e-12: 751 print(' Error in eigenvector ', i, err) 752