from array import array
from math import sqrt

def zerovector(n):
    a = array('d',range(n))
    for i in range(n):
        a[i] = 0.0
    return a

def zeromatrix(n,m):
    a = list(range(n))
    for i in range(n):
        a[i] = zerovector(m)
    return a

def copyvector(x):
    a = array('d',range(len(x)))
    for i in range(len(x)):
        a[i] = x[i]
    return a

def copymatrix(x):
    n = len(x)
    m = len(x[0])
    a = zeromatrix(n,m)
    for i in range(n):
        for j in range(m):
            a[i][j] = x[i][j]
    return a

def transpose(x):
    n = len(x)
    m = len(x[0])
    a = zeromatrix(m,n)
    for i in range(n):
        for j in range(m):
            a[j][i] = x[i][j]
    return a

def dot(a,b):
    total = 0.0
    for i in range(len(a)):
        total = total + a[i]*b[i]
    return total

def mxm(a,b):
    n = len(a)
    k = len(a[0])
    kk = len(b)
    m = len(b[0])
    if kk != k:
        raise ValueError("matrices do not conform for multiplication")
    c = zeromatrix(n,m)
    for i in range(n):
        for j in range(m):
            total = 0.0
            for l in range(k):
                total = total + a[i][l]*b[l][j]
            c[i][j] = total
    return c

def mxv(a,b):
    n = len(a)
    k = len(a[0])
    kk = len(b)
    if k != kk:
        raise ValueError("matrix and vector do not conform for multiplication")
    c = zerovector(n)
    for i in range(n):
        c[i] = dot(a[i],b)
    return c

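# Illustrative usage of the helpers above (a sketch, not part of the original module):
# vectors are array('d', ...) objects and matrices are lists of such arrays.
#
#   a = zeromatrix(2, 2)
#   a[0][0], a[1][1] = 2.0, 3.0
#   x = copyvector([1.0, 1.0])     # -> array('d', [1.0, 1.0])
#   y = mxv(a, x)                  # -> array('d', [2.0, 3.0])
#   print(dot(x, y))               # -> 5.0
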
def printvector(a):
    n = len(a)
    for i in range(n):
        print("%12.5e " % a[i], end="")
    print(" ")

def printmatrix(a):
    n = len(a)
    for i in range(n):
        printvector(a[i])

def numderiv(func,x,step,eps):
    '''
    Use central differences to compute the gradient and diagonal
    elements of the Hessian.

    func(x) = function to be differentiated
    x[] = (array) point at which to differentiate
    step[] = (array) remembers the finite-difference step between
    .        successive calls.  Set to zero on the first call
    .        or set close to an appropriate value
    eps = expected precision in func

    Some care is taken to adjust the step so that the gradient and
    Hessian diagonal are estimated with about 4 digits of precision,
    but some noise is unavoidable due either to noise in the
    function or to cubic/higher terms in the Taylor expansion.
    '''
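    # The estimates computed below are the standard central-difference
    # formulas (a reference note, matching the assignments to g[i] and h[i]):
    #   g[i] ~ (f(x + h*e_i) - f(x - h*e_i)) / (2*h)
    #   h[i] ~ (f(x + h*e_i) + f(x - h*e_i) - 2*f(x)) / h**2
    # where h = step[i] and e_i is the i-th coordinate direction.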

    n = len(x)
    g = zerovector(n)
    h = zerovector(n)
    f0 = func(x)
    for i in range(n):
        if step[i] == 0.0:
            step[i] = max(abs(x[i])*0.01,0.0001)
        xi = x[i]
        while True:
            x[i] = xi + step[i]
            f1 = func(x)
            if abs(f1-f0) < (1e4*eps):
                #print(' Increasing step ',i,step[i],abs(f1-f0))
                step[i] = step[i]*2.0
            elif abs(f1-f0) > (1e5*eps):
                #print(' Decreasing step ',i,step[i],abs(f1-f0))
                step[i] = step[i]/3.0
            else:
                break
        x[i] = xi - step[i]
        fm1 = func(x)
        x[i] = xi
        g[i] = (f1 - fm1)/(2*step[i])
        h[i] = (f1 + fm1 - 2.0*f0)/(step[i]*step[i])

    return (f0,g,h)


def quadfit(alpha0, f0, alpha1, f1, alpha2, f2):
    '''
    Given 3 points, compute the gradient and Hessian at point 0
    using a quadratic fit.
    '''
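    # A sketch of the fit: model f(alpha) ~ f0 + g0*(alpha - alpha0) + 0.5*h0*(alpha - alpha0)**2
    # and require it to reproduce f1 and f2.  With d_k = (f_k - f0)/delta_k this gives
    #   h0 = 2*(d1 - d2)/(delta1 - delta2)    and    g0 = d1 - 0.5*h0*delta1
    # which are exactly the expressions evaluated below.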
    delta1 = alpha1 - alpha0
    delta2 = alpha2 - alpha0
    d1 = (f1 - f0)/delta1
    d2 = (f2 - f0)/delta2
    h0 = 2.0*(d1 - d2)/(delta1-delta2)
    g0 = d1 - 0.5*h0*delta1

    test1 = f0 + g0*delta1 + 0.5*h0*delta1*delta1
    test2 = f0 + g0*delta2 + 0.5*h0*delta2*delta2
    return (f0, g0, h0)

def takestep(x0, s, alpha):
    x = zerovector(len(x0))
    for j in range(len(x)):
        x[j] = x0[j] + s[j]*alpha
    return x

def quadratic_step(trust, g0, h0):
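    # An interpretation of the step below (an added note, not from the original):
    # for positive curvature this is the 1-D Newton step -g0/h0 clipped to
    # +/- trust; for non-positive curvature it takes a full trust-radius step
    # in the downhill direction (opposite the sign of g0).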
    if h0 > 0:
        delta2 = -g0/h0
        if abs(delta2) > trust:
            print("                    Step restriction: %f %f " % (delta2, trust))
            delta2 = abs(trust*delta2)/delta2
    else:
        print("                    Negative curvature ")
        delta2 = -abs(trust*g0)/g0
    return delta2


def linesearch(func, x0, s, lsgrad, eps):
    # Assume here that some halfway OK preconditioning
    # is being used, so we expect a step around unity.
    # Nevertheless, we must exercise some caution.

    # First step in small increments until we've either
    # bracketed the minimum or gone downhill with enough
    # energy difference to start fitting.

    print(" Line search: step   alpha     grad     hess        value")
    print("              ---- --------- -------- --------  ----------------")

    trust = 0.2
    alpha0 = 0.0
    f0 = func(x0)
    print("                   %9.2e %8.1e          %16.8f" % (alpha0, lsgrad, f0))

    if lsgrad < 0:
        alpha1 = alpha0 + trust
    else:
        alpha1 = alpha0 - trust
    f1 = func(takestep(x0,s,alpha1))
    print("                   %9.2e                   %16.8f" % (alpha1, f1))

    while f1 > f0:
        if trust < 0.00125:
            print(" system is too badly conditioned for initial step")
            return (alpha0,f0)          # Cannot seem to find my way
        trust = trust * 0.5
        if lsgrad < 0:
            alpha1 = alpha0 + trust
        else:
            alpha1 = alpha0 - trust
        f1 = func(takestep(x0,s,alpha1))
        print("                   %9.2e                   %16.8f" % (alpha1, f1))

    g0 = lsgrad
    h0 = (f1-f0-alpha1*g0)/alpha1**2
    if f1 < f0:
        g0 = g0 + h0*(alpha1 - alpha0)
        alpha0, alpha1, f0, f1 = alpha1, alpha0, f1, f0

    alpha2 = alpha0 + quadratic_step(trust,g0,h0)

    nbackstep = 0

    for iter in range(1,10):
        f2 = func(takestep(x0,s,alpha2))
        #print(' alphas ', alpha0, alpha1, alpha2)
        #print(' fs     ', f0, f1, f2)

        if iter == 1:
            f2prev = f2

        print("                   %9.2e                   %16.8f" % (alpha2, f2))

        # Check for convergence or insufficient precision to proceed further
        if (abs(f0-f1)<(10*eps)) and (abs(f1-f2)<(10*eps)):
            print("                  ")
            print(" Insufficient precision ... terminating LS")
            break

        if (f2-f2prev) > 0:
            # New point is higher than the previous worst
            if nbackstep < 3:
                nbackstep = nbackstep + 1
                print("                  ")
                print(" Back stepping due to uphill step")
                trust = max(0.01,0.2*abs(alpha2 - alpha0))  # Reduce trust radius
                alpha2 = alpha0 + 0.2*(alpha2 - alpha0)
                continue
        elif (f2-f0) < 0:
            trust = min(4.0,trust*2.0)  # Seem to have narrowed the search

        nbackstep = 0

        f2prev = f2

        # Order in increasing energy
        if f1 < f0:
            alpha0, alpha1, f0, f1 = alpha1, alpha0, f1, f0
        if f2 < f0:
            alpha0, alpha2, f0, f2 = alpha2, alpha0, f2, f0
        if f2 < f1:
            alpha1, alpha2, f1, f2 = alpha2, alpha1, f2, f1

        (f0, g0, h0) = quadfit(alpha0, f0, alpha1, f1, alpha2, f2)
        print("              %4i %9.2e %8.1e %8.1e %16.8f" % (iter, alpha0, g0, h0, f0))

        if (h0>0.0) and (abs(g0) < 0.03*abs(lsgrad)):
            print("                  ")
            print(" gradient reduced 30-fold ... terminating LS")
            break

        # Determine the next step
        delta = quadratic_step(trust,g0,h0)
        alpha2 = alpha0 + delta
        df = g0*delta + 0.5*h0*delta*delta
        if abs(df) < 10.0*eps:
            print("                  ")
            print(" projected energy reduction < 10*eps ... terminating LS")
            break

    return (alpha0, f0)

def jacobi(ainput):
    '''
    Diagonalize a real symmetric matrix using the variable-threshold
    cyclic Jacobi method.

    (v,e) = jacobi(a)

    Input: a[n][n] is a real symmetric matrix

    Returns: (v,e) where v is the list of eigenvectors and e is an
    array of the corresponding eigenvalues in ascending order.
    v[k] is a vector containing the kth eigenvector.  These satisfy
    (with e regarded as a diagonal matrix and Vt the transpose of V)

    A*Vt = Vt*e

    or

    V*A = e*V

    or

    sum(j)(a[i][j]*v[k][j]) = e[k]*v[k][i]
    '''
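    # Illustrative usage (a sketch; the __main__ block at the bottom runs a larger test):
    #   (v, e) = jacobi([[2.0, 1.0], [1.0, 2.0]])
    #   # e -> approximately array('d', [1.0, 3.0]); v[0] and v[1] are the eigenvectors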
    a = copymatrix(ainput)
    n = len(a)
    m = len(a[0])
    if n != m:
        raise ValueError('Matrix must be square')
    for i in range(n):
        for j in range(m):
            if a[i][j] != a[j][i]:
                raise ValueError('Matrix must be symmetric')

    tolmin = 1e-14
    tol = 1e-4

    v = zeromatrix(n,n)
    for i in range(n):
        v[i][i] = 1.0

    maxd = 0.0
    for i in range(n):
        maxd = max(abs(a[i][i]),maxd)

    for iter in range(50):
        nrot = 0
        for i in range(n):
            for j in range(i+1,n):
                aii = a[i][i]
                ajj = a[j][j]
                daij = abs(a[i][j])
                if daij > tol*maxd: # Screen small elements
                    nrot = nrot + 1
                    s = aii - ajj
                    ds = abs(s)
                    if daij > (tolmin*ds): # Check for sufficient precision
                        if (tol*daij) > ds:
                            c = s = 1/sqrt(2.)
                        else:
                            t = a[i][j]/s
                            u = 0.25/sqrt(0.25+t*t)
                            c = sqrt(0.5+u)
                            s = 2.*t*u/c

                        for k in range(n):
                            u = a[i][k]
                            t = a[j][k]
                            a[i][k] = s*t + c*u
                            a[j][k] = c*t - s*u

                        for k in range(n):
                            u = a[k][i]
                            t = a[k][j]
                            a[k][i] = s*t + c*u
                            a[k][j] = c*t - s*u

                        for k in range(n):
                            u = v[i][k]
                            t = v[j][k]
                            v[i][k] = s*t + c*u
                            v[j][k] = c*t - s*u

                        a[j][i] = a[i][j] = 0.0
                        maxd = max(maxd,abs(a[i][i]),abs(a[j][j]))
        if nrot == 0 and tol <= tolmin:
            break
        tol = max(tolmin,tol*0.99e-2)

    if nrot != 0:
        raise RuntimeError("Jacobi iteration did not converge in 50 passes")

    # Sort eigenvalues and eigenvectors into increasing order
    e = zerovector(n)
    for i in range(n):
        e[i] = a[i][i]
        for j in range(i):
            if e[j] > e[i]:
                (e[i],e[j]) = (e[j],e[i])
                (v[i],v[j]) = (v[j],v[i])

    return (v,e)

def hessian_update_bfgs(hp, dx, g, gp):
    '''
    Apply the BFGS update to the approximate Hessian h[][].

    hp[][] = Hessian matrix from the previous iteration
    dx[]  = step from the previous iteration
    .       (dx[] = x[] - xp[] where xp[] is the previous point)
    g[]   = gradient at the current point
    gp[]  = gradient at the previous point

    Returns the updated Hessian.
    '''
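    # For reference (an added note): with dg = g - gp and hdx = hp*dx, the loop
    # below applies the standard BFGS update
    #   h = hp + (dg dg^T)/(dx . dg) - (hdx hdx^T)/(dx . hdx)
    # and skips the update when dx and dg are nearly orthogonal.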

    n = len(hp)
    hdx  = mxv(hp,dx)
    dg = zerovector(n)
    for i in range(n):
        dg[i] = g[i] - gp[i]

    dxhdx = dot(dx,hdx)
    dxdx  = dot(dx,dx)
    dxdg  = dot(dx,dg)
    dgdg  = dot(dg,dg)
    h = copymatrix(hp)

    if (dxdx > 0.0) and (dgdg > 0.0) and (abs(dxdg/sqrt(dxdx*dgdg)) > 1.e-4):
        for i in range(n):
            for j in range(n):
                h[i][j] = h[i][j] + dg[i]*dg[j]/dxdg - hdx[i]*hdx[j]/dxhdx
    else:
        print(' BFGS not updating dxdg (%e), dgdg (%e), dxhdx (%e), dxdx (%e)' % (dxdg, dgdg, dxhdx, dxdx))

    return h

def quasinr(func, guess, tol, eps, printvar=None):
    '''
    Unconstrained minimization of a function of n variables
    without analytic derivatives, using quasi-Newton with a BFGS update
    and numerical gradients.

    func(x) is a function that takes an array of n values and
    returns the function value

    guess[] is an array of n values for the initial guess

    tol is the convergence criterion for the maximum value
    of the gradient

    eps is the expected precision in the function value

    printvar(x) is an optional user function to print the values of
    the parameters each macro iteration
    '''
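    # Illustrative call (a sketch; mirrors the test in __main__ at the bottom):
    #   (value, x) = quasinr(f, [1.0, 0.5, 0.3, -0.4], 1e-4, 1e-10)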

    n = len(guess)
    x = copyvector(guess)
    s = zerovector(n)
    g = zerovector(n)
    gp = zerovector(n)
    step = zerovector(n)
    hessian = zeromatrix(n,n)

    alpha = 0.0

    for iter in range(50*n):
        (value,g,h) = numderiv(func, x, step, eps)
        gmax = max(map(abs,g))

        print(' ')
        print(' iter    gmax         value ')
        print(' ---- --------- ----------------')
        print("%4i %9.2e %16.8f" % (iter,gmax,value))

        if printvar:
            printvar(x)

        if gmax < tol:
            print(' Converged!')
            break

        if iter == 0:
            for i in range(n):
                hessian[i][i] = max(abs(h[i]),1e-4)
        else:
            hessian = hessian_update_bfgs(hessian, s, g, gp)

        (v,e) = jacobi(hessian)
        emax = max(map(abs,e))
        emin = emax*1e-4   # Control noise in small eigenvalues
        print('\n Eigenvalues of the Hessian:')
        printvector(e)

        # Transform to spectral form, take the step, transform back
        gs = mxv(v,g)
        for i in range(n):
            if e[i] < emin:
                print(' Mode %d: small/negative eigenvalue (%f).' % (i, e[i]))
                s[i] = -gs[i]/emin
            else:
                s[i] = -gs[i]/e[i]
        s = mxv(transpose(v),s)

        # Apply overall step restriction ... a better LS obviates this
        scale = 1.0
        for i in range(n):
            trust = max(abs(x[i]),abs(x[i]/sqrt(max(1e-4,abs(hessian[i][i])))))
            if abs(s[i]) > trust:
                print(' restricting ', i, trust, abs(x[i]), abs(x[i]/sqrt(abs(hessian[i][i]))), s[i])
                scale = min(scale,trust/abs(s[i]))
        if scale != 1.0:
            for i in range(n):
                s[i] = s[i]*scale

        (alpha,value) = linesearch(func, x, s, dot(s,g), eps)

        if alpha == 0.0:
            print(' Insufficient precision to proceed further')
            break

        for i in range(n):
            s[i] = s[i]*alpha
            x[i] = x[i] + s[i]
            gp[i] = g[i]
    return (value,x)


def cgminold(func, dfunc, guess, tol):
    '''
    Simple conjugate gradient assuming analytic derivatives.
    '''
    n = len(guess)
    x = copyvector(guess)
    s = zerovector(n)
    g = zerovector(n)
    gp = zerovector(n)
    value = func(x)

    for iter in range(10*n):
        g = dfunc(x)
        gmax = max(map(abs,g))

        print(' ')
        print(' iter    gmax         value ')
        print(' ---- --------- ----------------')
        print("%4i %9.2e %16.8f" % (iter,gmax,value))

        if gmax < tol:
            print(' Converged!')
            break

        if (iter == 0) or ((iter%20) == 0):
            beta = 0.0
        else:
            beta = (dot(g,g) - dot(g,gp))/(dot(s,g)-dot(s,gp))

        for i in range(n):
            s[i] = -g[i] + beta*s[i]

        (alpha,value) = linesearch(func, x, s, dot(s,g), 1e-12)

        for i in range(n):
            s[i] = s[i]*alpha
            x[i] = x[i] + s[i]
            gp[i] = g[i]

    return (value,x)


def cgmin(func, dfunc, guess, tol, precond=None, reset=None):
    '''
    Conjugate gradient with optional preconditioning and
    use of analytic gradients.
    '''
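    # Notes (an interpretation of the code below, not part of the original):
    # - precond, if given, should map a gradient vector to a preconditioned
    #   gradient of the same length (see the example preconditioner in __main__).
    # - beta is the Hestenes-Stiefel-style quotient
    #   dot(precondg, g - gp) / dot(s, g - gp), reset to zero every 'reset' steps.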
    n = len(guess)
    x = copyvector(guess)
    s = zerovector(n)
    g = zerovector(n)
    gp = zerovector(n)
    value = func(x)

    if not reset:
        reset = n
    reset = min(reset,n)

    for iter in range(10*n):
        g = dfunc(x)
        gmax = max(map(abs,g))

        print(' ')
        print(' iter    gmax         value ')
        print(' ---- --------- ----------------')
        print("%4i %9.2e %16.8f" % (iter,gmax,value))

        if gmax < tol:
            print(' Converged!')
            break

        if precond:
            precondg = precond(g)
        else:
            precondg = g

        if (iter % reset) == 0:
            beta = 0.0
        else:
            beta = (dot(precondg,g) - dot(precondg,gp))/(dot(s,g)-dot(s,gp))

        for i in range(n):
            s[i] = -precondg[i] + beta*s[i]

        (alpha,value) = linesearch(func, x, s, dot(s,g),
                                   max(1e-16,abs(value)*1e-12))

        for i in range(n):
            s[i] = s[i]*alpha
            x[i] = x[i] + s[i]
            gp[i] = g[i]

    return (value,x)

def cgmin2(func, guess, tol, eps, printvar=None, reset=None):
    '''
    Unconstrained minimization of a function of n variables
    without analytic derivatives, using conjugate gradient with
    diagonal preconditioning.

    func(x) is a function that takes an array of n values and
    returns the function value

    guess[] is an array of n values for the initial guess

    tol is the convergence criterion for the maximum value
    of the gradient

    eps is the expected precision in the function value

    printvar(x) is an optional user function to print the values of
    the parameters each iteration

    reset is the number of iterations between forced resets of the
    conjugacy.  In principle this could be n, but noise in the
    numerical gradients makes a smaller number a better choice.
    '''
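    # Illustrative call (a sketch; mirrors the test in __main__ at the bottom):
    #   (value, x) = cgmin2(f, [1.0, 0.5, 0.3, -0.4], 1e-4, 1e-10, reset=20)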

    n = len(guess)
    x = copyvector(guess)
    s = zerovector(n)
    g = zerovector(n)
    gp = zerovector(n)
    step = zerovector(n)
    precondg = zerovector(n)

    alpha = 0.0

    if not reset:
        reset = n
    reset = min(reset,n)

    for iter in range(50*n):
        (value,g,hh) = numderiv(func, x, step, eps)
        gmax = max(map(abs,g))

        print(' ')
        print(' iter    gmax         value ')
        print(' ---- --------- ----------------')
        print("%4i %9.2e %16.8f" % (iter,gmax,value))

        if printvar:
            printvar(x)

        if gmax < tol:
            print(' Converged!')
            break

        if (iter % reset) == 0:
            # On the first iteration, or when not applying conjugacy,
            # we can recompute the diagonal preconditioner
            h = copyvector(hh)
            for i in range(n):
                h[i] = max(abs(h[i]),1e-6)

        # Preconditioning with the diagonal of the Hessian
        for i in range(n):
            precondg[i] = g[i] / h[i]

        # Should be able to reset every n steps, but noisy gradients
        # mean that we don't have enough info.
        if (iter % reset) == 0:
            if iter != 0:
                print(" Resetting conjugacy")
            beta = 0.0
        else:
            beta = (dot(precondg,g) - dot(precondg,gp))/(dot(s,g)-dot(s,gp))

        for i in range(n):
            s[i] = -precondg[i] + beta*s[i]

        (alpha,value) = linesearch(func, x, s, dot(s,g), eps)

        if alpha == 0.0:
            # LS failed, probably due to lack of precision.
            if beta != 0.0:
                print(" LS failed - trying the steepest-descent direction")
                for i in range(n):
                    s[i] = -g[i]
                (alpha,value) = linesearch(func, x, s, dot(s,g), eps)
            if alpha == 0.0:
                print(" Insufficient precision to proceed further")
                break

        for i in range(n):
            s[i] = s[i]*alpha
            x[i] = x[i] + s[i]
            gp[i] = g[i]
    return (value,x)


if __name__ == '__main__':

    def precond(g):
        # Used to test the optional preconditioner for cgmin().
        precondg = copyvector(g)
        for i in range(len(g)):
            precondg[i] = precondg[i]/(i+2.0)
        return precondg

    def df(x):
        d = zerovector(len(x))
        for i in range(len(x)):
            d[i] = x[i]*(i+1)
            for j in range(len(x)):
                d[i] = d[i] + x[j]
        return d

    def f(x):
        total = 0.0
        for i in range(len(x)):
            for j in range(len(x)):
                total = total + 0.5*x[i]*x[j]
        for i in range(len(x)):
            total = total + 0.5*x[i]*x[i]*(i+1)
        return total
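    # f and df above define the test problem: a convex quadratic
    #   f(x) = 0.5*(sum_i x_i)**2 + 0.5*sum_i (i+1)*x_i**2
    # whose gradient is df(x) and whose minimum is f = 0 at x = 0.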

    print('\n\n   TESTING QUASI-NR SOLVER \n\n')
    quasinr(f, [1.,0.5,0.3,-0.4], 1e-4, 1e-10)

    print('\n\n   TESTING CG WITH NUM. GRAD. AND DIAG. PRECOND.\n\n')
    cgmin2(f, [1.,0.5,0.3,-0.4], 1e-4, 1e-10, reset=20)

    print('\n\n   TESTING CG WITH ANAL. GRAD. AND WITHOUT OPTIONAL PRECOND.\n\n')
    cgmin(f, df, [1.,0.5,0.3,-0.4], 1e-4)

    print('\n\n   TESTING CG WITH ANAL. GRAD. AND WITH OPTIONAL PRECOND.\n\n')
    cgmin(f, df, [1.,0.5,0.3,-0.4], 1e-4, precond=precond)

    print('\n\n   TESTING CG WITH ANAL. GRAD. AND NO PRECOND.\n\n')
    cgminold(f, df, [1.,0.5,0.3,-0.4], 1e-4)

    print('\n\n   TESTING JACOBI EIGENSOLVER\n\n')
    n = 50
    a = zeromatrix(n,n)
    for i in range(n):
        for j in range(i,n):
            a[j][i] = a[i][j] = (i*j+1.)/(i+j+1.)

    (v,e) = jacobi(a)

    print(' eigenvalues')
    printvector(e)
    #print(' v ')
    #printmatrix(v)
    ev = mxm(v,a)
    for i in range(n):
        err = 0.0
        for j in range(n):
            err = max(err,abs(ev[i][j] - e[i]*v[i][j]))
        err = err/(n*max(1.0,abs(e[i])))
        if err > 1e-12:
            print(' Error in eigenvector ', i, err)