1#!/usr/local/bin/python3.8
2"""Multi purpose script for Iterative Integral Equation methods."""
3#
4# Copyright 2009-2021 The VOTCA Development Team (http://www.votca.org)
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#     http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17#
18# Symbols:
19# g: RDF
20# U: potential
21# dU: potential update (U_{k+1} - U_k)
22#
23# suffixes:
24# _cur: current (of step k if currently doing iteration k)
25# _tgt: target
26# _ce: core_end (where RDF becomes > 0)
27# _co: cut_off
28# _sc: single_component
29#
30# prefixes:
31# ndx_: index
32
33
34import argparse
35import sys
36try:
37    import numpy as np
38except ImportError:
39    print("Numpy is not installed, but needed for the iterative integral equation "
40          "methods.")
41    raise
42if not sys.version_info >= (3, 5):
43    raise Exception("This script needs Python 3.5+.")
44
45
46BAR_PER_MD_PRESSURE = 16.6053904
47G_MIN = 1e-10
48G_MIN_EXTRAPOLATE = 1e-1
49np.seterr(all='raise')
50
51
52def readin_table(filename):
53    """Read in votca table."""
54    table_dtype = {'names': ('x', 'y', 'y_flag'),
55                   'formats': ('f', 'f', 'U2')}
56    x, y, y_flag = np.loadtxt(filename, dtype=table_dtype, comments=['#', '@'],
57                              unpack=True)
58    return x, y, y_flag
59
60
61def saveto_table(filename, x, y, y_flag, comment=""):
62    """Save votca table."""
63    data = np.zeros((len(x),), dtype='f, f, U2')
64    data['f0'] = x
65    data['f1'] = y
66    data['f2'] = y_flag
67    np.savetxt(filename, data, header=comment, fmt=['%e', '%e', '%s'])
68
69
70def calc_grid_spacing(grid, relative_tolerance=0.01):
71    """Returns the spacing of an equidistant 1D grid."""
72    diffs = np.diff(grid)
73    if abs((max(diffs) - min(diffs)) / max(diffs)) > relative_tolerance:
74        raise Exception('the grid is not equidistant')
75    return np.mean(diffs)
76
77
78def test_calc_grid_spacing():
79    """Check spacing of some grid."""
80    grid = np.linspace(0, 2 * np.pi, num=361)
81    grid_spacing = calc_grid_spacing(grid)
82    assert np.allclose(grid_spacing, np.pi/180)
83
84
85def fourier(r, f):
86    """Compute the radially 3D FT of a radially symmetric function.
87
88    The frequency grid is also returned.  Some special extrapolations are used
89    to make the results consistent. This function is isometric meaning it can
90    be used to calculate the FT and the inverse FT.  That means inputs can also
91    be omega and f_hat which results in r and f.
92
93    Args:
94        r: Input grid. Must be evenly spaced. Can start at zero or at Δr, but nowhere
95            else.
96        f: Input function. Must have same length as r and correspond to its values.
97
98    Returns:
99        (omega, f_hat): The reciprocal grid and the FT of f.
100
101    """
102    Delta_r = calc_grid_spacing(r)
103    r0_added = False
104    if np.isclose(r[0], Delta_r):
105        r = np.concatenate(([0], r))
106        f = np.concatenate(([0], f))
107        r0_added = True
108    elif np.isclose(r[0], 0.0):
109        pass
110    else:
111        raise Exception('this function can not handle this input')
112    # if the input is even, np.fft.rfftfreq would end with the Nyquist frequency.
113    # But there the imaginary part of the FT is always zero, so we alwas append a zero
114    # to obtain a odd grid.
115    if len(r) % 2 == 0:  # even
116        r = np.concatenate((r, [r[-1]+Delta_r]))
117        f = np.concatenate((f, [0]))
118        n = (len(r)-1)*2-1
119    else:  # odd
120        n = len(r)*2-1
121    omega = np.fft.rfftfreq(n=n, d=Delta_r)
122    with np.errstate(divide='ignore', invalid='ignore'):
123        f_hat = -2 / omega / 1 * Delta_r * np.imag(np.fft.rfft(r * f, n=n))
124    if r0_added:
125        f_hat = f_hat[1:]
126        omega = omega[1:]
127    return omega, f_hat
128
129
130def test_fourier():
131    """Check that Fourier function is invertible."""
132    r = np.linspace(1, 100, 100)
133    f = np.random.random(100)
134    omega, f_hat = fourier(r, f)
135    r_, f_ = fourier(omega, f_hat)
136    assert np.allclose(r, r_)
137    assert np.allclose(f, f_)
138
139
140def gen_fourier_matrix(r, fourier_function):
141    """Make a fourier matrix."""
142    fourier_matrix = np.identity(len(r))
143    for col_index, col in enumerate(fourier_matrix.T):
144        _, fourier_matrix.T[col_index] = fourier_function(r, col)
145    return fourier_matrix
146
147
148def find_nearest_ndx(array, value):
149    """Find index of array where closest to value."""
150    array = np.asarray(array)
151    ndx = (np.abs(array - value)).argmin()
152    return ndx
153
154
155def test_find_nearest_ndx():
156    """Check finding the nearest index."""
157    tests = [
158        ([0, 1, 2, 3], -1, 0),
159        ([0, 1, 2, 3], 4.9, 3),
160        ([0, 1, 2, 3], 1.49, 1),
161        ([0, 1, 2, 3], 1.51, 2),
162    ]
163    for grid, val, ndx in tests:
164        assert find_nearest_ndx(grid, val) == ndx
165
166
167def find_after_cut_off_ndx(array, cut_off):
168    """
169    Find index of array after given cut_off.
170
171    Assumes array is sorted. Used for finding first index after cut_off.
172
173    """
174    array = np.asarray(array)
175    ndx_closest = find_nearest_ndx(array, cut_off)
176    if np.isclose(array[ndx_closest], cut_off):
177        return ndx_closest + 1
178    if array[-1] < cut_off:
179        return len(array)
180    ndx = np.where(array > cut_off)[0][0]
181    return ndx
182
183
184def test_find_after_cut_off_ndx():
185    """Check finding the index after a value."""
186    tests = [
187        ([0, 1, 2, 3], 1.0, 2),
188        ([0, 1, 2, 3], 1.01, 2),
189        ([0, 1, 2, 3], 1.99, 2),
190        ([0, 1, 2, 3], 3.5, 4),
191    ]
192    for grid, val, ndx in tests:
193        print(find_after_cut_off_ndx(grid, val), ndx)
194        assert find_after_cut_off_ndx(grid, val) == ndx
195
196
197def r0_removal(*arrays):
198    """
199    Remove the first element from a list of arrays.
200
201    Only does so if the first array starts with 0.
202
203    """
204    r0_removed = False
205    if np.isclose(arrays[0][0], 0.0):
206        r0_removed = True
207        arrays = tuple(map(lambda a: a[1:], arrays))
208    return r0_removed, arrays
209
210
211def calc_c(r, g_tgt, G_minus_g, n, rho):
212    """Calculate the direct correlation function c(r) from g(r)."""
213    r0_removed, (r, g_tgt, G_minus_g) = r0_removal(r, g_tgt, G_minus_g)
214    # total correlation function h
215    h = g_tgt - 1
216    # FT of h
217    omega, h_hat = fourier(r, h)
218    # direct correlation function c from OZ
219    if n == 1:
220        # single bead case
221        c_hat = h_hat / (1 + rho * h_hat)
222    else:
223        _, G_minus_g_hat = fourier(r, G_minus_g)
224        c_hat = h_hat / ((1 + n * rho * G_minus_g_hat)**2
225                         + (1 + n * rho * G_minus_g_hat) * n * rho * h_hat)
226    _, c = fourier(omega, c_hat)
227    if r0_removed:
228        c = np.concatenate(([np.nan], c))
229    return c
230
231
232def calc_g(r, c, G_minus_g, n, rho):
233    """Calculate the radial distribution function g(r) from c(r)."""
234    r0_removed, (r, c, G_minus_g) = r0_removal(r, c, G_minus_g)
235    omega, c_hat = fourier(r, c)
236    if n == 1:
237        h_hat = c_hat / (1 - rho * c_hat)
238    else:
239        _, G_minus_g_hat = fourier(r, G_minus_g)
240        h_hat = ((c_hat * (1 + n * rho * G_minus_g_hat)**2)
241                 / (1 - n * rho * (1 + n * rho * G_minus_g_hat) * c_hat))
242    _, h = fourier(omega, h_hat)
243    g = h + 1
244    if r0_removed:
245        g = np.concatenate(([np.nan], g))
246    return g
247
248
249def calc_dc_ext(r_short, r_long, c_k_short, g_k_short, g_tgt_short, G_minus_g_short, n,
250                rho):
251    """
252    Calculate Δc_ext with netwon method.
253
254    This term is used in the iterative extrapolation of g(r). Jacobian has an
255    implicit extrapolation of c with zeros on twice its original range.
256
257    """
258    # _k is iteration k
259    # _s is short
260    # _tgt is target
261    r0_removed, (r_short, c_k_short, g_k_short, g_tgt_short,
262                 G_minus_g_short) = r0_removal(r_short, c_k_short, g_k_short,
263                                               g_tgt_short, G_minus_g_short)
264    F = gen_fourier_matrix(r_long, fourier)
265    Finv = np.linalg.inv(F)
266    B = np.concatenate((np.diag(np.ones(len(r_short))),
267                        np.zeros((len(r_long) - len(r_short), len(r_short)))),
268                       axis=0)
269    Binv = np.linalg.pinv(B)
270    J = Binv @ (Finv @ np.diag((1 + n * rho * F @ B @ G_minus_g_short)**2
271                               / (1 - (1 + n * rho * F @ B @ G_minus_g_short)
272                                  * n * rho * F @ B @ c_k_short)**2) @ F) @ B
273    Jinv = np.linalg.pinv(J)
274    Δc = -1 * Jinv @ (g_k_short - g_tgt_short)
275    if r0_removed:
276        Δc = np.concatenate(([0], Δc))
277    return Δc
278
279
280def extrapolate_g(r_short, r_long, g_short, G_minus_g_short, n, rho,
281                  k_max=5, output_c=False, verbose=False):
282    """
283    Extrapolate an RDF with integral equation theory.
284
285    Assumes c = 0 in the extrapolated region. This is not a good aproximation
286    in systems with bonds.
287
288    Args:
289        r_short: Input grid.
290        r_long: Output grid.
291        g_short: Input RDF.
292        G_minus_g_short: Intramolecular distribution.
293        n: Number of equal beads per molecule.
294        rho: Number density of the molecules.
295        k_max: Number of iterations.
296        output_c: Wether to output the final direct correlation function.
297        verbose: Print convergence, dump direct correlation function.
298
299    Returns:
300        The extrapolated RDF and depending on output_c the c.
301
302    """
303    r0_removed, (r_short, r_long, g_short,
304                 G_minus_g_short) = r0_removal(r_short, r_long, g_short,
305                                               G_minus_g_short)
306    ndx_co = len(r_short)
307    G_minus_g_long = np.concatenate((G_minus_g_short,
308                                     np.zeros(len(r_long) - len(r_short))))
309    # starting guess for c
310    c_short_k = [calc_c(r_short, g_short, G_minus_g_short, n, rho)]
311    c_long_k = [np.zeros_like(r_long)]
312    c_long_k[0][:ndx_co] = c_short_k[0]
313    # evaluate starting guess
314    g_long_k = [calc_g(r_long, c_long_k[0], G_minus_g_long, n, rho)]
315    g_short_k = [g_long_k[0][:ndx_co]]
316    # Newton iterations
317    for it in range(1, k_max+1):
318        # update c
319        c_short_k.append(c_short_k[-1]
320                         + calc_dc_ext(r_short, r_long, c_short_k[-1], g_short_k[-1],
321                                       g_short, G_minus_g_short, n, rho))
322        c_long_k.append(np.zeros_like(r_long))
323        c_long_k[-1][:ndx_co] = c_short_k[-1]
324        # new g
325        g_long_k.append(calc_g(r_long, c_long_k[-1], G_minus_g_long, n, rho))
326        g_short_k.append(g_long_k[-1][:ndx_co])
327
328    if r0_removed:
329        for it in range(0, k_max+1):
330            c_short_k[it] = np.concatenate(([np.nan], c_short_k[it]))
331            c_long_k[it] = np.concatenate(([np.nan], c_long_k[it]))
332            g_long_k[it] = np.concatenate(([np.nan], g_long_k[it]))
333            g_short_k[it] = np.concatenate(([np.nan], g_short_k[it]))
334    if verbose:
335        np.savez_compressed('g-extrapolation.npz', c_short_k=c_short_k,
336                            c_long_k=c_long_k, g_long_k=g_long_k, g_short_k=g_short_k)
337    if output_c:
338        return g_long_k[-1], c_long_k[-1]
339    return g_long_k[-1]
340
341
342def calc_slices(r, g_tgt, g_cur, cut_off, verbose=False):
343    """
344    Generate slices for the regions used in the IIE methods.
345
346    There are different regions used:
347                 |       crucial     |                     # regions (slices)
348    |   core     |                 nocore               |
349    0---------core_end------------cut_off-----------r[-1]  # distances
350    0----------ndx_ce--------------ndx_co----------len(r)  # indices
351    nocore equals Δ from Delbary et al.
352    crucial equals Δ' from Delbary et al.
353    note: Vector w of HNCGN is in region crucial, but with one element less,
354    because of the antiderivative operator A0.
355
356    """
357    r, g_tgt, g_cur = map(np.asarray, (r, g_tgt, g_cur))
358    ndx_ce = max((np.where(g_tgt > G_MIN)[0][0],
359                  np.where(g_cur > G_MIN)[0][0]))
360    ndx_co = find_after_cut_off_ndx(r, cut_off)
361    crucial = slice(ndx_ce, ndx_co)
362    nocore = slice(ndx_ce, len(r))
363    if verbose:
364        print("ndx_ce: {}, ({})".format(ndx_ce, r[ndx_ce]))
365        print("ndx_co: {}, ({})".format(ndx_co, cut_off))
366        print("min(r): {}".format(min(r)))
367        print("max(r): {}".format(max(r)))
368        print("len(r): {}".format(len(r)))
369        print("crucial:", crucial.start, crucial.stop, min(r[crucial]), max(r[crucial]))
370        print("nocore:", nocore.start, nocore.stop, min(r[nocore]), max(r[nocore]))
371    return nocore, crucial
372
373
374def calc_U(r, g_tgt, G_minus_g, n, kBT, rho, closure):
375    """
376    Calculate a potential U using integral equation theory.
377
378    Supports symmetric molecules with n equal beads.
379
380    Args:
381        r: Distance grid.
382        g_tgt: Target RDF.
383        G_minus_g: Target intramolecular RDF. Can be an empty array if n == 1.
384        n: Number of equal beads per molecule.
385        kBT: Boltzmann constant times temperature.
386        rho: Number density of the molecules.
387        closure: OZ-equation closure ('hnc' or 'py').
388
389    Returns:
390        The calculated potential.
391
392    """
393    h = g_tgt - 1
394    c = calc_c(r, g_tgt, G_minus_g, n, rho)
395    with np.errstate(divide='ignore', invalid='ignore'):
396        if closure == 'hnc':
397            U = kBT * (-np.log(g_tgt) + h - c)
398        elif closure == 'py':
399            U = kBT * np.log(1 - c/g_tgt)
400    return U
401
402
403def calc_dU_newton(r, g_tgt, g_cur, G_minus_g, n, kBT, rho, cut_off,
404                   closure, newton_mod, cut_jacobian, verbose=False):
405    """
406    Calculate a potential update dU using Newtons method.
407
408    Supports symmetric molecules with n equal beads.
409
410    Args:
411        r: Distance grid.
412        g_tgt: Target RDF.
413        g_cur: Current RDF.
414        G_minus_g: Current intramolecular RDF. Can be an empty array if n == 1.
415        n: Number of equal beads per molecule.
416        kBT: Boltzmann constant times temperature.
417        rho: Number density of the molecules.
418        cut_off: Highest distance for potential update.
419        closure: OZ-equation closure ('hnc' or 'py').
420        newton_mod: Use IBI style update term.
421        cut_jacobian: Wether to cut the Jacobian. If False, then the full Jacobian will
422            be used.
423
424    Returns:
425        The calculated potential update.
426
427    """
428    r0_removed, (r, g_tgt, g_cur, G_minus_g) = r0_removal(r, g_tgt, g_cur, G_minus_g)
429    # calc slices and Delta_r
430    nocore, crucial = calc_slices(r, g_tgt, g_cur, cut_off, verbose=verbose)
431    # difference of rdf to target
432    Delta_g = g_cur - g_tgt
433    # FT of total correlation function 'h'
434    _, h_hat = fourier(r, g_cur - 1)
435    F = gen_fourier_matrix(r, fourier)
436    # dc/dg
437    if n == 1:
438        # single bead case
439        dcdg = np.linalg.inv(F) @ np.diag(1 / (1 + rho * h_hat)**2) @ F
440    else:
441        _, G_minus_g_hat = fourier(r, G_minus_g)
442        dcdg = np.linalg.inv(F) @ np.diag(1 / (1 + n * rho * G_minus_g_hat
443                                               + n * rho * h_hat)**2) @ F
444    # calculate jacobian^-1
445    # in the core where RDF=0, the jacobin will have -np.inf on the diagonal
446    # numpy correctly inverts this to zero
447    if closure == 'hnc':
448        if newton_mod:
449            with np.errstate(divide='ignore', invalid='ignore', under='ignore'):
450                jac_inv1 = kBT * (1 + np.log(g_tgt / g_cur) / Delta_g)
451            jac_inv2 = -kBT * dcdg
452            # Some fixes, because we want to define a jacobian matrix
453            # Unmodified Newton is less awkward
454            # Ensure this is zero, not nan, on the diagonal where Delta_g is zero
455            jac_inv1[Delta_g == 0] = 0
456            # Ensure this is -np.inf, not nan, on the diagonal in the core region
457            jac_inv1[:nocore.start] = -np.inf
458            jac_inv = np.diag(jac_inv1) + jac_inv2
459        else:
460            with np.errstate(divide='ignore', invalid='ignore', under='ignore'):
461                jac_inv = kBT * (np.diag(1 - 1 / g_cur) - dcdg)
462    elif closure == 'py':
463        raise NotImplementedError
464    jac = np.linalg.inv(jac_inv)
465    if cut_jacobian:
466        # cut jacobian and transform back
467        jac_cut = jac[crucial, crucial]
468        jac_inv_cut = np.linalg.inv(jac_cut)
469        dU = - (jac_inv_cut @ Delta_g[crucial])
470    else:
471        with np.errstate(invalid='ignore'):
472            dU = - (jac_inv @ Delta_g)[crucial]
473    if verbose:
474        np.savez_compressed('newton-arrays.npz', jac=jac, jac_inv=jac_inv, dU=dU)
475    # fill core and behind cut-off
476    dU = np.concatenate((np.full(nocore.start, np.nan), dU,
477                         np.full(len(r) - crucial.stop, np.nan)))
478    if r0_removed:
479        dU = np.concatenate(([np.nan], dU))
480    return dU
481
482
483def gauss_newton_constrained(A, C, b, d):
484    """Do a gauss-newton update, but eliminate Cx=d first."""
485    m, n = A.shape
486    p, n_ = C.shape
487    assert n == n_
488    b.shape = (m)
489    d.shape = (p)
490
491    if p > 1:
492        raise Exception("not implemented for p > 1")
493
494    A_elim = A.copy()
495    b_elim = b.copy()
496    for i in range(p):
497        pivot = np.argmax(abs(C[i]))  # find max value of C
498        A_elim = A - (np.ones_like(A) * A[:, pivot][:, np.newaxis]
499                      * C[i] / C[i, pivot])
500        b_elim = b - A[:, pivot] * d[i] / C[i, pivot]
501        A_elim = np.delete(A_elim, pivot, 1)
502    if p == n:
503        print("WARNING: solution of Gauss-Newton update determined fully "
504              "by constraints.")
505        x_elim = []
506    else:
507        x_elim = np.linalg.solve(A_elim.T @ A_elim, A_elim.T @ b_elim)
508    if p == 0:
509        # no constraints
510        x = x_elim
511    else:
512        x_pivot = (d[i] - np.delete(C, pivot, 1) @ x_elim) / C[i, pivot]
513        x = np.insert(x_elim, pivot, x_pivot)
514    return x
515
516
517def test_gauss_newton_constrained():
518    """Check Gauss-Newton with some simple cases."""
519    tests = [
520        (np.identity(10), np.ones((1, 10)), np.ones(10), np.array(2), [0.2]*10),
521        (np.identity(5), np.array([[0, 0, 1, 0, 0]]), np.ones(5), np.array(2),
522         [1, 1, 2, 1, 1]),
523        (np.array([[1, 0], [1, 1]]), np.zeros((0, 2)), np.ones(2), np.array([]),
524         [1.0, 0.0]),
525        (np.array([[1, 0], [1, 1]]), np.array([[0, 1]]), np.ones(2), np.array(0.1),
526         [0.95, 0.1]),
527    ]
528    for A, C, b, d, x in tests:
529        assert np.allclose(x, gauss_newton_constrained(A, C, b, d))
530
531
532def calc_dU_gauss_newton(r, g_tgt, g_cur, G_minus_g, n, kBT, rho,
533                         cut_off, constraints,
534                         verbose=False):
535    """
536    Calculate a potential update dU using the Gauss-Newton method.
537
538    Constraints can be added.
539
540    Args:
541        r: Distance grid.
542        g_tgt: Target RDF.
543        g_cur: Current RDF.
544        kBT: Boltzmann constant times temperature.
545        rho: Number density of the molecules.
546        cut_off: Highest distance for potential update.
547        constraints: List of dicts, which describe physical constraints.
548
549    Returns:
550        The calculated potential update.
551
552    """
553    r0_removed, (r, g_tgt, g_cur, G_minus_g) = r0_removal(r, g_tgt, g_cur, G_minus_g)
554    # calc slices and Delta_r
555    nocore, crucial = calc_slices(r, g_tgt, g_cur, cut_off, verbose=verbose)
556    Delta_r = calc_grid_spacing(r)
557    # pair correlation function 'h'
558    h = g_cur - 1
559    # special Fourier of h
560    _, h_hat = fourier(r, h)
561    # Fourier matrix
562    F = gen_fourier_matrix(r, fourier)
563    # dc/dg
564    if n == 1:
565        # single bead case
566        dcdg = np.linalg.inv(F) @ np.diag(1 / (1 + rho * h_hat)**2) @ F
567    else:
568        _, G_minus_g_hat = fourier(r, G_minus_g)
569        dcdg = np.linalg.inv(F) @ np.diag(1 / (1 + n * rho * G_minus_g_hat
570                                               + n * rho * h_hat)**2) @ F
571    # jacobian^-1 (matrix U in Delbary et al., with respect to potential)
572    with np.errstate(divide='ignore', invalid='ignore', under='ignore'):
573        jac_inv = kBT * (np.diag(1 - 1 / g_cur[nocore]) - dcdg[nocore, nocore])
574    # A0 matrix
575    A0 = Delta_r * np.triu(np.ones((len(r[nocore]), len(r[crucial])-1)), k=0)
576    # Jacobian with respect to force
577    J = np.linalg.inv(jac_inv) @ A0
578    # constraint matrix and vector
579    C = np.zeros((len(constraints), len(r[crucial])-1))
580    d = np.zeros(len(constraints))
581    # build constraint matrix and vector from constraints
582    if verbose:
583        print(constraints)
584    for c, constraint in enumerate(constraints):
585        if constraint['type'] == 'pressure':
586            # current pressure
587            p = constraint['current'] / BAR_PER_MD_PRESSURE
588            # target pressure
589            p_tgt = constraint['target'] / BAR_PER_MD_PRESSURE
590            # g_tgt(r_{i+1})
591            g_tgt_ip1 = g_tgt[crucial][1:]
592            # g_tgt(r_{i})
593            g_tgt_i = g_tgt[crucial][:-1]
594            # r_{i+1}
595            r_ip1 = r[crucial][1:]
596            # r_{i}
597            r_i = r[crucial][:-1]
598            # l vector
599            ll = (g_tgt_i + g_tgt_ip1) * (r_ip1**4 - r_i**4)
600            ll *= 1/12 * np.pi * rho**2
601            # set C row and d element
602            C[c, :] = ll
603            d[c] = p_tgt - p
604        else:
605            raise Exception("not implemented constraint type")
606    # residuum vector
607    res = g_tgt - g_cur
608    # switching to notation of Gander et al. for solving
609    A = J
610    b = res[nocore]
611    w = gauss_newton_constrained(A, C, b, d)
612    # dU
613    dU = A0 @ w
614    # fill core with nans
615    dU = np.concatenate((np.full(nocore.start, np.nan), dU))
616    # dump files
617    if verbose:
618        np.savez_compressed('gauss-newton-arrays.npz', A=A, b=b, C=C, d=d,
619                            jac_inv=jac_inv, A0=A0, J=J)
620    if r0_removed:
621        dU = np.concatenate(([np.nan], dU))
622    return dU
623
624
625def extrapolate_U_constant(dU, dU_flag):
626    """
627    Extrapolate the potential in the core region by a constant value.
628
629    Returns dU. U_{k+1} = U_k + dU is done by VOTCA.
630
631    """
632    dU_extrap = dU.copy()
633    # find first valid dU value
634    first_dU_index = np.where(dU_flag == 'i')[0][0]
635    first_dU = dU[first_dU_index]
636
637    # replace out of range dU values with constant first value
638    dU_extrap = np.where(dU_flag == 'i', dU, first_dU)
639    return dU_extrap
640
641
642def extrapolate_U_power(r, dU, U, g_tgt, g_min, kBT, verbose=False):
643    """
644    Extrapolate the potential in the core region.
645
646    This includes the point, where the RDF becomes larger than g_min or where
647    the new potential is convex.  A power function is used.  The PMF is fitted,
648    not U+dU. The fit is then shifted such that the graph is monotonous.
649
650    Extrapolation is done, because p-HNCGN often has artifacs at
651    the core, especially when pressure is far off.
652
653    Returns dU. U_{k+1} = U_k + dU is done by Votca.
654
655    """
656    # make copy
657    dU_extrap = dU.copy()
658    # calc PMF
659    with np.errstate(divide='ignore', over='ignore'):
660        pmf = np.nan_to_num(-kBT * np.log(g_tgt))
661    # index first minimum
662    ndx_fm = np.where(np.nan_to_num(np.diff(pmf)) > 0)[0][0]
663    # index core end
664    ndx_ce = np.where(g_tgt > g_min)[0][0]
665    if verbose:
666        print('ndx_fm, r_fm', ndx_fm, r[ndx_fm])
667        print('ndx_ce, r_ce', ndx_ce, r[ndx_ce])
668    # fit pmf region
669    fit_region = slice(ndx_ce, ndx_ce + 3)
670    # fit pmf with power function a*x^b
671    pmf_shift = -pmf[ndx_fm] + 0.01
672    fit_x = np.log(r[fit_region])
673    fit_y = np.log(pmf[fit_region] + pmf_shift)
674    b, log_a = np.polyfit(fit_x, fit_y, 1)
675    a = np.exp(log_a)
676    if verbose:
677        print('pmf fit a*x^b. Coefficients a, b:', a, b)
678    with np.errstate(divide='ignore', over='ignore', under='ignore'):
679        pmf_fit = np.nan_to_num(a * r**b - pmf_shift)
680
681    # region to extrapolate
682    ndx_ex1 = ndx_ce + 1
683    ndx_ex2 = np.where(np.nan_to_num(np.diff(np.diff(U + dU))) > 0)[0][0]
684    ndx_ex = max(ndx_ex1, ndx_ex2)
685    if verbose:
686        print('extrapolate up to:', r[ndx_ex])
687    # extrapolate
688    U_extrap = U + dU
689    U_extrap[:ndx_ex] = pmf_fit[:ndx_ex] + (U_extrap[ndx_ex] - pmf_fit[ndx_ex])
690    dU_extrap = U_extrap - U
691    return dU_extrap
692
693
694def shift_U_cutoff_zero(dU, r, U, cut_off):
695    """Make potential zero at and beyond cut-off."""
696    dU_shift = dU.copy()
697    # shift dU to be zero at cut_off and beyond
698    ndx_co = find_after_cut_off_ndx(r, cut_off)
699    U_before_cut_off = U[ndx_co-1] + dU[ndx_co-1]
700    dU_shift -= U_before_cut_off
701    dU_shift[ndx_co:] = -1 * U[ndx_co:]
702    return dU_shift
703
704
705def fix_U_near_cut_off_full(r, U, cut_off):
706    """
707    Modify the potential close to the cut-off to make it more smooth.
708
709    The derivative of the potential between the last two points will be equal
710    to the derivative between the two points before. The original last two
711    points of dU are therefore ignored.
712
713    This also helps agains an artifact of p-HNCGN, where the last value of dU
714    is a spike.
715
716    """
717    U_fixed = U.copy()
718    ndx_co = find_after_cut_off_ndx(r, cut_off)
719    second_last_deriv = U[ndx_co-2] - U[ndx_co-3]
720    shift = -1.0 * second_last_deriv - U[ndx_co-2]
721    # modify up to second last value
722    U_fixed[:ndx_co-1] += shift
723    return U_fixed
724
725
726def upd_flag_g_smaller_g_min(flag, g, g_min):
727    """
728    Update the flag to 'o' for small RDF.
729
730    Take a flag list, copy it, and set the flag to 'o'utside if g is smaller
731    g_min.
732
733    """
734    flag_new = flag.copy()
735    for i, gg in enumerate(g):
736        if gg < g_min:
737            flag_new[i] = 'o'
738    return flag_new
739
740
741def upd_flag_by_other_flag(flag, other_flag):
742    """
743    Update a flag array by another flag array.
744
745    Take a flag list, copy it, and set the flag to 'o'utside where some
746    other flag list is 'o'.
747
748    """
749    flag_new = flag.copy()
750    for i, of in enumerate(other_flag):
751        if of == 'o':
752            flag_new[i] = 'o'
753    return flag_new
754
755
756def upd_U_const_first_flag_i(U, flag):
757    """
758    Extrapolate potential with constant values where flag is 'o'.
759
760    Take a potential list, copy it, and set the potential at places where
761    flag is 'o' to the first value where flag is 'i'.
762
763    """
764    U_new = U.copy()
765    # find first valid U value
766    first_U_index = np.where(flag == 'i')[0][0]
767    first_U = U_new[first_U_index]
768    # replace out of range U values
769    U_new = np.where(flag == 'i', U_new, first_U)
770    return U_new
771
772
773def upd_U_zero_beyond_cut_off(r, U, cut_off):
774    """Shift potential to be zero at cut_off and beyond."""
775    U_new = U.copy()
776    index_cut_off = find_nearest_ndx(r, cut_off)
777    U_cut_off = U_new[index_cut_off]
778    U_new -= U_cut_off
779    U_new[index_cut_off:] = 0
780    return U_new
781
782
783def get_args():
784    """Define and parse command line arguments."""
785    description = """
786    Calculate U or ΔU with Integral Equations.
787    """
788    parser = argparse.ArgumentParser(description=description)
789    # subparsers
790    subparsers = parser.add_subparsers(dest='subcommand')
791    parser_pot_guess = subparsers.add_parser(
792        'potential_guess',
793        help='potential guess from inverting integral equation')
794    parser_newton = subparsers.add_parser(
795        'newton',
796        help='potential update using Newton method')
797    parser_newton_mod = subparsers.add_parser(
798        'newton-mod',
799        help='potential update using a modified Newton method')
800    parser_gauss_newton = subparsers.add_parser(
801        'gauss-newton',
802        help='potential update using Gauss-Newton method')
803
804    # all subparsers
805    for pars in [parser_pot_guess, parser_newton, parser_newton_mod,
806                 parser_gauss_newton]:
807        pars.add_argument('-v', '--verbose', dest='verbose',
808                          help='save some intermeditary results',
809                          action='store_const', const=True, default=False)
810        pars.add_argument('--closure', type=str, choices=['hnc', 'py'],
811                          required=True,
812                          help='Closure equation to use for the OZ equation')
813        pars.add_argument('--g-tgt', type=argparse.FileType('r'),
814                          nargs='+', required=True,
815                          metavar=('X-X.dist.tgt', 'X-Y.dist.tgt'),
816                          help='RDF target files')
817        pars.add_argument('--kBT', type=float, required=True, help='')
818        pars.add_argument('--densities', type=float,
819                          nargs='+', required=True,
820                          metavar=('rho_X', 'rho_Y'),
821                          help='list of number densities of beads')
822        pars.add_argument('--n-intra', type=int,
823                          nargs='+', required=True,
824                          metavar=('n_X', 'n_Y'),
825                          help='number of beads per molecule')
826        # todo: this might need to be split up for multicomponent
827        pars.add_argument('--cut-off', type=float, required=True,
828                          help='cutt-off (co) of potential')
829        pars.add_argument('--U-out', type=argparse.FileType('wb'),
830                          nargs='+', required=True,
831                          metavar=('X-X.dpot.new', 'X-Y.dpot.new'),
832                          help='U or ΔU output files')
833        pars.add_argument('--g-extrap-factor', type=float, required=False,
834                          help='factor by which to extrapolate RDFs')
835    # intial potential guess subparsers
836    for pars in [parser_pot_guess]:
837        pars.add_argument('--G-tgt', type=argparse.FileType('r'),
838                          nargs='+', required=False,
839                          metavar=('X-X-incl.dist.tgt', 'X-Y-incl.dist.tgt'),
840                          help='RDF (including intramolecular) target files')
841    # update potential subparsers
842    for pars in [parser_newton, parser_newton_mod, parser_gauss_newton]:
843        pars.add_argument('--g-cur', type=argparse.FileType('r'),
844                          nargs='+', required=True,
845                          metavar=('X-X.dist.cur', 'X-Y.dist.cur'),
846                          help='RDF current files')
847        pars.add_argument('--G-cur', type=argparse.FileType('r'),
848                          nargs='+', required=False,
849                          metavar=('X-X-incl.dist.cur', 'X-Y-incl.dist.cur'),
850                          help='RDF (including intramolecular) current files')
851        pars.add_argument('--U-cur', type=argparse.FileType('r'),
852                          nargs='+', required=True,
853                          metavar=('X-X.pot.cur', 'X-Y.pot.cur'),
854                          help='potential current files')
855    # Newton's method only options
856    for pars in [parser_newton, parser_newton_mod]:
857        pars.add_argument('--cut-jacobian', dest='cut_jacobian', action='store_true',
858                          help=('Cut the top-left part of the Jacobian before'
859                                + ' multiplying with Δg.'))
860        pars.set_defaults(cut_jacobian=False)
861    # HNCGN only options
862    parser_gauss_newton.add_argument('--pressure-constraint',
863                                     dest='pressure_constraint',
864                                     type=str, default=None)
865    parser_gauss_newton.add_argument('--extrap-near-core',
866                                     dest='extrap_near_core',
867                                     type=str, choices=['none', 'constant',
868                                                        'power'])
869    parser_gauss_newton.add_argument('--fix-near-cut-off',
870                                     dest='fix_near_cut_off',
871                                     type=str, choices=['none', 'full-deriv'])
872
873    args = parser.parse_args()
874
875    # check for subcommand
876    if args.subcommand is None:
877        parser.print_help()
878        raise Exception("subcommand needed")
879
880    # close writable files directly due to weird bug, where np.savetxt would
881    # write empty file, use filename later.
882    # I should report it, but on the other side, argparse opened files do not
883    # get closed at any point, so this is better
884    for f in args.U_out:
885        f.close()
886    args.U_out = [f.name for f in args.U_out]
887
888    return args
889
890
891def process_input(args):
892    """Process arguments and perform some checks."""
893    # infering variables from input
894    n_beads = len(args.densities)
895    # nr. of interactions equals nr. of elements in triangular matrix incl.
896    # the diagonal
897    n_interactions = (n_beads * (n_beads + 1)) // 2
898
899    # some checks on input
900    # test for same number of interactions
901    file_arguments_names = ['g_tgt', 'G_tgt', 'g_cur', 'G_cur', 'U_cur',
902                            'U_out']
903    file_arguments = [(argname, vars(args)[argname])
904                      for argname in file_arguments_names
905                      if vars(args).get(argname) is not None]
906    file_arguments_wrong = [(argname, flist)
907                            for argname, flist in file_arguments
908                            if len(flist) != n_interactions]
909    for argname, flist in file_arguments_wrong:
910        raise Exception("""N = {} densities provided, therefore
911                        there should be (N * (N + 1)) // 2 = {}
912                        files for {}, but {} was
913                        provided""".format(n_beads, n_interactions, argname,
914                                           [f.name for f in flist]))
915    # multicomponent not implemented
916    if any((len(files) != 1 for files in [args.g_tgt, args.densities])):
917        raise Exception('not implemented for multiple components!')
918    if len(args.n_intra) > 1:
919        raise Exception('not implemented for multiple components!')
920    # todo: if n_intra == 1, check if G close to g at G[-1]
921    # todo for multicomponent: check order of input and output by filename
922    # todo for multicomponent: allow not existing X-Y? particles would overlap
923    # todo for multicomponent: do not allow same bead on different moleculetypes
924
925    # load input arrays
926    input_arrays = {}  # input_arrays['g_tgt'][0]['x']
927    input_arguments_names = ['g_tgt', 'G_tgt', 'g_cur', 'G_cur', 'U_cur']
928    input_arguments = [(argname, vars(args)[argname])
929                       for argname in input_arguments_names
930                       if vars(args).get(argname) is not None]
931    for argname, flist in input_arguments:
932        input_arrays[argname] = []
933        for f in flist:
934            x, y, flag = readin_table(f)
935            input_arrays[argname].append({'x': x, 'y': y, 'flag': flag})
936
937    # todo: compare grids of all
938    r = input_arrays['g_tgt'][0]['x']
939    # compare grids of G_cur and G_tgt with g_tgt in smart way
940    # calculate G_minus_g
941    if 'G_tgt' in input_arrays:
942        g_name = 'g_tgt'
943        G_name = 'G_tgt'
944    elif 'G_cur' in input_arrays:
945        g_name = 'g_cur'
946        G_name = 'G_cur'
947    else:
948        print("No intramolecular correlations provided, assuming there are \
949              none.")
950        # this will result in zero arrays in input_arrays['G_minus_g'] in the
951        # below loop
952        g_name = 'g_tgt'
953        G_name = 'g_tgt'
954    input_arrays['G_minus_g'] = []
955    for g_dict, G_dict in zip(input_arrays[g_name], input_arrays[G_name]):
956        G_minus_g = np.zeros_like(g_dict['y'])
957        G_minus_g[:G_dict['y'].shape[0]] = (G_dict['y']
958                                            - g_dict['y']
959                                            [:G_dict['y'].shape[0]])
960        input_arrays['G_minus_g'].append({'y': G_minus_g})
961    return r, input_arrays
962
963
964def potential_guess(r, input_arrays, args):
965    """Do the potential guess."""
966    U1 = calc_U(r,
967                input_arrays['g_tgt'][0]['y'],
968                input_arrays['G_minus_g'][0]['y'],
969                args.n_intra[0], args.kBT, args.densities[0],
970                args.closure)
971    U_flag1 = np.array(['i'] * len(r))
972    U_flag2 = upd_flag_g_smaller_g_min(U_flag1,
973                                       input_arrays['g_tgt'][0]['y'],
974                                       G_MIN)
975    U2 = upd_U_const_first_flag_i(U1, U_flag2)
976    U3 = upd_U_zero_beyond_cut_off(r, U2, args.cut_off)
977    comment = "created by: {}".format(" ".join(sys.argv))
978    saveto_table(args.U_out[0], r, U3, U_flag2, comment)
979
980
981def newton_update(r, input_arrays, args):
982    """Do the Newton update."""
983    # further tests on input
984    # if g_extrap_factor != 1.0 then it should hold that cut-off == r[-1]
985    # this is basically HNCN (ex) vs HNCN (ed)
986    if not np.isclose(args.g_extrap_factor, 1.0):
987        if not np.isclose(args.cut_off, r[-1]):
988            raise Exception('if g_extrap_factor is not equal 1.0, the cut-off '
989                            'needs to be the same as the range of all RDFs for '
990                            'Newton method.')
991        # extrapolate RDFs
992        if args.g_extrap_factor < 1:
993            raise Exception('g_extrap_factor needs to be larger than 1.0')
994        Delta_r = calc_grid_spacing(r)
995        r_short = np.copy(r)
996        r = np.arange(r[0], r[-1] * args.g_extrap_factor, Delta_r)
997        g_tgt = extrapolate_g(r_short, r, input_arrays['g_tgt'][0]['y'],
998                              input_arrays['G_minus_g'][0]['y'],
999                              args.n_intra[0], args.densities[0],
1000                              verbose=args.verbose)
1001        g_cur = extrapolate_g(r_short, r, input_arrays['g_cur'][0]['y'],
1002                              input_arrays['G_minus_g'][0]['y'],
1003                              args.n_intra[0], args.densities[0],
1004                              verbose=args.verbose)
1005        if args.verbose:
1006            np.savez_compressed('extrapolated.npz', r=r, g_tgt=g_tgt, g_cur=g_cur)
1007        G_minus_g = np.concatenate((input_arrays['G_minus_g'][0]['y'],
1008                                    np.zeros(len(r)-len(r_short))))
1009        cut_off = r_short[-1]
1010    else:
1011        g_tgt = input_arrays['g_tgt'][0]['y']
1012        g_cur = input_arrays['g_cur'][0]['y']
1013        G_minus_g = input_arrays['G_minus_g'][0]['y']
1014        cut_off = args.cut_off
1015    dU1 = calc_dU_newton(r, g_tgt, g_cur, G_minus_g,
1016                         args.n_intra[0], args.kBT,
1017                         args.densities[0], cut_off, args.closure,
1018                         args.subcommand == 'newton-mod',
1019                         args.cut_jacobian,
1020                         verbose=args.verbose)
1021    # go back to short r range if we extrapolated
1022    if not np.isclose(args.g_extrap_factor, 1.0):
1023        dU1 = dU1[:len(r_short)]
1024        r = r_short
1025    dU_flag1 = np.array(['i'] * len(r))
1026    dU_flag2 = upd_flag_g_smaller_g_min(dU_flag1,
1027                                        input_arrays['g_tgt'][0]['y'],
1028                                        G_MIN)
1029    dU_flag3 = upd_flag_g_smaller_g_min(dU_flag2,
1030                                        input_arrays['g_cur'][0]['y'],
1031                                        G_MIN)
1032    dU_flag4 = upd_flag_by_other_flag(dU_flag3,
1033                                      input_arrays['U_cur'][0]['flag'])
1034
1035    dU2 = upd_U_const_first_flag_i(dU1, dU_flag4)
1036    U_temp = upd_U_zero_beyond_cut_off(r,
1037                                       dU2 + input_arrays['U_cur'][0]['y'],
1038                                       cut_off)
1039    dU3 = U_temp - input_arrays['U_cur'][0]['y']
1040    comment = "created by: {}".format(" ".join(sys.argv))
1041    saveto_table(args.U_out[0], r, dU3, dU_flag4, comment)
1042
1043
1044def gauss_newton_update(r, input_arrays, args):
1045    """Do the Gauss-Newton update."""
1046    # parse constraints
1047    constraints = []
1048    if args.pressure_constraint is not None:
1049        p_target = float(args.pressure_constraint.split(',')[0])
1050        p_current = float(args.pressure_constraint.split(',')[1])
1051        constraints.append({'type': 'pressure', 'target': p_target,
1052                            'current': p_current})
1053
1054    # calc dU_pure
1055    dU_pure = calc_dU_gauss_newton(r,
1056                                   input_arrays['g_tgt'][0]['y'],
1057                                   input_arrays['g_cur'][0]['y'],
1058                                   input_arrays['G_minus_g'][0]['y'],
1059                                   args.n_intra[0], args.kBT,
1060                                   args.densities[0], args.cut_off,
1061                                   constraints, verbose=args.verbose)
1062
1063    # set dU_flag to 'o' inside the core
1064    dU_flag = np.where(np.isnan(dU_pure), 'o', 'i')
1065
1066    # select extrapolation
1067    if args.extrap_near_core == 'none':
1068        dU_extrap = np.nan_to_num(dU_pure)
1069    elif args.extrap_near_core == 'constant':
1070        dU_extrap = extrapolate_U_constant(dU_pure, dU_flag)
1071    elif args.extrap_near_core == 'power':
1072        dU_extrap = extrapolate_U_power(r, dU_pure,
1073                                        input_arrays['U_cur'][0]['y'],
1074                                        input_arrays['g_tgt'][0]['y'],
1075                                        G_MIN_EXTRAPOLATE, args.kBT,
1076                                        verbose=args.verbose)
1077    else:
1078        raise Exception("unknown extrapolation scheme for inside and near "
1079                        "core region: " + args.extrap_near_core)
1080    # shifts to correct potential after cut-off
1081    dU_shift = shift_U_cutoff_zero(dU_extrap, r,
1082                                   input_arrays['U_cur'][0]['y'],
1083                                   args.cut_off)
1084    # shifts to correct potential near cut-off
1085    if args.fix_near_cut_off == 'none':
1086        dU = dU_shift.copy()
1087    elif args.fix_near_cut_off == 'full-deriv':
1088        U_new = input_arrays['U_cur'][0]['y'] + dU_shift
1089        U_new = fix_U_near_cut_off_full(r, U_new, args.cut_off)
1090        dU = U_new - input_arrays['U_cur'][0]['y']
1091    else:
1092        raise Exception("unknown fix scheme for near cut-off: "
1093                        + args.fix_near_cut_off)
1094
1095    if args.verbose:
1096        np.savez_compressed('hncgn-dU.npz', r=r, dU_pure=dU_pure,
1097                            dU_extrap=dU_extrap, dU_shift=dU_shift)
1098    comment = "created by: {}".format(" ".join(sys.argv))
1099    saveto_table(args.U_out[0], r, dU, dU_flag, comment)
1100
1101
1102def main():
1103    # get command line arguments
1104    args = get_args()
1105
1106    # process and prepare input
1107    r, input_arrays = process_input(args)
1108
1109    # guess potential from distribution
1110    if args.subcommand in ['potential_guess']:
1111        potential_guess(r, input_arrays, args)
1112
1113    # newton update
1114    if args.subcommand in ['newton', 'newton-mod']:
1115        newton_update(r, input_arrays, args)
1116
1117    # gauss-newton update
1118    if args.subcommand in ['gauss-newton']:
1119        gauss_newton_update(r, input_arrays, args)
1120
1121
1122if __name__ == '__main__':
1123    main()
1124