1#!/usr/local/bin/python3.8 2"""Multi purpose script for Iterative Integral Equation methods.""" 3# 4# Copyright 2009-2021 The VOTCA Development Team (http://www.votca.org) 5# 6# Licensed under the Apache License, Version 2.0 (the "License"); 7# you may not use this file except in compliance with the License. 8# You may obtain a copy of the License at 9# 10# http://www.apache.org/licenses/LICENSE-2.0 11# 12# Unless required by applicable law or agreed to in writing, software 13# distributed under the License is distributed on an "AS IS" BASIS, 14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15# See the License for the specific language governing permissions and 16# limitations under the License. 17# 18# Symbols: 19# g: RDF 20# U: potential 21# dU: potential update (U_{k+1} - U_k) 22# 23# suffixes: 24# _cur: current (of step k if currently doing iteration k) 25# _tgt: target 26# _ce: core_end (where RDF becomes > 0) 27# _co: cut_off 28# _sc: single_component 29# 30# prefixes: 31# ndx_: index 32 33 34import argparse 35import sys 36try: 37 import numpy as np 38except ImportError: 39 print("Numpy is not installed, but needed for the iterative integral equation " 40 "methods.") 41 raise 42if not sys.version_info >= (3, 5): 43 raise Exception("This script needs Python 3.5+.") 44 45 46BAR_PER_MD_PRESSURE = 16.6053904 47G_MIN = 1e-10 48G_MIN_EXTRAPOLATE = 1e-1 49np.seterr(all='raise') 50 51 52def readin_table(filename): 53 """Read in votca table.""" 54 table_dtype = {'names': ('x', 'y', 'y_flag'), 55 'formats': ('f', 'f', 'U2')} 56 x, y, y_flag = np.loadtxt(filename, dtype=table_dtype, comments=['#', '@'], 57 unpack=True) 58 return x, y, y_flag 59 60 61def saveto_table(filename, x, y, y_flag, comment=""): 62 """Save votca table.""" 63 data = np.zeros((len(x),), dtype='f, f, U2') 64 data['f0'] = x 65 data['f1'] = y 66 data['f2'] = y_flag 67 np.savetxt(filename, data, header=comment, fmt=['%e', '%e', '%s']) 68 69 70def calc_grid_spacing(grid, relative_tolerance=0.01): 71 """Returns the spacing of an equidistant 1D grid.""" 72 diffs = np.diff(grid) 73 if abs((max(diffs) - min(diffs)) / max(diffs)) > relative_tolerance: 74 raise Exception('the grid is not equidistant') 75 return np.mean(diffs) 76 77 78def test_calc_grid_spacing(): 79 """Check spacing of some grid.""" 80 grid = np.linspace(0, 2 * np.pi, num=361) 81 grid_spacing = calc_grid_spacing(grid) 82 assert np.allclose(grid_spacing, np.pi/180) 83 84 85def fourier(r, f): 86 """Compute the radially 3D FT of a radially symmetric function. 87 88 The frequency grid is also returned. Some special extrapolations are used 89 to make the results consistent. This function is isometric meaning it can 90 be used to calculate the FT and the inverse FT. That means inputs can also 91 be omega and f_hat which results in r and f. 92 93 Args: 94 r: Input grid. Must be evenly spaced. Can start at zero or at Δr, but nowhere 95 else. 96 f: Input function. Must have same length as r and correspond to its values. 97 98 Returns: 99 (omega, f_hat): The reciprocal grid and the FT of f. 100 101 """ 102 Delta_r = calc_grid_spacing(r) 103 r0_added = False 104 if np.isclose(r[0], Delta_r): 105 r = np.concatenate(([0], r)) 106 f = np.concatenate(([0], f)) 107 r0_added = True 108 elif np.isclose(r[0], 0.0): 109 pass 110 else: 111 raise Exception('this function can not handle this input') 112 # if the input is even, np.fft.rfftfreq would end with the Nyquist frequency. 113 # But there the imaginary part of the FT is always zero, so we alwas append a zero 114 # to obtain a odd grid. 115 if len(r) % 2 == 0: # even 116 r = np.concatenate((r, [r[-1]+Delta_r])) 117 f = np.concatenate((f, [0])) 118 n = (len(r)-1)*2-1 119 else: # odd 120 n = len(r)*2-1 121 omega = np.fft.rfftfreq(n=n, d=Delta_r) 122 with np.errstate(divide='ignore', invalid='ignore'): 123 f_hat = -2 / omega / 1 * Delta_r * np.imag(np.fft.rfft(r * f, n=n)) 124 if r0_added: 125 f_hat = f_hat[1:] 126 omega = omega[1:] 127 return omega, f_hat 128 129 130def test_fourier(): 131 """Check that Fourier function is invertible.""" 132 r = np.linspace(1, 100, 100) 133 f = np.random.random(100) 134 omega, f_hat = fourier(r, f) 135 r_, f_ = fourier(omega, f_hat) 136 assert np.allclose(r, r_) 137 assert np.allclose(f, f_) 138 139 140def gen_fourier_matrix(r, fourier_function): 141 """Make a fourier matrix.""" 142 fourier_matrix = np.identity(len(r)) 143 for col_index, col in enumerate(fourier_matrix.T): 144 _, fourier_matrix.T[col_index] = fourier_function(r, col) 145 return fourier_matrix 146 147 148def find_nearest_ndx(array, value): 149 """Find index of array where closest to value.""" 150 array = np.asarray(array) 151 ndx = (np.abs(array - value)).argmin() 152 return ndx 153 154 155def test_find_nearest_ndx(): 156 """Check finding the nearest index.""" 157 tests = [ 158 ([0, 1, 2, 3], -1, 0), 159 ([0, 1, 2, 3], 4.9, 3), 160 ([0, 1, 2, 3], 1.49, 1), 161 ([0, 1, 2, 3], 1.51, 2), 162 ] 163 for grid, val, ndx in tests: 164 assert find_nearest_ndx(grid, val) == ndx 165 166 167def find_after_cut_off_ndx(array, cut_off): 168 """ 169 Find index of array after given cut_off. 170 171 Assumes array is sorted. Used for finding first index after cut_off. 172 173 """ 174 array = np.asarray(array) 175 ndx_closest = find_nearest_ndx(array, cut_off) 176 if np.isclose(array[ndx_closest], cut_off): 177 return ndx_closest + 1 178 if array[-1] < cut_off: 179 return len(array) 180 ndx = np.where(array > cut_off)[0][0] 181 return ndx 182 183 184def test_find_after_cut_off_ndx(): 185 """Check finding the index after a value.""" 186 tests = [ 187 ([0, 1, 2, 3], 1.0, 2), 188 ([0, 1, 2, 3], 1.01, 2), 189 ([0, 1, 2, 3], 1.99, 2), 190 ([0, 1, 2, 3], 3.5, 4), 191 ] 192 for grid, val, ndx in tests: 193 print(find_after_cut_off_ndx(grid, val), ndx) 194 assert find_after_cut_off_ndx(grid, val) == ndx 195 196 197def r0_removal(*arrays): 198 """ 199 Remove the first element from a list of arrays. 200 201 Only does so if the first array starts with 0. 202 203 """ 204 r0_removed = False 205 if np.isclose(arrays[0][0], 0.0): 206 r0_removed = True 207 arrays = tuple(map(lambda a: a[1:], arrays)) 208 return r0_removed, arrays 209 210 211def calc_c(r, g_tgt, G_minus_g, n, rho): 212 """Calculate the direct correlation function c(r) from g(r).""" 213 r0_removed, (r, g_tgt, G_minus_g) = r0_removal(r, g_tgt, G_minus_g) 214 # total correlation function h 215 h = g_tgt - 1 216 # FT of h 217 omega, h_hat = fourier(r, h) 218 # direct correlation function c from OZ 219 if n == 1: 220 # single bead case 221 c_hat = h_hat / (1 + rho * h_hat) 222 else: 223 _, G_minus_g_hat = fourier(r, G_minus_g) 224 c_hat = h_hat / ((1 + n * rho * G_minus_g_hat)**2 225 + (1 + n * rho * G_minus_g_hat) * n * rho * h_hat) 226 _, c = fourier(omega, c_hat) 227 if r0_removed: 228 c = np.concatenate(([np.nan], c)) 229 return c 230 231 232def calc_g(r, c, G_minus_g, n, rho): 233 """Calculate the radial distribution function g(r) from c(r).""" 234 r0_removed, (r, c, G_minus_g) = r0_removal(r, c, G_minus_g) 235 omega, c_hat = fourier(r, c) 236 if n == 1: 237 h_hat = c_hat / (1 - rho * c_hat) 238 else: 239 _, G_minus_g_hat = fourier(r, G_minus_g) 240 h_hat = ((c_hat * (1 + n * rho * G_minus_g_hat)**2) 241 / (1 - n * rho * (1 + n * rho * G_minus_g_hat) * c_hat)) 242 _, h = fourier(omega, h_hat) 243 g = h + 1 244 if r0_removed: 245 g = np.concatenate(([np.nan], g)) 246 return g 247 248 249def calc_dc_ext(r_short, r_long, c_k_short, g_k_short, g_tgt_short, G_minus_g_short, n, 250 rho): 251 """ 252 Calculate Δc_ext with netwon method. 253 254 This term is used in the iterative extrapolation of g(r). Jacobian has an 255 implicit extrapolation of c with zeros on twice its original range. 256 257 """ 258 # _k is iteration k 259 # _s is short 260 # _tgt is target 261 r0_removed, (r_short, c_k_short, g_k_short, g_tgt_short, 262 G_minus_g_short) = r0_removal(r_short, c_k_short, g_k_short, 263 g_tgt_short, G_minus_g_short) 264 F = gen_fourier_matrix(r_long, fourier) 265 Finv = np.linalg.inv(F) 266 B = np.concatenate((np.diag(np.ones(len(r_short))), 267 np.zeros((len(r_long) - len(r_short), len(r_short)))), 268 axis=0) 269 Binv = np.linalg.pinv(B) 270 J = Binv @ (Finv @ np.diag((1 + n * rho * F @ B @ G_minus_g_short)**2 271 / (1 - (1 + n * rho * F @ B @ G_minus_g_short) 272 * n * rho * F @ B @ c_k_short)**2) @ F) @ B 273 Jinv = np.linalg.pinv(J) 274 Δc = -1 * Jinv @ (g_k_short - g_tgt_short) 275 if r0_removed: 276 Δc = np.concatenate(([0], Δc)) 277 return Δc 278 279 280def extrapolate_g(r_short, r_long, g_short, G_minus_g_short, n, rho, 281 k_max=5, output_c=False, verbose=False): 282 """ 283 Extrapolate an RDF with integral equation theory. 284 285 Assumes c = 0 in the extrapolated region. This is not a good aproximation 286 in systems with bonds. 287 288 Args: 289 r_short: Input grid. 290 r_long: Output grid. 291 g_short: Input RDF. 292 G_minus_g_short: Intramolecular distribution. 293 n: Number of equal beads per molecule. 294 rho: Number density of the molecules. 295 k_max: Number of iterations. 296 output_c: Wether to output the final direct correlation function. 297 verbose: Print convergence, dump direct correlation function. 298 299 Returns: 300 The extrapolated RDF and depending on output_c the c. 301 302 """ 303 r0_removed, (r_short, r_long, g_short, 304 G_minus_g_short) = r0_removal(r_short, r_long, g_short, 305 G_minus_g_short) 306 ndx_co = len(r_short) 307 G_minus_g_long = np.concatenate((G_minus_g_short, 308 np.zeros(len(r_long) - len(r_short)))) 309 # starting guess for c 310 c_short_k = [calc_c(r_short, g_short, G_minus_g_short, n, rho)] 311 c_long_k = [np.zeros_like(r_long)] 312 c_long_k[0][:ndx_co] = c_short_k[0] 313 # evaluate starting guess 314 g_long_k = [calc_g(r_long, c_long_k[0], G_minus_g_long, n, rho)] 315 g_short_k = [g_long_k[0][:ndx_co]] 316 # Newton iterations 317 for it in range(1, k_max+1): 318 # update c 319 c_short_k.append(c_short_k[-1] 320 + calc_dc_ext(r_short, r_long, c_short_k[-1], g_short_k[-1], 321 g_short, G_minus_g_short, n, rho)) 322 c_long_k.append(np.zeros_like(r_long)) 323 c_long_k[-1][:ndx_co] = c_short_k[-1] 324 # new g 325 g_long_k.append(calc_g(r_long, c_long_k[-1], G_minus_g_long, n, rho)) 326 g_short_k.append(g_long_k[-1][:ndx_co]) 327 328 if r0_removed: 329 for it in range(0, k_max+1): 330 c_short_k[it] = np.concatenate(([np.nan], c_short_k[it])) 331 c_long_k[it] = np.concatenate(([np.nan], c_long_k[it])) 332 g_long_k[it] = np.concatenate(([np.nan], g_long_k[it])) 333 g_short_k[it] = np.concatenate(([np.nan], g_short_k[it])) 334 if verbose: 335 np.savez_compressed('g-extrapolation.npz', c_short_k=c_short_k, 336 c_long_k=c_long_k, g_long_k=g_long_k, g_short_k=g_short_k) 337 if output_c: 338 return g_long_k[-1], c_long_k[-1] 339 return g_long_k[-1] 340 341 342def calc_slices(r, g_tgt, g_cur, cut_off, verbose=False): 343 """ 344 Generate slices for the regions used in the IIE methods. 345 346 There are different regions used: 347 | crucial | # regions (slices) 348 | core | nocore | 349 0---------core_end------------cut_off-----------r[-1] # distances 350 0----------ndx_ce--------------ndx_co----------len(r) # indices 351 nocore equals Δ from Delbary et al. 352 crucial equals Δ' from Delbary et al. 353 note: Vector w of HNCGN is in region crucial, but with one element less, 354 because of the antiderivative operator A0. 355 356 """ 357 r, g_tgt, g_cur = map(np.asarray, (r, g_tgt, g_cur)) 358 ndx_ce = max((np.where(g_tgt > G_MIN)[0][0], 359 np.where(g_cur > G_MIN)[0][0])) 360 ndx_co = find_after_cut_off_ndx(r, cut_off) 361 crucial = slice(ndx_ce, ndx_co) 362 nocore = slice(ndx_ce, len(r)) 363 if verbose: 364 print("ndx_ce: {}, ({})".format(ndx_ce, r[ndx_ce])) 365 print("ndx_co: {}, ({})".format(ndx_co, cut_off)) 366 print("min(r): {}".format(min(r))) 367 print("max(r): {}".format(max(r))) 368 print("len(r): {}".format(len(r))) 369 print("crucial:", crucial.start, crucial.stop, min(r[crucial]), max(r[crucial])) 370 print("nocore:", nocore.start, nocore.stop, min(r[nocore]), max(r[nocore])) 371 return nocore, crucial 372 373 374def calc_U(r, g_tgt, G_minus_g, n, kBT, rho, closure): 375 """ 376 Calculate a potential U using integral equation theory. 377 378 Supports symmetric molecules with n equal beads. 379 380 Args: 381 r: Distance grid. 382 g_tgt: Target RDF. 383 G_minus_g: Target intramolecular RDF. Can be an empty array if n == 1. 384 n: Number of equal beads per molecule. 385 kBT: Boltzmann constant times temperature. 386 rho: Number density of the molecules. 387 closure: OZ-equation closure ('hnc' or 'py'). 388 389 Returns: 390 The calculated potential. 391 392 """ 393 h = g_tgt - 1 394 c = calc_c(r, g_tgt, G_minus_g, n, rho) 395 with np.errstate(divide='ignore', invalid='ignore'): 396 if closure == 'hnc': 397 U = kBT * (-np.log(g_tgt) + h - c) 398 elif closure == 'py': 399 U = kBT * np.log(1 - c/g_tgt) 400 return U 401 402 403def calc_dU_newton(r, g_tgt, g_cur, G_minus_g, n, kBT, rho, cut_off, 404 closure, newton_mod, cut_jacobian, verbose=False): 405 """ 406 Calculate a potential update dU using Newtons method. 407 408 Supports symmetric molecules with n equal beads. 409 410 Args: 411 r: Distance grid. 412 g_tgt: Target RDF. 413 g_cur: Current RDF. 414 G_minus_g: Current intramolecular RDF. Can be an empty array if n == 1. 415 n: Number of equal beads per molecule. 416 kBT: Boltzmann constant times temperature. 417 rho: Number density of the molecules. 418 cut_off: Highest distance for potential update. 419 closure: OZ-equation closure ('hnc' or 'py'). 420 newton_mod: Use IBI style update term. 421 cut_jacobian: Wether to cut the Jacobian. If False, then the full Jacobian will 422 be used. 423 424 Returns: 425 The calculated potential update. 426 427 """ 428 r0_removed, (r, g_tgt, g_cur, G_minus_g) = r0_removal(r, g_tgt, g_cur, G_minus_g) 429 # calc slices and Delta_r 430 nocore, crucial = calc_slices(r, g_tgt, g_cur, cut_off, verbose=verbose) 431 # difference of rdf to target 432 Delta_g = g_cur - g_tgt 433 # FT of total correlation function 'h' 434 _, h_hat = fourier(r, g_cur - 1) 435 F = gen_fourier_matrix(r, fourier) 436 # dc/dg 437 if n == 1: 438 # single bead case 439 dcdg = np.linalg.inv(F) @ np.diag(1 / (1 + rho * h_hat)**2) @ F 440 else: 441 _, G_minus_g_hat = fourier(r, G_minus_g) 442 dcdg = np.linalg.inv(F) @ np.diag(1 / (1 + n * rho * G_minus_g_hat 443 + n * rho * h_hat)**2) @ F 444 # calculate jacobian^-1 445 # in the core where RDF=0, the jacobin will have -np.inf on the diagonal 446 # numpy correctly inverts this to zero 447 if closure == 'hnc': 448 if newton_mod: 449 with np.errstate(divide='ignore', invalid='ignore', under='ignore'): 450 jac_inv1 = kBT * (1 + np.log(g_tgt / g_cur) / Delta_g) 451 jac_inv2 = -kBT * dcdg 452 # Some fixes, because we want to define a jacobian matrix 453 # Unmodified Newton is less awkward 454 # Ensure this is zero, not nan, on the diagonal where Delta_g is zero 455 jac_inv1[Delta_g == 0] = 0 456 # Ensure this is -np.inf, not nan, on the diagonal in the core region 457 jac_inv1[:nocore.start] = -np.inf 458 jac_inv = np.diag(jac_inv1) + jac_inv2 459 else: 460 with np.errstate(divide='ignore', invalid='ignore', under='ignore'): 461 jac_inv = kBT * (np.diag(1 - 1 / g_cur) - dcdg) 462 elif closure == 'py': 463 raise NotImplementedError 464 jac = np.linalg.inv(jac_inv) 465 if cut_jacobian: 466 # cut jacobian and transform back 467 jac_cut = jac[crucial, crucial] 468 jac_inv_cut = np.linalg.inv(jac_cut) 469 dU = - (jac_inv_cut @ Delta_g[crucial]) 470 else: 471 with np.errstate(invalid='ignore'): 472 dU = - (jac_inv @ Delta_g)[crucial] 473 if verbose: 474 np.savez_compressed('newton-arrays.npz', jac=jac, jac_inv=jac_inv, dU=dU) 475 # fill core and behind cut-off 476 dU = np.concatenate((np.full(nocore.start, np.nan), dU, 477 np.full(len(r) - crucial.stop, np.nan))) 478 if r0_removed: 479 dU = np.concatenate(([np.nan], dU)) 480 return dU 481 482 483def gauss_newton_constrained(A, C, b, d): 484 """Do a gauss-newton update, but eliminate Cx=d first.""" 485 m, n = A.shape 486 p, n_ = C.shape 487 assert n == n_ 488 b.shape = (m) 489 d.shape = (p) 490 491 if p > 1: 492 raise Exception("not implemented for p > 1") 493 494 A_elim = A.copy() 495 b_elim = b.copy() 496 for i in range(p): 497 pivot = np.argmax(abs(C[i])) # find max value of C 498 A_elim = A - (np.ones_like(A) * A[:, pivot][:, np.newaxis] 499 * C[i] / C[i, pivot]) 500 b_elim = b - A[:, pivot] * d[i] / C[i, pivot] 501 A_elim = np.delete(A_elim, pivot, 1) 502 if p == n: 503 print("WARNING: solution of Gauss-Newton update determined fully " 504 "by constraints.") 505 x_elim = [] 506 else: 507 x_elim = np.linalg.solve(A_elim.T @ A_elim, A_elim.T @ b_elim) 508 if p == 0: 509 # no constraints 510 x = x_elim 511 else: 512 x_pivot = (d[i] - np.delete(C, pivot, 1) @ x_elim) / C[i, pivot] 513 x = np.insert(x_elim, pivot, x_pivot) 514 return x 515 516 517def test_gauss_newton_constrained(): 518 """Check Gauss-Newton with some simple cases.""" 519 tests = [ 520 (np.identity(10), np.ones((1, 10)), np.ones(10), np.array(2), [0.2]*10), 521 (np.identity(5), np.array([[0, 0, 1, 0, 0]]), np.ones(5), np.array(2), 522 [1, 1, 2, 1, 1]), 523 (np.array([[1, 0], [1, 1]]), np.zeros((0, 2)), np.ones(2), np.array([]), 524 [1.0, 0.0]), 525 (np.array([[1, 0], [1, 1]]), np.array([[0, 1]]), np.ones(2), np.array(0.1), 526 [0.95, 0.1]), 527 ] 528 for A, C, b, d, x in tests: 529 assert np.allclose(x, gauss_newton_constrained(A, C, b, d)) 530 531 532def calc_dU_gauss_newton(r, g_tgt, g_cur, G_minus_g, n, kBT, rho, 533 cut_off, constraints, 534 verbose=False): 535 """ 536 Calculate a potential update dU using the Gauss-Newton method. 537 538 Constraints can be added. 539 540 Args: 541 r: Distance grid. 542 g_tgt: Target RDF. 543 g_cur: Current RDF. 544 kBT: Boltzmann constant times temperature. 545 rho: Number density of the molecules. 546 cut_off: Highest distance for potential update. 547 constraints: List of dicts, which describe physical constraints. 548 549 Returns: 550 The calculated potential update. 551 552 """ 553 r0_removed, (r, g_tgt, g_cur, G_minus_g) = r0_removal(r, g_tgt, g_cur, G_minus_g) 554 # calc slices and Delta_r 555 nocore, crucial = calc_slices(r, g_tgt, g_cur, cut_off, verbose=verbose) 556 Delta_r = calc_grid_spacing(r) 557 # pair correlation function 'h' 558 h = g_cur - 1 559 # special Fourier of h 560 _, h_hat = fourier(r, h) 561 # Fourier matrix 562 F = gen_fourier_matrix(r, fourier) 563 # dc/dg 564 if n == 1: 565 # single bead case 566 dcdg = np.linalg.inv(F) @ np.diag(1 / (1 + rho * h_hat)**2) @ F 567 else: 568 _, G_minus_g_hat = fourier(r, G_minus_g) 569 dcdg = np.linalg.inv(F) @ np.diag(1 / (1 + n * rho * G_minus_g_hat 570 + n * rho * h_hat)**2) @ F 571 # jacobian^-1 (matrix U in Delbary et al., with respect to potential) 572 with np.errstate(divide='ignore', invalid='ignore', under='ignore'): 573 jac_inv = kBT * (np.diag(1 - 1 / g_cur[nocore]) - dcdg[nocore, nocore]) 574 # A0 matrix 575 A0 = Delta_r * np.triu(np.ones((len(r[nocore]), len(r[crucial])-1)), k=0) 576 # Jacobian with respect to force 577 J = np.linalg.inv(jac_inv) @ A0 578 # constraint matrix and vector 579 C = np.zeros((len(constraints), len(r[crucial])-1)) 580 d = np.zeros(len(constraints)) 581 # build constraint matrix and vector from constraints 582 if verbose: 583 print(constraints) 584 for c, constraint in enumerate(constraints): 585 if constraint['type'] == 'pressure': 586 # current pressure 587 p = constraint['current'] / BAR_PER_MD_PRESSURE 588 # target pressure 589 p_tgt = constraint['target'] / BAR_PER_MD_PRESSURE 590 # g_tgt(r_{i+1}) 591 g_tgt_ip1 = g_tgt[crucial][1:] 592 # g_tgt(r_{i}) 593 g_tgt_i = g_tgt[crucial][:-1] 594 # r_{i+1} 595 r_ip1 = r[crucial][1:] 596 # r_{i} 597 r_i = r[crucial][:-1] 598 # l vector 599 ll = (g_tgt_i + g_tgt_ip1) * (r_ip1**4 - r_i**4) 600 ll *= 1/12 * np.pi * rho**2 601 # set C row and d element 602 C[c, :] = ll 603 d[c] = p_tgt - p 604 else: 605 raise Exception("not implemented constraint type") 606 # residuum vector 607 res = g_tgt - g_cur 608 # switching to notation of Gander et al. for solving 609 A = J 610 b = res[nocore] 611 w = gauss_newton_constrained(A, C, b, d) 612 # dU 613 dU = A0 @ w 614 # fill core with nans 615 dU = np.concatenate((np.full(nocore.start, np.nan), dU)) 616 # dump files 617 if verbose: 618 np.savez_compressed('gauss-newton-arrays.npz', A=A, b=b, C=C, d=d, 619 jac_inv=jac_inv, A0=A0, J=J) 620 if r0_removed: 621 dU = np.concatenate(([np.nan], dU)) 622 return dU 623 624 625def extrapolate_U_constant(dU, dU_flag): 626 """ 627 Extrapolate the potential in the core region by a constant value. 628 629 Returns dU. U_{k+1} = U_k + dU is done by VOTCA. 630 631 """ 632 dU_extrap = dU.copy() 633 # find first valid dU value 634 first_dU_index = np.where(dU_flag == 'i')[0][0] 635 first_dU = dU[first_dU_index] 636 637 # replace out of range dU values with constant first value 638 dU_extrap = np.where(dU_flag == 'i', dU, first_dU) 639 return dU_extrap 640 641 642def extrapolate_U_power(r, dU, U, g_tgt, g_min, kBT, verbose=False): 643 """ 644 Extrapolate the potential in the core region. 645 646 This includes the point, where the RDF becomes larger than g_min or where 647 the new potential is convex. A power function is used. The PMF is fitted, 648 not U+dU. The fit is then shifted such that the graph is monotonous. 649 650 Extrapolation is done, because p-HNCGN often has artifacs at 651 the core, especially when pressure is far off. 652 653 Returns dU. U_{k+1} = U_k + dU is done by Votca. 654 655 """ 656 # make copy 657 dU_extrap = dU.copy() 658 # calc PMF 659 with np.errstate(divide='ignore', over='ignore'): 660 pmf = np.nan_to_num(-kBT * np.log(g_tgt)) 661 # index first minimum 662 ndx_fm = np.where(np.nan_to_num(np.diff(pmf)) > 0)[0][0] 663 # index core end 664 ndx_ce = np.where(g_tgt > g_min)[0][0] 665 if verbose: 666 print('ndx_fm, r_fm', ndx_fm, r[ndx_fm]) 667 print('ndx_ce, r_ce', ndx_ce, r[ndx_ce]) 668 # fit pmf region 669 fit_region = slice(ndx_ce, ndx_ce + 3) 670 # fit pmf with power function a*x^b 671 pmf_shift = -pmf[ndx_fm] + 0.01 672 fit_x = np.log(r[fit_region]) 673 fit_y = np.log(pmf[fit_region] + pmf_shift) 674 b, log_a = np.polyfit(fit_x, fit_y, 1) 675 a = np.exp(log_a) 676 if verbose: 677 print('pmf fit a*x^b. Coefficients a, b:', a, b) 678 with np.errstate(divide='ignore', over='ignore', under='ignore'): 679 pmf_fit = np.nan_to_num(a * r**b - pmf_shift) 680 681 # region to extrapolate 682 ndx_ex1 = ndx_ce + 1 683 ndx_ex2 = np.where(np.nan_to_num(np.diff(np.diff(U + dU))) > 0)[0][0] 684 ndx_ex = max(ndx_ex1, ndx_ex2) 685 if verbose: 686 print('extrapolate up to:', r[ndx_ex]) 687 # extrapolate 688 U_extrap = U + dU 689 U_extrap[:ndx_ex] = pmf_fit[:ndx_ex] + (U_extrap[ndx_ex] - pmf_fit[ndx_ex]) 690 dU_extrap = U_extrap - U 691 return dU_extrap 692 693 694def shift_U_cutoff_zero(dU, r, U, cut_off): 695 """Make potential zero at and beyond cut-off.""" 696 dU_shift = dU.copy() 697 # shift dU to be zero at cut_off and beyond 698 ndx_co = find_after_cut_off_ndx(r, cut_off) 699 U_before_cut_off = U[ndx_co-1] + dU[ndx_co-1] 700 dU_shift -= U_before_cut_off 701 dU_shift[ndx_co:] = -1 * U[ndx_co:] 702 return dU_shift 703 704 705def fix_U_near_cut_off_full(r, U, cut_off): 706 """ 707 Modify the potential close to the cut-off to make it more smooth. 708 709 The derivative of the potential between the last two points will be equal 710 to the derivative between the two points before. The original last two 711 points of dU are therefore ignored. 712 713 This also helps agains an artifact of p-HNCGN, where the last value of dU 714 is a spike. 715 716 """ 717 U_fixed = U.copy() 718 ndx_co = find_after_cut_off_ndx(r, cut_off) 719 second_last_deriv = U[ndx_co-2] - U[ndx_co-3] 720 shift = -1.0 * second_last_deriv - U[ndx_co-2] 721 # modify up to second last value 722 U_fixed[:ndx_co-1] += shift 723 return U_fixed 724 725 726def upd_flag_g_smaller_g_min(flag, g, g_min): 727 """ 728 Update the flag to 'o' for small RDF. 729 730 Take a flag list, copy it, and set the flag to 'o'utside if g is smaller 731 g_min. 732 733 """ 734 flag_new = flag.copy() 735 for i, gg in enumerate(g): 736 if gg < g_min: 737 flag_new[i] = 'o' 738 return flag_new 739 740 741def upd_flag_by_other_flag(flag, other_flag): 742 """ 743 Update a flag array by another flag array. 744 745 Take a flag list, copy it, and set the flag to 'o'utside where some 746 other flag list is 'o'. 747 748 """ 749 flag_new = flag.copy() 750 for i, of in enumerate(other_flag): 751 if of == 'o': 752 flag_new[i] = 'o' 753 return flag_new 754 755 756def upd_U_const_first_flag_i(U, flag): 757 """ 758 Extrapolate potential with constant values where flag is 'o'. 759 760 Take a potential list, copy it, and set the potential at places where 761 flag is 'o' to the first value where flag is 'i'. 762 763 """ 764 U_new = U.copy() 765 # find first valid U value 766 first_U_index = np.where(flag == 'i')[0][0] 767 first_U = U_new[first_U_index] 768 # replace out of range U values 769 U_new = np.where(flag == 'i', U_new, first_U) 770 return U_new 771 772 773def upd_U_zero_beyond_cut_off(r, U, cut_off): 774 """Shift potential to be zero at cut_off and beyond.""" 775 U_new = U.copy() 776 index_cut_off = find_nearest_ndx(r, cut_off) 777 U_cut_off = U_new[index_cut_off] 778 U_new -= U_cut_off 779 U_new[index_cut_off:] = 0 780 return U_new 781 782 783def get_args(): 784 """Define and parse command line arguments.""" 785 description = """ 786 Calculate U or ΔU with Integral Equations. 787 """ 788 parser = argparse.ArgumentParser(description=description) 789 # subparsers 790 subparsers = parser.add_subparsers(dest='subcommand') 791 parser_pot_guess = subparsers.add_parser( 792 'potential_guess', 793 help='potential guess from inverting integral equation') 794 parser_newton = subparsers.add_parser( 795 'newton', 796 help='potential update using Newton method') 797 parser_newton_mod = subparsers.add_parser( 798 'newton-mod', 799 help='potential update using a modified Newton method') 800 parser_gauss_newton = subparsers.add_parser( 801 'gauss-newton', 802 help='potential update using Gauss-Newton method') 803 804 # all subparsers 805 for pars in [parser_pot_guess, parser_newton, parser_newton_mod, 806 parser_gauss_newton]: 807 pars.add_argument('-v', '--verbose', dest='verbose', 808 help='save some intermeditary results', 809 action='store_const', const=True, default=False) 810 pars.add_argument('--closure', type=str, choices=['hnc', 'py'], 811 required=True, 812 help='Closure equation to use for the OZ equation') 813 pars.add_argument('--g-tgt', type=argparse.FileType('r'), 814 nargs='+', required=True, 815 metavar=('X-X.dist.tgt', 'X-Y.dist.tgt'), 816 help='RDF target files') 817 pars.add_argument('--kBT', type=float, required=True, help='') 818 pars.add_argument('--densities', type=float, 819 nargs='+', required=True, 820 metavar=('rho_X', 'rho_Y'), 821 help='list of number densities of beads') 822 pars.add_argument('--n-intra', type=int, 823 nargs='+', required=True, 824 metavar=('n_X', 'n_Y'), 825 help='number of beads per molecule') 826 # todo: this might need to be split up for multicomponent 827 pars.add_argument('--cut-off', type=float, required=True, 828 help='cutt-off (co) of potential') 829 pars.add_argument('--U-out', type=argparse.FileType('wb'), 830 nargs='+', required=True, 831 metavar=('X-X.dpot.new', 'X-Y.dpot.new'), 832 help='U or ΔU output files') 833 pars.add_argument('--g-extrap-factor', type=float, required=False, 834 help='factor by which to extrapolate RDFs') 835 # intial potential guess subparsers 836 for pars in [parser_pot_guess]: 837 pars.add_argument('--G-tgt', type=argparse.FileType('r'), 838 nargs='+', required=False, 839 metavar=('X-X-incl.dist.tgt', 'X-Y-incl.dist.tgt'), 840 help='RDF (including intramolecular) target files') 841 # update potential subparsers 842 for pars in [parser_newton, parser_newton_mod, parser_gauss_newton]: 843 pars.add_argument('--g-cur', type=argparse.FileType('r'), 844 nargs='+', required=True, 845 metavar=('X-X.dist.cur', 'X-Y.dist.cur'), 846 help='RDF current files') 847 pars.add_argument('--G-cur', type=argparse.FileType('r'), 848 nargs='+', required=False, 849 metavar=('X-X-incl.dist.cur', 'X-Y-incl.dist.cur'), 850 help='RDF (including intramolecular) current files') 851 pars.add_argument('--U-cur', type=argparse.FileType('r'), 852 nargs='+', required=True, 853 metavar=('X-X.pot.cur', 'X-Y.pot.cur'), 854 help='potential current files') 855 # Newton's method only options 856 for pars in [parser_newton, parser_newton_mod]: 857 pars.add_argument('--cut-jacobian', dest='cut_jacobian', action='store_true', 858 help=('Cut the top-left part of the Jacobian before' 859 + ' multiplying with Δg.')) 860 pars.set_defaults(cut_jacobian=False) 861 # HNCGN only options 862 parser_gauss_newton.add_argument('--pressure-constraint', 863 dest='pressure_constraint', 864 type=str, default=None) 865 parser_gauss_newton.add_argument('--extrap-near-core', 866 dest='extrap_near_core', 867 type=str, choices=['none', 'constant', 868 'power']) 869 parser_gauss_newton.add_argument('--fix-near-cut-off', 870 dest='fix_near_cut_off', 871 type=str, choices=['none', 'full-deriv']) 872 873 args = parser.parse_args() 874 875 # check for subcommand 876 if args.subcommand is None: 877 parser.print_help() 878 raise Exception("subcommand needed") 879 880 # close writable files directly due to weird bug, where np.savetxt would 881 # write empty file, use filename later. 882 # I should report it, but on the other side, argparse opened files do not 883 # get closed at any point, so this is better 884 for f in args.U_out: 885 f.close() 886 args.U_out = [f.name for f in args.U_out] 887 888 return args 889 890 891def process_input(args): 892 """Process arguments and perform some checks.""" 893 # infering variables from input 894 n_beads = len(args.densities) 895 # nr. of interactions equals nr. of elements in triangular matrix incl. 896 # the diagonal 897 n_interactions = (n_beads * (n_beads + 1)) // 2 898 899 # some checks on input 900 # test for same number of interactions 901 file_arguments_names = ['g_tgt', 'G_tgt', 'g_cur', 'G_cur', 'U_cur', 902 'U_out'] 903 file_arguments = [(argname, vars(args)[argname]) 904 for argname in file_arguments_names 905 if vars(args).get(argname) is not None] 906 file_arguments_wrong = [(argname, flist) 907 for argname, flist in file_arguments 908 if len(flist) != n_interactions] 909 for argname, flist in file_arguments_wrong: 910 raise Exception("""N = {} densities provided, therefore 911 there should be (N * (N + 1)) // 2 = {} 912 files for {}, but {} was 913 provided""".format(n_beads, n_interactions, argname, 914 [f.name for f in flist])) 915 # multicomponent not implemented 916 if any((len(files) != 1 for files in [args.g_tgt, args.densities])): 917 raise Exception('not implemented for multiple components!') 918 if len(args.n_intra) > 1: 919 raise Exception('not implemented for multiple components!') 920 # todo: if n_intra == 1, check if G close to g at G[-1] 921 # todo for multicomponent: check order of input and output by filename 922 # todo for multicomponent: allow not existing X-Y? particles would overlap 923 # todo for multicomponent: do not allow same bead on different moleculetypes 924 925 # load input arrays 926 input_arrays = {} # input_arrays['g_tgt'][0]['x'] 927 input_arguments_names = ['g_tgt', 'G_tgt', 'g_cur', 'G_cur', 'U_cur'] 928 input_arguments = [(argname, vars(args)[argname]) 929 for argname in input_arguments_names 930 if vars(args).get(argname) is not None] 931 for argname, flist in input_arguments: 932 input_arrays[argname] = [] 933 for f in flist: 934 x, y, flag = readin_table(f) 935 input_arrays[argname].append({'x': x, 'y': y, 'flag': flag}) 936 937 # todo: compare grids of all 938 r = input_arrays['g_tgt'][0]['x'] 939 # compare grids of G_cur and G_tgt with g_tgt in smart way 940 # calculate G_minus_g 941 if 'G_tgt' in input_arrays: 942 g_name = 'g_tgt' 943 G_name = 'G_tgt' 944 elif 'G_cur' in input_arrays: 945 g_name = 'g_cur' 946 G_name = 'G_cur' 947 else: 948 print("No intramolecular correlations provided, assuming there are \ 949 none.") 950 # this will result in zero arrays in input_arrays['G_minus_g'] in the 951 # below loop 952 g_name = 'g_tgt' 953 G_name = 'g_tgt' 954 input_arrays['G_minus_g'] = [] 955 for g_dict, G_dict in zip(input_arrays[g_name], input_arrays[G_name]): 956 G_minus_g = np.zeros_like(g_dict['y']) 957 G_minus_g[:G_dict['y'].shape[0]] = (G_dict['y'] 958 - g_dict['y'] 959 [:G_dict['y'].shape[0]]) 960 input_arrays['G_minus_g'].append({'y': G_minus_g}) 961 return r, input_arrays 962 963 964def potential_guess(r, input_arrays, args): 965 """Do the potential guess.""" 966 U1 = calc_U(r, 967 input_arrays['g_tgt'][0]['y'], 968 input_arrays['G_minus_g'][0]['y'], 969 args.n_intra[0], args.kBT, args.densities[0], 970 args.closure) 971 U_flag1 = np.array(['i'] * len(r)) 972 U_flag2 = upd_flag_g_smaller_g_min(U_flag1, 973 input_arrays['g_tgt'][0]['y'], 974 G_MIN) 975 U2 = upd_U_const_first_flag_i(U1, U_flag2) 976 U3 = upd_U_zero_beyond_cut_off(r, U2, args.cut_off) 977 comment = "created by: {}".format(" ".join(sys.argv)) 978 saveto_table(args.U_out[0], r, U3, U_flag2, comment) 979 980 981def newton_update(r, input_arrays, args): 982 """Do the Newton update.""" 983 # further tests on input 984 # if g_extrap_factor != 1.0 then it should hold that cut-off == r[-1] 985 # this is basically HNCN (ex) vs HNCN (ed) 986 if not np.isclose(args.g_extrap_factor, 1.0): 987 if not np.isclose(args.cut_off, r[-1]): 988 raise Exception('if g_extrap_factor is not equal 1.0, the cut-off ' 989 'needs to be the same as the range of all RDFs for ' 990 'Newton method.') 991 # extrapolate RDFs 992 if args.g_extrap_factor < 1: 993 raise Exception('g_extrap_factor needs to be larger than 1.0') 994 Delta_r = calc_grid_spacing(r) 995 r_short = np.copy(r) 996 r = np.arange(r[0], r[-1] * args.g_extrap_factor, Delta_r) 997 g_tgt = extrapolate_g(r_short, r, input_arrays['g_tgt'][0]['y'], 998 input_arrays['G_minus_g'][0]['y'], 999 args.n_intra[0], args.densities[0], 1000 verbose=args.verbose) 1001 g_cur = extrapolate_g(r_short, r, input_arrays['g_cur'][0]['y'], 1002 input_arrays['G_minus_g'][0]['y'], 1003 args.n_intra[0], args.densities[0], 1004 verbose=args.verbose) 1005 if args.verbose: 1006 np.savez_compressed('extrapolated.npz', r=r, g_tgt=g_tgt, g_cur=g_cur) 1007 G_minus_g = np.concatenate((input_arrays['G_minus_g'][0]['y'], 1008 np.zeros(len(r)-len(r_short)))) 1009 cut_off = r_short[-1] 1010 else: 1011 g_tgt = input_arrays['g_tgt'][0]['y'] 1012 g_cur = input_arrays['g_cur'][0]['y'] 1013 G_minus_g = input_arrays['G_minus_g'][0]['y'] 1014 cut_off = args.cut_off 1015 dU1 = calc_dU_newton(r, g_tgt, g_cur, G_minus_g, 1016 args.n_intra[0], args.kBT, 1017 args.densities[0], cut_off, args.closure, 1018 args.subcommand == 'newton-mod', 1019 args.cut_jacobian, 1020 verbose=args.verbose) 1021 # go back to short r range if we extrapolated 1022 if not np.isclose(args.g_extrap_factor, 1.0): 1023 dU1 = dU1[:len(r_short)] 1024 r = r_short 1025 dU_flag1 = np.array(['i'] * len(r)) 1026 dU_flag2 = upd_flag_g_smaller_g_min(dU_flag1, 1027 input_arrays['g_tgt'][0]['y'], 1028 G_MIN) 1029 dU_flag3 = upd_flag_g_smaller_g_min(dU_flag2, 1030 input_arrays['g_cur'][0]['y'], 1031 G_MIN) 1032 dU_flag4 = upd_flag_by_other_flag(dU_flag3, 1033 input_arrays['U_cur'][0]['flag']) 1034 1035 dU2 = upd_U_const_first_flag_i(dU1, dU_flag4) 1036 U_temp = upd_U_zero_beyond_cut_off(r, 1037 dU2 + input_arrays['U_cur'][0]['y'], 1038 cut_off) 1039 dU3 = U_temp - input_arrays['U_cur'][0]['y'] 1040 comment = "created by: {}".format(" ".join(sys.argv)) 1041 saveto_table(args.U_out[0], r, dU3, dU_flag4, comment) 1042 1043 1044def gauss_newton_update(r, input_arrays, args): 1045 """Do the Gauss-Newton update.""" 1046 # parse constraints 1047 constraints = [] 1048 if args.pressure_constraint is not None: 1049 p_target = float(args.pressure_constraint.split(',')[0]) 1050 p_current = float(args.pressure_constraint.split(',')[1]) 1051 constraints.append({'type': 'pressure', 'target': p_target, 1052 'current': p_current}) 1053 1054 # calc dU_pure 1055 dU_pure = calc_dU_gauss_newton(r, 1056 input_arrays['g_tgt'][0]['y'], 1057 input_arrays['g_cur'][0]['y'], 1058 input_arrays['G_minus_g'][0]['y'], 1059 args.n_intra[0], args.kBT, 1060 args.densities[0], args.cut_off, 1061 constraints, verbose=args.verbose) 1062 1063 # set dU_flag to 'o' inside the core 1064 dU_flag = np.where(np.isnan(dU_pure), 'o', 'i') 1065 1066 # select extrapolation 1067 if args.extrap_near_core == 'none': 1068 dU_extrap = np.nan_to_num(dU_pure) 1069 elif args.extrap_near_core == 'constant': 1070 dU_extrap = extrapolate_U_constant(dU_pure, dU_flag) 1071 elif args.extrap_near_core == 'power': 1072 dU_extrap = extrapolate_U_power(r, dU_pure, 1073 input_arrays['U_cur'][0]['y'], 1074 input_arrays['g_tgt'][0]['y'], 1075 G_MIN_EXTRAPOLATE, args.kBT, 1076 verbose=args.verbose) 1077 else: 1078 raise Exception("unknown extrapolation scheme for inside and near " 1079 "core region: " + args.extrap_near_core) 1080 # shifts to correct potential after cut-off 1081 dU_shift = shift_U_cutoff_zero(dU_extrap, r, 1082 input_arrays['U_cur'][0]['y'], 1083 args.cut_off) 1084 # shifts to correct potential near cut-off 1085 if args.fix_near_cut_off == 'none': 1086 dU = dU_shift.copy() 1087 elif args.fix_near_cut_off == 'full-deriv': 1088 U_new = input_arrays['U_cur'][0]['y'] + dU_shift 1089 U_new = fix_U_near_cut_off_full(r, U_new, args.cut_off) 1090 dU = U_new - input_arrays['U_cur'][0]['y'] 1091 else: 1092 raise Exception("unknown fix scheme for near cut-off: " 1093 + args.fix_near_cut_off) 1094 1095 if args.verbose: 1096 np.savez_compressed('hncgn-dU.npz', r=r, dU_pure=dU_pure, 1097 dU_extrap=dU_extrap, dU_shift=dU_shift) 1098 comment = "created by: {}".format(" ".join(sys.argv)) 1099 saveto_table(args.U_out[0], r, dU, dU_flag, comment) 1100 1101 1102def main(): 1103 # get command line arguments 1104 args = get_args() 1105 1106 # process and prepare input 1107 r, input_arrays = process_input(args) 1108 1109 # guess potential from distribution 1110 if args.subcommand in ['potential_guess']: 1111 potential_guess(r, input_arrays, args) 1112 1113 # newton update 1114 if args.subcommand in ['newton', 'newton-mod']: 1115 newton_update(r, input_arrays, args) 1116 1117 # gauss-newton update 1118 if args.subcommand in ['gauss-newton']: 1119 gauss_newton_update(r, input_arrays, args) 1120 1121 1122if __name__ == '__main__': 1123 main() 1124