1# coding: utf-8 2# Copyright (c) Pymatgen Development Team. 3# Distributed under the terms of the MIT License. 4 5""" 6A module to perform diffusion analyses (e.g. calculating diffusivity from 7mean square displacements etc.). If you use this module, please consider 8citing the following papers:: 9 10 Ong, S. P., Mo, Y., Richards, W. D., Miara, L., Lee, H. S., & Ceder, G. 11 (2013). Phase stability, electrochemical stability and ionic conductivity 12 of the Li10+-1MP2X12 (M = Ge, Si, Sn, Al or P, and X = O, S or Se) family 13 of superionic conductors. Energy & Environmental Science, 6(1), 148. 14 doi:10.1039/c2ee23355j 15 16 Mo, Y., Ong, S. P., & Ceder, G. (2012). First Principles Study of the 17 Li10GeP2S12 Lithium Super Ionic Conductor Material. Chemistry of Materials, 18 24(1), 15-17. doi:10.1021/cm203303y 19""" 20 21 22import multiprocessing 23import warnings 24 25import numpy as np 26import scipy.constants as const 27from monty.json import MSONable 28 29from pymatgen.analysis.structure_matcher import ( 30 OrderDisorderElementComparator, 31 StructureMatcher, 32) 33from pymatgen.core.periodic_table import get_el_sp 34from pymatgen.core.structure import Structure 35from pymatgen.io.vasp.outputs import Vasprun 36from pymatgen.util.coord import pbc_diff 37 38__author__ = "Will Richards, Shyue Ping Ong" 39__version__ = "0.2" 40__maintainer__ = "Will Richards" 41__email__ = "wrichard@mit.edu" 42__status__ = "Beta" 43__date__ = "5/2/13" 44 45warnings.warn( 46 "All code in pymatgen.analysis.diffusion_analyzer has been moved to the separate add-on package" 47 "pymatgen-diffusion, which also includes a lot more functionality for analyzing diffusion" 48 "calculations. This module here is retained for backwards compatibility. It will be removed from" 49 "2022.1.1.", 50 FutureWarning, 51) 52 53 54class DiffusionAnalyzer(MSONable): 55 """ 56 Class for performing diffusion analysis. 57 58 .. attribute: diffusivity 59 60 Diffusivity in cm^2 / s 61 62 .. attribute: chg_diffusivity 63 64 Charge diffusivity in cm^2 / s 65 66 .. attribute: conductivity 67 68 Conductivity in mS / cm 69 70 .. attribute: chg_conductivity 71 72 Conductivity derived from Nernst-Einstein equation using charge 73 diffusivity, in mS / cm 74 75 .. attribute: diffusivity_components 76 77 A vector with diffusivity in the a, b and c directions in cm^2 / s 78 79 .. attribute: conductivity_components 80 81 A vector with conductivity in the a, b and c directions in mS / cm 82 83 .. attribute: diffusivity_std_dev 84 85 Std dev in diffusivity in cm^2 / s. Note that this makes sense only 86 for non-smoothed analyses. 87 88 .. attribute: chg_diffusivity_std_dev 89 90 Std dev in charge diffusivity in cm^2 / s. Note that this makes sense only 91 for non-smoothed analyses. 92 93 .. attribute: conductivity_std_dev 94 95 Std dev in conductivity in mS / cm. Note that this makes sense only 96 for non-smoothed analyses. 97 98 .. attribute: diffusivity_components_std_dev 99 100 A vector with std dev. in diffusivity in the a, b and c directions in 101 cm^2 / cm. Note that this makes sense only for non-smoothed analyses. 102 103 .. attribute: conductivity_components_std_dev 104 105 A vector with std dev. in conductivity in the a, b and c directions 106 in mS / cm. Note that this makes sense only for non-smoothed analyses. 107 108 .. attribute: max_framework_displacement 109 110 The maximum (drift adjusted) distance of any framework atom from its 111 starting location in A. 112 113 .. attribute: max_ion_displacements 114 115 nions x 1 array of the maximum displacement of each individual ion. 116 117 .. attribute: msd 118 119 nsteps x 1 array of the mean square displacement of specie. 120 121 .. attribute: mscd 122 123 nsteps x 1 array of the mean square charge displacement of specie. 124 125 .. attribute: msd_components 126 127 nsteps x 3 array of the MSD in each lattice direction of specie. 128 129 .. attribute: sq_disp_ions 130 131 The square displacement of all ion (both specie and other ions) as a 132 nions x nsteps array. 133 134 .. attribute: dt 135 136 Time coordinate array. 137 138 .. attribute: haven_ratio 139 Haven ratio defined as diffusivity / chg_diffusivity. 140 """ 141 142 def __init__( 143 self, 144 structure, 145 displacements, 146 specie, 147 temperature, 148 time_step, 149 step_skip, 150 smoothed="max", 151 min_obs=30, 152 avg_nsteps=1000, 153 lattices=None, 154 ): 155 """ 156 This constructor is meant to be used with pre-processed data. 157 Other convenient constructors are provided as class methods (see 158 from_vaspruns and from_files). 159 160 Given a matrix of displacements (see arguments below for expected 161 format), the diffusivity is given by:: 162 163 D = 1 / 2dt * <mean square displacement> 164 165 where d is the dimensionality, t is the time. To obtain a reliable 166 diffusion estimate, a least squares regression of the MSD against 167 time to obtain the slope, which is then related to the diffusivity. 168 169 For traditional analysis, use smoothed=False and weighted=False. 170 171 Args: 172 structure (Structure): Initial structure. 173 displacements (array): Numpy array of with shape [site, 174 time step, axis] 175 specie (Element/Species): Species to calculate diffusivity for as a 176 String. E.g., "Li". 177 temperature (float): Temperature of the diffusion run in Kelvin. 178 time_step (int): Time step between measurements. 179 step_skip (int): Sampling frequency of the displacements ( 180 time_step is multiplied by this number to get the real time 181 between measurements) 182 smoothed (str): Whether to smooth the MSD, and what mode to smooth. 183 Supported modes are: 184 185 i. "max", which tries to use the maximum # 186 of data points for each time origin, subject to a 187 minimum # of observations given by min_obs, and then 188 weights the observations based on the variance 189 accordingly. This is the default. 190 ii. "constant", in which each timestep is averaged over 191 the number of time_steps given by min_steps. 192 iii. None / False / any other false-like quantity. No 193 smoothing. 194 195 min_obs (int): Used with smoothed="max". Minimum number of 196 observations to have before including in the MSD vs dt 197 calculation. E.g. If a structure has 10 diffusing atoms, 198 and min_obs = 30, the MSD vs dt will be 199 calculated up to dt = total_run_time / 3, so that each 200 diffusing atom is measured at least 3 uncorrelated times. 201 Only applies in smoothed="max". 202 avg_nsteps (int): Used with smoothed="constant". Determines the 203 number of time steps to average over to get the msd for each 204 timestep. Default of 1000 is usually pretty good. 205 lattices (array): Numpy array of lattice matrix of every step. Used 206 for NPT-AIMD. For NVT-AIMD, the lattice at each time step is 207 set to the lattice in the "structure" argument. 208 """ 209 self.structure = structure 210 self.disp = displacements 211 self.specie = specie 212 self.temperature = temperature 213 self.time_step = time_step 214 self.step_skip = step_skip 215 self.min_obs = min_obs 216 self.smoothed = smoothed 217 self.avg_nsteps = avg_nsteps 218 self.lattices = lattices 219 220 if lattices is None: 221 self.lattices = np.array([structure.lattice.matrix.tolist()]) 222 223 indices = [] 224 framework_indices = [] 225 for i, site in enumerate(structure): 226 if site.specie.symbol == specie: 227 indices.append(i) 228 else: 229 framework_indices.append(i) 230 if self.disp.shape[1] < 2: 231 self.diffusivity = 0.0 232 self.conductivity = 0.0 233 self.diffusivity_components = np.array([0.0, 0.0, 0.0]) 234 self.conductivity_components = np.array([0.0, 0.0, 0.0]) 235 self.max_framework_displacement = 0 236 else: 237 framework_disp = self.disp[framework_indices] 238 drift = np.average(framework_disp, axis=0)[None, :, :] 239 240 # drift corrected position 241 dc = self.disp - drift 242 243 nions, nsteps, dim = dc.shape 244 245 if not smoothed: 246 timesteps = np.arange(0, nsteps) 247 elif smoothed == "constant": 248 if nsteps <= avg_nsteps: 249 raise ValueError("Not enough data to calculate diffusivity") 250 timesteps = np.arange(0, nsteps - avg_nsteps) 251 else: 252 # limit the number of sampled timesteps to 200 253 min_dt = int(1000 / (self.step_skip * self.time_step)) 254 max_dt = min(len(indices) * nsteps // self.min_obs, nsteps) 255 if min_dt >= max_dt: 256 raise ValueError("Not enough data to calculate diffusivity") 257 timesteps = np.arange(min_dt, max_dt, max(int((max_dt - min_dt) / 200), 1)) 258 259 dt = timesteps * self.time_step * self.step_skip 260 261 # calculate the smoothed msd values 262 msd = np.zeros_like(dt, dtype=np.double) 263 sq_disp_ions = np.zeros((len(dc), len(dt)), dtype=np.double) 264 msd_components = np.zeros(dt.shape + (3,)) 265 266 # calculate mean square charge displacement 267 mscd = np.zeros_like(msd, dtype=np.double) 268 269 for i, n in enumerate(timesteps): 270 if not smoothed: 271 dx = dc[:, i : i + 1, :] 272 dcomponents = dc[:, i : i + 1, :] 273 elif smoothed == "constant": 274 dx = dc[:, i : i + avg_nsteps, :] - dc[:, 0:avg_nsteps, :] 275 dcomponents = dc[:, i : i + avg_nsteps, :] - dc[:, 0:avg_nsteps, :] 276 else: 277 dx = dc[:, n:, :] - dc[:, :-n, :] 278 dcomponents = dc[:, n:, :] - dc[:, :-n, :] 279 280 # Get msd 281 sq_disp = dx ** 2 282 sq_disp_ions[:, i] = np.average(np.sum(sq_disp, axis=2), axis=1) 283 msd[i] = np.average(sq_disp_ions[:, i][indices]) 284 285 msd_components[i] = np.average(dcomponents[indices] ** 2, axis=(0, 1)) 286 287 # Get mscd 288 sq_chg_disp = np.sum(dx[indices, :, :], axis=0) ** 2 289 mscd[i] = np.average(np.sum(sq_chg_disp, axis=1), axis=0) / len(indices) 290 291 def weighted_lstsq(a, b): 292 if smoothed == "max": 293 # For max smoothing, we need to weight by variance. 294 w_root = (1 / dt) ** 0.5 295 return np.linalg.lstsq(a * w_root[:, None], b * w_root, rcond=None) 296 return np.linalg.lstsq(a, b, rcond=None) 297 298 # Get self diffusivity 299 m_components = np.zeros(3) 300 m_components_res = np.zeros(3) 301 a = np.ones((len(dt), 2)) 302 a[:, 0] = dt 303 for i in range(3): 304 (m, c), res, rank, s = weighted_lstsq(a, msd_components[:, i]) 305 m_components[i] = max(m, 1e-15) 306 m_components_res[i] = res[0] 307 308 (m, c), res, rank, s = weighted_lstsq(a, msd) 309 # m shouldn't be negative 310 m = max(m, 1e-15) 311 312 # Get also the charge diffusivity 313 (m_chg, c_chg), res_chg, _, _ = weighted_lstsq(a, mscd) 314 # m shouldn't be negative 315 m_chg = max(m_chg, 1e-15) 316 317 # factor of 10 is to convert from A^2/fs to cm^2/s 318 # factor of 6 is for dimensionality 319 conv_factor = get_conversion_factor(self.structure, self.specie, self.temperature) 320 self.diffusivity = m / 60 321 self.chg_diffusivity = m_chg / 60 322 323 # Calculate the error in the diffusivity using the error in the 324 # slope from the lst sq. 325 # Variance in slope = n * Sum Squared Residuals / (n * Sxx - Sx 326 # ** 2) / (n-2). 327 n = len(dt) 328 329 # Pre-compute the denominator since we will use it later. 330 # We divide dt by 1000 to avoid overflow errors in some systems ( 331 # e.g., win). This is subsequently corrected where denom is used. 332 denom = (n * np.sum((dt / 1000) ** 2) - np.sum(dt / 1000) ** 2) * (n - 2) 333 self.diffusivity_std_dev = np.sqrt(n * res[0] / denom) / 60 / 1000 334 self.chg_diffusivity_std_dev = np.sqrt(n * res_chg[0] / denom) / 60 / 1000 335 self.conductivity = self.diffusivity * conv_factor 336 self.chg_conductivity = self.chg_diffusivity * conv_factor 337 self.conductivity_std_dev = self.diffusivity_std_dev * conv_factor 338 339 self.diffusivity_components = m_components / 20 340 self.diffusivity_components_std_dev = np.sqrt(n * m_components_res / denom) / 20 / 1000 341 self.conductivity_components = self.diffusivity_components * conv_factor 342 self.conductivity_components_std_dev = self.diffusivity_components_std_dev * conv_factor 343 344 # Drift and displacement information. 345 self.drift = drift 346 self.corrected_displacements = dc 347 self.max_ion_displacements = np.max(np.sum(dc ** 2, axis=-1) ** 0.5, axis=1) 348 self.max_framework_displacement = np.max(self.max_ion_displacements[framework_indices]) 349 self.msd = msd 350 self.mscd = mscd 351 self.haven_ratio = self.diffusivity / self.chg_diffusivity 352 self.sq_disp_ions = sq_disp_ions 353 self.msd_components = msd_components 354 self.dt = dt 355 self.indices = indices 356 self.framework_indices = framework_indices 357 358 def get_drift_corrected_structures(self, start=None, stop=None, step=None): 359 """ 360 Returns an iterator for the drift-corrected structures. Use of 361 iterator is to reduce memory usage as # of structures in MD can be 362 huge. You don't often need all the structures all at once. 363 364 Args: 365 start, stop, step (int): applies a start/stop/step to the iterator. 366 Faster than applying it after generation, as it reduces the 367 number of structures created. 368 """ 369 coords = np.array(self.structure.cart_coords) 370 species = self.structure.species_and_occu 371 lattices = self.lattices 372 nsites, nsteps, dim = self.corrected_displacements.shape 373 374 for i in range(start or 0, stop or nsteps, step or 1): 375 latt = lattices[0] if len(lattices) == 1 else lattices[i] 376 yield Structure( 377 latt, 378 species, 379 coords + self.corrected_displacements[:, i, :], 380 coords_are_cartesian=True, 381 ) 382 383 def get_summary_dict(self, include_msd_t=False, include_mscd_t=False): 384 """ 385 Provides a summary of diffusion information. 386 387 Args: 388 include_msd_t (bool): Whether to include mean square displace and 389 time data with the data. 390 include_msd_t (bool): Whether to include mean square charge displace and 391 time data with the data. 392 393 Returns: 394 (dict) of diffusion and conductivity data. 395 """ 396 d = { 397 "D": self.diffusivity, 398 "D_sigma": self.diffusivity_std_dev, 399 "D_charge": self.chg_diffusivity, 400 "D_charge_sigma": self.chg_diffusivity_std_dev, 401 "S": self.conductivity, 402 "S_sigma": self.conductivity_std_dev, 403 "S_charge": self.chg_conductivity, 404 "D_components": self.diffusivity_components.tolist(), 405 "S_components": self.conductivity_components.tolist(), 406 "D_components_sigma": self.diffusivity_components_std_dev.tolist(), 407 "S_components_sigma": self.conductivity_components_std_dev.tolist(), 408 "specie": str(self.specie), 409 "step_skip": self.step_skip, 410 "time_step": self.time_step, 411 "temperature": self.temperature, 412 "max_framework_displacement": self.max_framework_displacement, 413 "Haven_ratio": self.haven_ratio, 414 } 415 if include_msd_t: 416 d["msd"] = self.msd.tolist() 417 d["msd_components"] = self.msd_components.tolist() 418 d["dt"] = self.dt.tolist() 419 if include_mscd_t: 420 d["mscd"] = self.mscd.tolist() 421 return d 422 423 def get_framework_rms_plot(self, plt=None, granularity=200, matching_s=None): 424 """ 425 Get the plot of rms framework displacement vs time. Useful for checking 426 for melting, especially if framework atoms can move via paddle-wheel 427 or similar mechanism (which would show up in max framework displacement 428 but doesn't constitute melting). 429 430 Args: 431 plt (matplotlib.pyplot): If plt is supplied, changes will be made 432 to an existing plot. Otherwise, a new plot will be created. 433 granularity (int): Number of structures to match 434 matching_s (Structure): Optionally match to a disordered structure 435 instead of the first structure in the analyzer. Required when 436 a secondary mobile ion is present. 437 Notes: 438 The method doesn't apply to NPT-AIMD simulation analysis. 439 """ 440 from pymatgen.util.plotting import pretty_plot 441 442 if self.lattices is not None and len(self.lattices) > 1: 443 warnings.warn("Note the method doesn't apply to NPT-AIMD " "simulation analysis!") 444 445 plt = pretty_plot(12, 8, plt=plt) 446 step = (self.corrected_displacements.shape[1] - 1) // (granularity - 1) 447 f = (matching_s or self.structure).copy() 448 f.remove_species([self.specie]) 449 sm = StructureMatcher( 450 primitive_cell=False, 451 stol=0.6, 452 comparator=OrderDisorderElementComparator(), 453 allow_subset=True, 454 ) 455 rms = [] 456 for s in self.get_drift_corrected_structures(step=step): 457 s.remove_species([self.specie]) 458 d = sm.get_rms_dist(f, s) 459 if d: 460 rms.append(d) 461 else: 462 rms.append((1, 1)) 463 max_dt = (len(rms) - 1) * step * self.step_skip * self.time_step 464 if max_dt > 100000: 465 plot_dt = np.linspace(0, max_dt / 1000, len(rms)) 466 unit = "ps" 467 else: 468 plot_dt = np.linspace(0, max_dt, len(rms)) 469 unit = "fs" 470 rms = np.array(rms) 471 plt.plot(plot_dt, rms[:, 0], label="RMS") 472 plt.plot(plot_dt, rms[:, 1], label="max") 473 plt.legend(loc="best") 474 plt.xlabel("Timestep ({})".format(unit)) 475 plt.ylabel("normalized distance") 476 plt.tight_layout() 477 return plt 478 479 def get_msd_plot(self, plt=None, mode="specie"): 480 """ 481 Get the plot of the smoothed msd vs time graph. Useful for 482 checking convergence. This can be written to an image file. 483 484 Args: 485 plt: A plot object. Defaults to None, which means one will be 486 generated. 487 mode (str): Determines type of msd plot. By "species", "sites", 488 or direction (default). If mode = "mscd", the smoothed mscd vs. 489 time will be plotted. 490 """ 491 from pymatgen.util.plotting import pretty_plot 492 493 plt = pretty_plot(12, 8, plt=plt) 494 if np.max(self.dt) > 100000: 495 plot_dt = self.dt / 1000 496 unit = "ps" 497 else: 498 plot_dt = self.dt 499 unit = "fs" 500 501 if mode == "species": 502 for sp in sorted(self.structure.composition.keys()): 503 indices = [i for i, site in enumerate(self.structure) if site.specie == sp] 504 sd = np.average(self.sq_disp_ions[indices, :], axis=0) 505 plt.plot(plot_dt, sd, label=sp.__str__()) 506 plt.legend(loc=2, prop={"size": 20}) 507 elif mode == "sites": 508 for i, site in enumerate(self.structure): 509 sd = self.sq_disp_ions[i, :] 510 plt.plot(plot_dt, sd, label="%s - %d" % (site.specie.__str__(), i)) 511 plt.legend(loc=2, prop={"size": 20}) 512 elif mode == "mscd": 513 plt.plot(plot_dt, self.mscd, "r") 514 plt.legend(["Overall"], loc=2, prop={"size": 20}) 515 else: 516 # Handle default / invalid mode case 517 plt.plot(plot_dt, self.msd, "k") 518 plt.plot(plot_dt, self.msd_components[:, 0], "r") 519 plt.plot(plot_dt, self.msd_components[:, 1], "g") 520 plt.plot(plot_dt, self.msd_components[:, 2], "b") 521 plt.legend(["Overall", "a", "b", "c"], loc=2, prop={"size": 20}) 522 523 plt.xlabel("Timestep ({})".format(unit)) 524 if mode == "mscd": 525 plt.ylabel("MSCD ($\\AA^2$)") 526 else: 527 plt.ylabel("MSD ($\\AA^2$)") 528 plt.tight_layout() 529 return plt 530 531 def plot_msd(self, mode="default"): 532 """ 533 Plot the smoothed msd vs time graph. Useful for checking convergence. 534 535 Args: 536 mode (str): Can be "default" (the default, shows only the MSD for 537 the diffusing specie, and its components), "ions" (individual 538 square displacements of all ions), "species" (mean square 539 displacement by specie), or "mscd" (overall mean square charge 540 displacement for diffusing specie). 541 """ 542 self.get_msd_plot(mode=mode).show() 543 544 def export_msdt(self, filename): 545 """ 546 Writes MSD data to a csv file that can be easily plotted in other 547 software. 548 549 Args: 550 filename (str): Filename. Supported formats are csv and dat. If 551 the extension is csv, a csv file is written. Otherwise, 552 a dat format is assumed. 553 """ 554 fmt = "csv" if filename.lower().endswith(".csv") else "dat" 555 delimiter = ", " if fmt == "csv" else " " 556 with open(filename, "wt") as f: 557 if fmt == "dat": 558 f.write("# ") 559 f.write(delimiter.join(["t", "MSD", "MSD_a", "MSD_b", "MSD_c", "MSCD"])) 560 f.write("\n") 561 for dt, msd, msdc, mscd in zip(self.dt, self.msd, self.msd_components, self.mscd): 562 f.write(delimiter.join(["%s" % v for v in [dt, msd] + list(msdc) + [mscd]])) 563 f.write("\n") 564 565 @classmethod 566 def from_structures( 567 cls, structures, specie, temperature, time_step, step_skip, initial_disp=None, initial_structure=None, **kwargs 568 ): 569 r""" 570 Convenient constructor that takes in a list of Structure objects to 571 perform diffusion analysis. 572 573 Args: 574 structures ([Structure]): list of Structure objects (must be 575 ordered in sequence of run). E.g., you may have performed 576 sequential VASP runs to obtain sufficient statistics. 577 specie (Element/Species): Species to calculate diffusivity for as a 578 String. E.g., "Li". 579 temperature (float): Temperature of the diffusion run in Kelvin. 580 time_step (int): Time step between measurements. 581 step_skip (int): Sampling frequency of the displacements ( 582 time_step is multiplied by this number to get the real time 583 between measurements) 584 initial_disp (np.ndarray): Sometimes, you need to iteratively 585 compute estimates of the diffusivity. This supplies an 586 initial displacement that will be added on to the initial 587 displacements. Note that this makes sense only when 588 smoothed=False. 589 initial_structure (Structure): Like initial_disp, this is used 590 for iterative computations of estimates of the diffusivity. You 591 typically need to supply both variables. This stipulates the 592 initial structure from which the current set of displacements 593 are computed. 594 \\*\\*kwargs: kwargs supported by the :class:`DiffusionAnalyzer`_. 595 Examples include smoothed, min_obs, avg_nsteps. 596 """ 597 p, l = [], [] 598 for i, s in enumerate(structures): 599 if i == 0: 600 structure = s 601 p.append(np.array(s.frac_coords)[:, None]) 602 l.append(s.lattice.matrix) 603 if initial_structure is not None: 604 p.insert(0, np.array(initial_structure.frac_coords)[:, None]) 605 l.insert(0, initial_structure.lattice.matrix) 606 else: 607 p.insert(0, p[0]) 608 l.insert(0, l[0]) 609 610 p = np.concatenate(p, axis=1) 611 dp = p[:, 1:] - p[:, :-1] 612 dp = dp - np.round(dp) 613 f_disp = np.cumsum(dp, axis=1) 614 c_disp = [] 615 for i in f_disp: 616 c_disp.append([np.dot(d, m) for d, m in zip(i, l[1:])]) 617 disp = np.array(c_disp) 618 619 # If is NVT-AIMD, clear lattice data. 620 if np.array_equal(l[0], l[-1]): 621 l = np.array([l[0]]) 622 else: 623 l = np.array(l) 624 if initial_disp is not None: 625 disp += initial_disp[:, None, :] 626 627 return cls(structure, disp, specie, temperature, time_step, step_skip=step_skip, lattices=l, **kwargs) 628 629 @classmethod 630 def from_vaspruns(cls, vaspruns, specie, initial_disp=None, initial_structure=None, **kwargs): 631 r""" 632 Convenient constructor that takes in a list of Vasprun objects to 633 perform diffusion analysis. 634 635 Args: 636 vaspruns ([Vasprun]): List of Vaspruns (must be ordered in 637 sequence of MD simulation). E.g., you may have performed 638 sequential VASP runs to obtain sufficient statistics. 639 specie (Element/Species): Species to calculate diffusivity for as a 640 String. E.g., "Li". 641 initial_disp (np.ndarray): Sometimes, you need to iteratively 642 compute estimates of the diffusivity. This supplies an 643 initial displacement that will be added on to the initial 644 displacements. Note that this makes sense only when 645 smoothed=False. 646 initial_structure (Structure): Like initial_disp, this is used 647 for iterative computations of estimates of the diffusivity. You 648 typically need to supply both variables. This stipulates the 649 initial stricture from which the current set of displacements 650 are computed. 651 \\*\\*kwargs: kwargs supported by the :class:`DiffusionAnalyzer`_. 652 Examples include smoothed, min_obs, avg_nsteps. 653 """ 654 655 def get_structures(vaspruns): 656 for i, vr in enumerate(vaspruns): 657 if i == 0: 658 step_skip = vr.ionic_step_skip or 1 659 final_structure = vr.initial_structure 660 temperature = vr.parameters["TEEND"] 661 time_step = vr.parameters["POTIM"] 662 yield step_skip, temperature, time_step 663 # check that the runs are continuous 664 fdist = pbc_diff(vr.initial_structure.frac_coords, final_structure.frac_coords) 665 if np.any(fdist > 0.001): 666 raise ValueError("initial and final structures do not " "match.") 667 final_structure = vr.final_structure 668 669 assert (vr.ionic_step_skip or 1) == step_skip 670 for s in vr.ionic_steps: 671 yield s["structure"] 672 673 s = get_structures(vaspruns) 674 step_skip, temperature, time_step = next(s) 675 676 return cls.from_structures( 677 structures=list(s), 678 specie=specie, 679 temperature=temperature, 680 time_step=time_step, 681 step_skip=step_skip, 682 initial_disp=initial_disp, 683 initial_structure=initial_structure, 684 **kwargs, 685 ) 686 687 @classmethod 688 def from_files( 689 cls, filepaths, specie, step_skip=10, ncores=None, initial_disp=None, initial_structure=None, **kwargs 690 ): 691 r""" 692 Convenient constructor that takes in a list of vasprun.xml paths to 693 perform diffusion analysis. 694 695 Args: 696 filepaths ([str]): List of paths to vasprun.xml files of runs. ( 697 must be ordered in sequence of MD simulation). For example, 698 you may have done sequential VASP runs and they are in run1, 699 run2, run3, etc. You should then pass in 700 ["run1/vasprun.xml", "run2/vasprun.xml", ...]. 701 specie (Element/Species): Species to calculate diffusivity for as a 702 String. E.g., "Li". 703 step_skip (int): Sampling frequency of the displacements ( 704 time_step is multiplied by this number to get the real time 705 between measurements) 706 ncores (int): Numbers of cores to use for multiprocessing. Can 707 speed up vasprun parsing considerably. Defaults to None, 708 which means serial. It should be noted that if you want to 709 use multiprocessing, the number of ionic steps in all vasprun 710 .xml files should be a multiple of the ionic_step_skip. 711 Otherwise, inconsistent results may arise. Serial mode has no 712 such restrictions. 713 initial_disp (np.ndarray): Sometimes, you need to iteratively 714 compute estimates of the diffusivity. This supplies an 715 initial displacement that will be added on to the initial 716 displacements. Note that this makes sense only when 717 smoothed=False. 718 initial_structure (Structure): Like initial_disp, this is used 719 for iterative computations of estimates of the diffusivity. You 720 typically need to supply both variables. This stipulates the 721 initial structure from which the current set of displacements 722 are computed. 723 \\*\\*kwargs: kwargs supported by the :class:`DiffusionAnalyzer`_. 724 Examples include smoothed, min_obs, avg_nsteps. 725 """ 726 if ncores is not None and len(filepaths) > 1: 727 p = multiprocessing.Pool(ncores) # pylint: disable=R1732 728 vaspruns = p.imap(_get_vasprun, [(fp, step_skip) for fp in filepaths]) 729 analyzer = cls.from_vaspruns( 730 vaspruns, specie=specie, initial_disp=initial_disp, initial_structure=initial_structure, **kwargs 731 ) 732 p.close() 733 p.join() 734 return analyzer 735 736 def vr(filepaths): 737 offset = 0 738 for p in filepaths: 739 v = Vasprun(p, ionic_step_offset=offset, ionic_step_skip=step_skip) 740 yield v 741 # Recompute offset. 742 offset = (-(v.nionic_steps - offset)) % step_skip 743 744 return cls.from_vaspruns( 745 vr(filepaths), specie=specie, initial_disp=initial_disp, initial_structure=initial_structure, **kwargs 746 ) 747 748 def as_dict(self): 749 """ 750 Returns: MSONable dict 751 """ 752 return { 753 "@module": self.__class__.__module__, 754 "@class": self.__class__.__name__, 755 "structure": self.structure.as_dict(), 756 "displacements": self.disp.tolist(), 757 "specie": self.specie, 758 "temperature": self.temperature, 759 "time_step": self.time_step, 760 "step_skip": self.step_skip, 761 "min_obs": self.min_obs, 762 "smoothed": self.smoothed, 763 "avg_nsteps": self.avg_nsteps, 764 "lattices": self.lattices.tolist(), 765 } 766 767 @classmethod 768 def from_dict(cls, d): 769 """ 770 Args: 771 d (dict): Dict representation 772 773 Returns: DiffusionAnalyzer 774 """ 775 structure = Structure.from_dict(d["structure"]) 776 return cls( 777 structure, 778 np.array(d["displacements"]), 779 specie=d["specie"], 780 temperature=d["temperature"], 781 time_step=d["time_step"], 782 step_skip=d["step_skip"], 783 min_obs=d["min_obs"], 784 smoothed=d.get("smoothed", "max"), 785 avg_nsteps=d.get("avg_nsteps", 1000), 786 lattices=np.array(d.get("lattices", [d["structure"]["lattice"]["matrix"]])), 787 ) 788 789 790def get_conversion_factor(structure, species, temperature): 791 """ 792 Conversion factor to convert between cm^2/s diffusivity measurements and 793 mS/cm conductivity measurements based on number of atoms of diffusing 794 species. Note that the charge is based on the oxidation state of the 795 species (where available), or else the number of valence electrons 796 (usually a good guess, esp for main group ions). 797 798 Args: 799 structure (Structure): Input structure. 800 species (Element/Species): Diffusing species. 801 temperature (float): Temperature of the diffusion run in Kelvin. 802 803 Returns: 804 Conversion factor. 805 Conductivity (in mS/cm) = Conversion Factor * Diffusivity (in cm^2/s) 806 """ 807 df_sp = get_el_sp(species) 808 if hasattr(df_sp, "oxi_state"): 809 z = df_sp.oxi_state 810 else: 811 z = df_sp.full_electronic_structure[-1][2] 812 813 n = structure.composition[species] 814 815 vol = structure.volume * 1e-24 # units cm^3 816 return 1000 * n / (vol * const.N_A) * z ** 2 * (const.N_A * const.e) ** 2 / (const.R * temperature) 817 818 819def _get_vasprun(args): 820 """ 821 Internal method to support multiprocessing. 822 """ 823 return Vasprun(args[0], ionic_step_skip=args[1], parse_dos=False, parse_eigen=False) 824 825 826def fit_arrhenius(temps, diffusivities): 827 """ 828 Returns Ea, c, standard error of Ea from the Arrhenius fit: 829 D = c * exp(-Ea/kT) 830 831 Args: 832 temps ([float]): A sequence of temperatures. units: K 833 diffusivities ([float]): A sequence of diffusivities (e.g., 834 from DiffusionAnalyzer.diffusivity). units: cm^2/s 835 """ 836 t_1 = 1 / np.array(temps) 837 logd = np.log(diffusivities) 838 # Do a least squares regression of log(D) vs 1/T 839 a = np.array([t_1, np.ones(len(temps))]).T 840 w, res, _, _ = np.linalg.lstsq(a, logd, rcond=None) 841 w = np.array(w) 842 n = len(temps) 843 if n > 2: 844 std_Ea = (res[0] / (n - 2) / (n * np.var(t_1))) ** 0.5 * const.k / const.e 845 else: 846 std_Ea = None 847 return -w[0] * const.k / const.e, np.exp(w[1]), std_Ea 848 849 850def get_extrapolated_diffusivity(temps, diffusivities, new_temp): 851 """ 852 Returns (Arrhenius) extrapolated diffusivity at new_temp 853 854 Args: 855 temps ([float]): A sequence of temperatures. units: K 856 diffusivities ([float]): A sequence of diffusivities (e.g., 857 from DiffusionAnalyzer.diffusivity). units: cm^2/s 858 new_temp (float): desired temperature. units: K 859 860 Returns: 861 (float) Diffusivity at extrapolated temp in mS/cm. 862 """ 863 Ea, c, _ = fit_arrhenius(temps, diffusivities) 864 return c * np.exp(-Ea / (const.k / const.e * new_temp)) 865 866 867def get_extrapolated_conductivity(temps, diffusivities, new_temp, structure, species): 868 """ 869 Returns extrapolated mS/cm conductivity. 870 871 Args: 872 temps ([float]): A sequence of temperatures. units: K 873 diffusivities ([float]): A sequence of diffusivities (e.g., 874 from DiffusionAnalyzer.diffusivity). units: cm^2/s 875 new_temp (float): desired temperature. units: K 876 structure (structure): Structure used for the diffusivity calculation 877 species (string/Species): conducting species 878 879 Returns: 880 (float) Conductivity at extrapolated temp in mS/cm. 881 """ 882 return get_extrapolated_diffusivity(temps, diffusivities, new_temp) * get_conversion_factor( 883 structure, species, new_temp 884 ) 885 886 887def get_arrhenius_plot(temps, diffusivities, diffusivity_errors=None, **kwargs): 888 r""" 889 Returns an Arrhenius plot. 890 891 Args: 892 temps ([float]): A sequence of temperatures. 893 diffusivities ([float]): A sequence of diffusivities (e.g., 894 from DiffusionAnalyzer.diffusivity). 895 diffusivity_errors ([float]): A sequence of errors for the 896 diffusivities. If None, no error bar is plotted. 897 \\*\\*kwargs: 898 Any keyword args supported by matplotlib.pyplot.plot. 899 900 Returns: 901 A matplotlib.pyplot object. Do plt.show() to show the plot. 902 """ 903 Ea, c, _ = fit_arrhenius(temps, diffusivities) 904 905 from pymatgen.util.plotting import pretty_plot 906 907 plt = pretty_plot(12, 8) 908 909 # log10 of the arrhenius fit 910 arr = c * np.exp(-Ea / (const.k / const.e * np.array(temps))) 911 912 t_1 = 1000 / np.array(temps) 913 914 plt.plot(t_1, diffusivities, "ko", t_1, arr, "k--", markersize=10, **kwargs) 915 if diffusivity_errors is not None: 916 n = len(diffusivity_errors) 917 plt.errorbar( 918 t_1[0:n], 919 diffusivities[0:n], 920 yerr=diffusivity_errors, 921 fmt="ko", 922 ecolor="k", 923 capthick=2, 924 linewidth=2, 925 ) 926 ax = plt.axes() 927 ax.set_yscale("log") 928 plt.text( 929 0.6, 930 0.85, 931 "E$_a$ = {:.0f} meV".format(Ea * 1000), 932 fontsize=30, 933 transform=plt.axes().transAxes, 934 ) 935 plt.ylabel("D (cm$^2$/s)") 936 plt.xlabel("1000/T (K$^{-1}$)") 937 plt.tight_layout() 938 return plt 939