1# coding: utf-8
2# Copyright (c) Pymatgen Development Team.
3# Distributed under the terms of the MIT License.
4
5"""
6A module to perform diffusion analyses (e.g. calculating diffusivity from
7mean square displacements etc.). If you use this module, please consider
8citing the following papers::
9
10    Ong, S. P., Mo, Y., Richards, W. D., Miara, L., Lee, H. S., & Ceder, G.
11    (2013). Phase stability, electrochemical stability and ionic conductivity
12    of the Li10+-1MP2X12 (M = Ge, Si, Sn, Al or P, and X = O, S or Se) family
13    of superionic conductors. Energy & Environmental Science, 6(1), 148.
14    doi:10.1039/c2ee23355j
15
16    Mo, Y., Ong, S. P., & Ceder, G. (2012). First Principles Study of the
17    Li10GeP2S12 Lithium Super Ionic Conductor Material. Chemistry of Materials,
18    24(1), 15-17. doi:10.1021/cm203303y
19"""
20
21
22import multiprocessing
23import warnings
24
25import numpy as np
26import scipy.constants as const
27from monty.json import MSONable
28
29from pymatgen.analysis.structure_matcher import (
30    OrderDisorderElementComparator,
31    StructureMatcher,
32)
33from pymatgen.core.periodic_table import get_el_sp
34from pymatgen.core.structure import Structure
35from pymatgen.io.vasp.outputs import Vasprun
36from pymatgen.util.coord import pbc_diff
37
38__author__ = "Will Richards, Shyue Ping Ong"
39__version__ = "0.2"
40__maintainer__ = "Will Richards"
41__email__ = "wrichard@mit.edu"
42__status__ = "Beta"
43__date__ = "5/2/13"
44
45warnings.warn(
46    "All code in pymatgen.analysis.diffusion_analyzer has been moved to the separate add-on package"
47    "pymatgen-diffusion, which also includes a lot more functionality for analyzing diffusion"
48    "calculations. This module here is retained for backwards compatibility. It will be removed from"
49    "2022.1.1.",
50    FutureWarning,
51)
52
53
54class DiffusionAnalyzer(MSONable):
55    """
56    Class for performing diffusion analysis.
57
58    .. attribute: diffusivity
59
60        Diffusivity in cm^2 / s
61
62    .. attribute: chg_diffusivity
63
64        Charge diffusivity in cm^2 / s
65
66    .. attribute: conductivity
67
68        Conductivity in mS / cm
69
70    .. attribute: chg_conductivity
71
72        Conductivity derived from Nernst-Einstein equation using charge
73        diffusivity, in mS / cm
74
75    .. attribute: diffusivity_components
76
77        A vector with diffusivity in the a, b and c directions in cm^2 / s
78
79    .. attribute: conductivity_components
80
81        A vector with conductivity in the a, b and c directions in mS / cm
82
83    .. attribute: diffusivity_std_dev
84
85        Std dev in diffusivity in cm^2 / s. Note that this makes sense only
86        for non-smoothed analyses.
87
88    .. attribute: chg_diffusivity_std_dev
89
90        Std dev in charge diffusivity in cm^2 / s. Note that this makes sense only
91        for non-smoothed analyses.
92
93    .. attribute: conductivity_std_dev
94
95        Std dev in conductivity in mS / cm. Note that this makes sense only
96        for non-smoothed analyses.
97
98    .. attribute: diffusivity_components_std_dev
99
100        A vector with std dev. in diffusivity in the a, b and c directions in
101        cm^2 / cm. Note that this makes sense only for non-smoothed analyses.
102
103    .. attribute: conductivity_components_std_dev
104
105        A vector with std dev. in conductivity in the a, b and c directions
106        in mS / cm. Note that this makes sense only for non-smoothed analyses.
107
108    .. attribute: max_framework_displacement
109
110        The maximum (drift adjusted) distance of any framework atom from its
111        starting location in A.
112
113    .. attribute: max_ion_displacements
114
115        nions x 1 array of the maximum displacement of each individual ion.
116
117    .. attribute: msd
118
119        nsteps x 1 array of the mean square displacement of specie.
120
121    .. attribute: mscd
122
123        nsteps x 1 array of the mean square charge displacement of specie.
124
125    .. attribute: msd_components
126
127        nsteps x 3 array of the MSD in each lattice direction of specie.
128
129    .. attribute: sq_disp_ions
130
131        The square displacement of all ion (both specie and other ions) as a
132        nions x nsteps array.
133
134    .. attribute: dt
135
136        Time coordinate array.
137
138    .. attribute: haven_ratio
139        Haven ratio defined as diffusivity / chg_diffusivity.
140    """
141
142    def __init__(
143        self,
144        structure,
145        displacements,
146        specie,
147        temperature,
148        time_step,
149        step_skip,
150        smoothed="max",
151        min_obs=30,
152        avg_nsteps=1000,
153        lattices=None,
154    ):
155        """
156        This constructor is meant to be used with pre-processed data.
157        Other convenient constructors are provided as class methods (see
158        from_vaspruns and from_files).
159
160        Given a matrix of displacements (see arguments below for expected
161        format), the diffusivity is given by::
162
163            D = 1 / 2dt * <mean square displacement>
164
165        where d is the dimensionality, t is the time. To obtain a reliable
166        diffusion estimate, a least squares regression of the MSD against
167        time to obtain the slope, which is then related to the diffusivity.
168
169        For traditional analysis, use smoothed=False and weighted=False.
170
171        Args:
172            structure (Structure): Initial structure.
173            displacements (array): Numpy array of with shape [site,
174                time step, axis]
175            specie (Element/Species): Species to calculate diffusivity for as a
176                String. E.g., "Li".
177            temperature (float): Temperature of the diffusion run in Kelvin.
178            time_step (int): Time step between measurements.
179            step_skip (int): Sampling frequency of the displacements (
180                time_step is multiplied by this number to get the real time
181                between measurements)
182            smoothed (str): Whether to smooth the MSD, and what mode to smooth.
183                Supported modes are:
184
185                i. "max", which tries to use the maximum #
186                   of data points for each time origin, subject to a
187                   minimum # of observations given by min_obs, and then
188                   weights the observations based on the variance
189                   accordingly. This is the default.
190                ii. "constant", in which each timestep is averaged over
191                    the number of time_steps given by min_steps.
192                iii. None / False / any other false-like quantity. No
193                   smoothing.
194
195            min_obs (int): Used with smoothed="max". Minimum number of
196                observations to have before including in the MSD vs dt
197                calculation. E.g. If a structure has 10 diffusing atoms,
198                and min_obs = 30, the MSD vs dt will be
199                calculated up to dt = total_run_time / 3, so that each
200                diffusing atom is measured at least 3 uncorrelated times.
201                Only applies in smoothed="max".
202            avg_nsteps (int): Used with smoothed="constant". Determines the
203                number of time steps to average over to get the msd for each
204                timestep. Default of 1000 is usually pretty good.
205            lattices (array): Numpy array of lattice matrix of every step. Used
206                for NPT-AIMD. For NVT-AIMD, the lattice at each time step is
207                set to the lattice in the "structure" argument.
208        """
209        self.structure = structure
210        self.disp = displacements
211        self.specie = specie
212        self.temperature = temperature
213        self.time_step = time_step
214        self.step_skip = step_skip
215        self.min_obs = min_obs
216        self.smoothed = smoothed
217        self.avg_nsteps = avg_nsteps
218        self.lattices = lattices
219
220        if lattices is None:
221            self.lattices = np.array([structure.lattice.matrix.tolist()])
222
223        indices = []
224        framework_indices = []
225        for i, site in enumerate(structure):
226            if site.specie.symbol == specie:
227                indices.append(i)
228            else:
229                framework_indices.append(i)
230        if self.disp.shape[1] < 2:
231            self.diffusivity = 0.0
232            self.conductivity = 0.0
233            self.diffusivity_components = np.array([0.0, 0.0, 0.0])
234            self.conductivity_components = np.array([0.0, 0.0, 0.0])
235            self.max_framework_displacement = 0
236        else:
237            framework_disp = self.disp[framework_indices]
238            drift = np.average(framework_disp, axis=0)[None, :, :]
239
240            # drift corrected position
241            dc = self.disp - drift
242
243            nions, nsteps, dim = dc.shape
244
245            if not smoothed:
246                timesteps = np.arange(0, nsteps)
247            elif smoothed == "constant":
248                if nsteps <= avg_nsteps:
249                    raise ValueError("Not enough data to calculate diffusivity")
250                timesteps = np.arange(0, nsteps - avg_nsteps)
251            else:
252                # limit the number of sampled timesteps to 200
253                min_dt = int(1000 / (self.step_skip * self.time_step))
254                max_dt = min(len(indices) * nsteps // self.min_obs, nsteps)
255                if min_dt >= max_dt:
256                    raise ValueError("Not enough data to calculate diffusivity")
257                timesteps = np.arange(min_dt, max_dt, max(int((max_dt - min_dt) / 200), 1))
258
259            dt = timesteps * self.time_step * self.step_skip
260
261            # calculate the smoothed msd values
262            msd = np.zeros_like(dt, dtype=np.double)
263            sq_disp_ions = np.zeros((len(dc), len(dt)), dtype=np.double)
264            msd_components = np.zeros(dt.shape + (3,))
265
266            # calculate mean square charge displacement
267            mscd = np.zeros_like(msd, dtype=np.double)
268
269            for i, n in enumerate(timesteps):
270                if not smoothed:
271                    dx = dc[:, i : i + 1, :]
272                    dcomponents = dc[:, i : i + 1, :]
273                elif smoothed == "constant":
274                    dx = dc[:, i : i + avg_nsteps, :] - dc[:, 0:avg_nsteps, :]
275                    dcomponents = dc[:, i : i + avg_nsteps, :] - dc[:, 0:avg_nsteps, :]
276                else:
277                    dx = dc[:, n:, :] - dc[:, :-n, :]
278                    dcomponents = dc[:, n:, :] - dc[:, :-n, :]
279
280                # Get msd
281                sq_disp = dx ** 2
282                sq_disp_ions[:, i] = np.average(np.sum(sq_disp, axis=2), axis=1)
283                msd[i] = np.average(sq_disp_ions[:, i][indices])
284
285                msd_components[i] = np.average(dcomponents[indices] ** 2, axis=(0, 1))
286
287                # Get mscd
288                sq_chg_disp = np.sum(dx[indices, :, :], axis=0) ** 2
289                mscd[i] = np.average(np.sum(sq_chg_disp, axis=1), axis=0) / len(indices)
290
291            def weighted_lstsq(a, b):
292                if smoothed == "max":
293                    # For max smoothing, we need to weight by variance.
294                    w_root = (1 / dt) ** 0.5
295                    return np.linalg.lstsq(a * w_root[:, None], b * w_root, rcond=None)
296                return np.linalg.lstsq(a, b, rcond=None)
297
298            # Get self diffusivity
299            m_components = np.zeros(3)
300            m_components_res = np.zeros(3)
301            a = np.ones((len(dt), 2))
302            a[:, 0] = dt
303            for i in range(3):
304                (m, c), res, rank, s = weighted_lstsq(a, msd_components[:, i])
305                m_components[i] = max(m, 1e-15)
306                m_components_res[i] = res[0]
307
308            (m, c), res, rank, s = weighted_lstsq(a, msd)
309            # m shouldn't be negative
310            m = max(m, 1e-15)
311
312            # Get also the charge diffusivity
313            (m_chg, c_chg), res_chg, _, _ = weighted_lstsq(a, mscd)
314            # m shouldn't be negative
315            m_chg = max(m_chg, 1e-15)
316
317            # factor of 10 is to convert from A^2/fs to cm^2/s
318            # factor of 6 is for dimensionality
319            conv_factor = get_conversion_factor(self.structure, self.specie, self.temperature)
320            self.diffusivity = m / 60
321            self.chg_diffusivity = m_chg / 60
322
323            # Calculate the error in the diffusivity using the error in the
324            # slope from the lst sq.
325            # Variance in slope = n * Sum Squared Residuals / (n * Sxx - Sx
326            # ** 2) / (n-2).
327            n = len(dt)
328
329            # Pre-compute the denominator since we will use it later.
330            # We divide dt by 1000 to avoid overflow errors in some systems (
331            # e.g., win). This is subsequently corrected where denom is used.
332            denom = (n * np.sum((dt / 1000) ** 2) - np.sum(dt / 1000) ** 2) * (n - 2)
333            self.diffusivity_std_dev = np.sqrt(n * res[0] / denom) / 60 / 1000
334            self.chg_diffusivity_std_dev = np.sqrt(n * res_chg[0] / denom) / 60 / 1000
335            self.conductivity = self.diffusivity * conv_factor
336            self.chg_conductivity = self.chg_diffusivity * conv_factor
337            self.conductivity_std_dev = self.diffusivity_std_dev * conv_factor
338
339            self.diffusivity_components = m_components / 20
340            self.diffusivity_components_std_dev = np.sqrt(n * m_components_res / denom) / 20 / 1000
341            self.conductivity_components = self.diffusivity_components * conv_factor
342            self.conductivity_components_std_dev = self.diffusivity_components_std_dev * conv_factor
343
344            # Drift and displacement information.
345            self.drift = drift
346            self.corrected_displacements = dc
347            self.max_ion_displacements = np.max(np.sum(dc ** 2, axis=-1) ** 0.5, axis=1)
348            self.max_framework_displacement = np.max(self.max_ion_displacements[framework_indices])
349            self.msd = msd
350            self.mscd = mscd
351            self.haven_ratio = self.diffusivity / self.chg_diffusivity
352            self.sq_disp_ions = sq_disp_ions
353            self.msd_components = msd_components
354            self.dt = dt
355            self.indices = indices
356            self.framework_indices = framework_indices
357
358    def get_drift_corrected_structures(self, start=None, stop=None, step=None):
359        """
360        Returns an iterator for the drift-corrected structures. Use of
361        iterator is to reduce memory usage as # of structures in MD can be
362        huge. You don't often need all the structures all at once.
363
364        Args:
365            start, stop, step (int): applies a start/stop/step to the iterator.
366                Faster than applying it after generation, as it reduces the
367                number of structures created.
368        """
369        coords = np.array(self.structure.cart_coords)
370        species = self.structure.species_and_occu
371        lattices = self.lattices
372        nsites, nsteps, dim = self.corrected_displacements.shape
373
374        for i in range(start or 0, stop or nsteps, step or 1):
375            latt = lattices[0] if len(lattices) == 1 else lattices[i]
376            yield Structure(
377                latt,
378                species,
379                coords + self.corrected_displacements[:, i, :],
380                coords_are_cartesian=True,
381            )
382
383    def get_summary_dict(self, include_msd_t=False, include_mscd_t=False):
384        """
385        Provides a summary of diffusion information.
386
387        Args:
388            include_msd_t (bool): Whether to include mean square displace and
389                time data with the data.
390            include_msd_t (bool): Whether to include mean square charge displace and
391                time data with the data.
392
393        Returns:
394            (dict) of diffusion and conductivity data.
395        """
396        d = {
397            "D": self.diffusivity,
398            "D_sigma": self.diffusivity_std_dev,
399            "D_charge": self.chg_diffusivity,
400            "D_charge_sigma": self.chg_diffusivity_std_dev,
401            "S": self.conductivity,
402            "S_sigma": self.conductivity_std_dev,
403            "S_charge": self.chg_conductivity,
404            "D_components": self.diffusivity_components.tolist(),
405            "S_components": self.conductivity_components.tolist(),
406            "D_components_sigma": self.diffusivity_components_std_dev.tolist(),
407            "S_components_sigma": self.conductivity_components_std_dev.tolist(),
408            "specie": str(self.specie),
409            "step_skip": self.step_skip,
410            "time_step": self.time_step,
411            "temperature": self.temperature,
412            "max_framework_displacement": self.max_framework_displacement,
413            "Haven_ratio": self.haven_ratio,
414        }
415        if include_msd_t:
416            d["msd"] = self.msd.tolist()
417            d["msd_components"] = self.msd_components.tolist()
418            d["dt"] = self.dt.tolist()
419        if include_mscd_t:
420            d["mscd"] = self.mscd.tolist()
421        return d
422
423    def get_framework_rms_plot(self, plt=None, granularity=200, matching_s=None):
424        """
425        Get the plot of rms framework displacement vs time. Useful for checking
426        for melting, especially if framework atoms can move via paddle-wheel
427        or similar mechanism (which would show up in max framework displacement
428        but doesn't constitute melting).
429
430        Args:
431            plt (matplotlib.pyplot): If plt is supplied, changes will be made
432                to an existing plot. Otherwise, a new plot will be created.
433            granularity (int): Number of structures to match
434            matching_s (Structure): Optionally match to a disordered structure
435                instead of the first structure in the analyzer. Required when
436                a secondary mobile ion is present.
437        Notes:
438            The method doesn't apply to NPT-AIMD simulation analysis.
439        """
440        from pymatgen.util.plotting import pretty_plot
441
442        if self.lattices is not None and len(self.lattices) > 1:
443            warnings.warn("Note the method doesn't apply to NPT-AIMD " "simulation analysis!")
444
445        plt = pretty_plot(12, 8, plt=plt)
446        step = (self.corrected_displacements.shape[1] - 1) // (granularity - 1)
447        f = (matching_s or self.structure).copy()
448        f.remove_species([self.specie])
449        sm = StructureMatcher(
450            primitive_cell=False,
451            stol=0.6,
452            comparator=OrderDisorderElementComparator(),
453            allow_subset=True,
454        )
455        rms = []
456        for s in self.get_drift_corrected_structures(step=step):
457            s.remove_species([self.specie])
458            d = sm.get_rms_dist(f, s)
459            if d:
460                rms.append(d)
461            else:
462                rms.append((1, 1))
463        max_dt = (len(rms) - 1) * step * self.step_skip * self.time_step
464        if max_dt > 100000:
465            plot_dt = np.linspace(0, max_dt / 1000, len(rms))
466            unit = "ps"
467        else:
468            plot_dt = np.linspace(0, max_dt, len(rms))
469            unit = "fs"
470        rms = np.array(rms)
471        plt.plot(plot_dt, rms[:, 0], label="RMS")
472        plt.plot(plot_dt, rms[:, 1], label="max")
473        plt.legend(loc="best")
474        plt.xlabel("Timestep ({})".format(unit))
475        plt.ylabel("normalized distance")
476        plt.tight_layout()
477        return plt
478
479    def get_msd_plot(self, plt=None, mode="specie"):
480        """
481        Get the plot of the smoothed msd vs time graph. Useful for
482        checking convergence. This can be written to an image file.
483
484        Args:
485            plt: A plot object. Defaults to None, which means one will be
486                generated.
487            mode (str): Determines type of msd plot. By "species", "sites",
488                or direction (default). If mode = "mscd", the smoothed mscd vs.
489                time will be plotted.
490        """
491        from pymatgen.util.plotting import pretty_plot
492
493        plt = pretty_plot(12, 8, plt=plt)
494        if np.max(self.dt) > 100000:
495            plot_dt = self.dt / 1000
496            unit = "ps"
497        else:
498            plot_dt = self.dt
499            unit = "fs"
500
501        if mode == "species":
502            for sp in sorted(self.structure.composition.keys()):
503                indices = [i for i, site in enumerate(self.structure) if site.specie == sp]
504                sd = np.average(self.sq_disp_ions[indices, :], axis=0)
505                plt.plot(plot_dt, sd, label=sp.__str__())
506            plt.legend(loc=2, prop={"size": 20})
507        elif mode == "sites":
508            for i, site in enumerate(self.structure):
509                sd = self.sq_disp_ions[i, :]
510                plt.plot(plot_dt, sd, label="%s - %d" % (site.specie.__str__(), i))
511            plt.legend(loc=2, prop={"size": 20})
512        elif mode == "mscd":
513            plt.plot(plot_dt, self.mscd, "r")
514            plt.legend(["Overall"], loc=2, prop={"size": 20})
515        else:
516            # Handle default / invalid mode case
517            plt.plot(plot_dt, self.msd, "k")
518            plt.plot(plot_dt, self.msd_components[:, 0], "r")
519            plt.plot(plot_dt, self.msd_components[:, 1], "g")
520            plt.plot(plot_dt, self.msd_components[:, 2], "b")
521            plt.legend(["Overall", "a", "b", "c"], loc=2, prop={"size": 20})
522
523        plt.xlabel("Timestep ({})".format(unit))
524        if mode == "mscd":
525            plt.ylabel("MSCD ($\\AA^2$)")
526        else:
527            plt.ylabel("MSD ($\\AA^2$)")
528        plt.tight_layout()
529        return plt
530
531    def plot_msd(self, mode="default"):
532        """
533        Plot the smoothed msd vs time graph. Useful for checking convergence.
534
535        Args:
536            mode (str): Can be "default" (the default, shows only the MSD for
537                the diffusing specie, and its components), "ions" (individual
538                square displacements of all ions), "species" (mean square
539                displacement by specie), or "mscd" (overall mean square charge
540                displacement for diffusing specie).
541        """
542        self.get_msd_plot(mode=mode).show()
543
544    def export_msdt(self, filename):
545        """
546        Writes MSD data to a csv file that can be easily plotted in other
547        software.
548
549        Args:
550            filename (str): Filename. Supported formats are csv and dat. If
551                the extension is csv, a csv file is written. Otherwise,
552                a dat format is assumed.
553        """
554        fmt = "csv" if filename.lower().endswith(".csv") else "dat"
555        delimiter = ", " if fmt == "csv" else " "
556        with open(filename, "wt") as f:
557            if fmt == "dat":
558                f.write("# ")
559            f.write(delimiter.join(["t", "MSD", "MSD_a", "MSD_b", "MSD_c", "MSCD"]))
560            f.write("\n")
561            for dt, msd, msdc, mscd in zip(self.dt, self.msd, self.msd_components, self.mscd):
562                f.write(delimiter.join(["%s" % v for v in [dt, msd] + list(msdc) + [mscd]]))
563                f.write("\n")
564
565    @classmethod
566    def from_structures(
567        cls, structures, specie, temperature, time_step, step_skip, initial_disp=None, initial_structure=None, **kwargs
568    ):
569        r"""
570        Convenient constructor that takes in a list of Structure objects to
571        perform diffusion analysis.
572
573        Args:
574            structures ([Structure]): list of Structure objects (must be
575                ordered in sequence of run). E.g., you may have performed
576                sequential VASP runs to obtain sufficient statistics.
577            specie (Element/Species): Species to calculate diffusivity for as a
578                String. E.g., "Li".
579            temperature (float): Temperature of the diffusion run in Kelvin.
580            time_step (int): Time step between measurements.
581            step_skip (int): Sampling frequency of the displacements (
582                time_step is multiplied by this number to get the real time
583                between measurements)
584            initial_disp (np.ndarray): Sometimes, you need to iteratively
585                compute estimates of the diffusivity. This supplies an
586                initial displacement that will be added on to the initial
587                displacements. Note that this makes sense only when
588                smoothed=False.
589            initial_structure (Structure): Like initial_disp, this is used
590                for iterative computations of estimates of the diffusivity. You
591                typically need to supply both variables. This stipulates the
592                initial structure from which the current set of displacements
593                are computed.
594            \\*\\*kwargs: kwargs supported by the :class:`DiffusionAnalyzer`_.
595                Examples include smoothed, min_obs, avg_nsteps.
596        """
597        p, l = [], []
598        for i, s in enumerate(structures):
599            if i == 0:
600                structure = s
601            p.append(np.array(s.frac_coords)[:, None])
602            l.append(s.lattice.matrix)
603        if initial_structure is not None:
604            p.insert(0, np.array(initial_structure.frac_coords)[:, None])
605            l.insert(0, initial_structure.lattice.matrix)
606        else:
607            p.insert(0, p[0])
608            l.insert(0, l[0])
609
610        p = np.concatenate(p, axis=1)
611        dp = p[:, 1:] - p[:, :-1]
612        dp = dp - np.round(dp)
613        f_disp = np.cumsum(dp, axis=1)
614        c_disp = []
615        for i in f_disp:
616            c_disp.append([np.dot(d, m) for d, m in zip(i, l[1:])])
617        disp = np.array(c_disp)
618
619        # If is NVT-AIMD, clear lattice data.
620        if np.array_equal(l[0], l[-1]):
621            l = np.array([l[0]])
622        else:
623            l = np.array(l)
624        if initial_disp is not None:
625            disp += initial_disp[:, None, :]
626
627        return cls(structure, disp, specie, temperature, time_step, step_skip=step_skip, lattices=l, **kwargs)
628
629    @classmethod
630    def from_vaspruns(cls, vaspruns, specie, initial_disp=None, initial_structure=None, **kwargs):
631        r"""
632        Convenient constructor that takes in a list of Vasprun objects to
633        perform diffusion analysis.
634
635        Args:
636            vaspruns ([Vasprun]): List of Vaspruns (must be ordered  in
637                sequence of MD simulation). E.g., you may have performed
638                sequential VASP runs to obtain sufficient statistics.
639            specie (Element/Species): Species to calculate diffusivity for as a
640                String. E.g., "Li".
641            initial_disp (np.ndarray): Sometimes, you need to iteratively
642                compute estimates of the diffusivity. This supplies an
643                initial displacement that will be added on to the initial
644                displacements. Note that this makes sense only when
645                smoothed=False.
646            initial_structure (Structure): Like initial_disp, this is used
647                for iterative computations of estimates of the diffusivity. You
648                typically need to supply both variables. This stipulates the
649                initial stricture from which the current set of displacements
650                are computed.
651            \\*\\*kwargs: kwargs supported by the :class:`DiffusionAnalyzer`_.
652                Examples include smoothed, min_obs, avg_nsteps.
653        """
654
655        def get_structures(vaspruns):
656            for i, vr in enumerate(vaspruns):
657                if i == 0:
658                    step_skip = vr.ionic_step_skip or 1
659                    final_structure = vr.initial_structure
660                    temperature = vr.parameters["TEEND"]
661                    time_step = vr.parameters["POTIM"]
662                    yield step_skip, temperature, time_step
663                # check that the runs are continuous
664                fdist = pbc_diff(vr.initial_structure.frac_coords, final_structure.frac_coords)
665                if np.any(fdist > 0.001):
666                    raise ValueError("initial and final structures do not " "match.")
667                final_structure = vr.final_structure
668
669                assert (vr.ionic_step_skip or 1) == step_skip
670                for s in vr.ionic_steps:
671                    yield s["structure"]
672
673        s = get_structures(vaspruns)
674        step_skip, temperature, time_step = next(s)
675
676        return cls.from_structures(
677            structures=list(s),
678            specie=specie,
679            temperature=temperature,
680            time_step=time_step,
681            step_skip=step_skip,
682            initial_disp=initial_disp,
683            initial_structure=initial_structure,
684            **kwargs,
685        )
686
687    @classmethod
688    def from_files(
689        cls, filepaths, specie, step_skip=10, ncores=None, initial_disp=None, initial_structure=None, **kwargs
690    ):
691        r"""
692        Convenient constructor that takes in a list of vasprun.xml paths to
693        perform diffusion analysis.
694
695        Args:
696            filepaths ([str]): List of paths to vasprun.xml files of runs. (
697                must be ordered in sequence of MD simulation). For example,
698                you may have done sequential VASP runs and they are in run1,
699                run2, run3, etc. You should then pass in
700                ["run1/vasprun.xml", "run2/vasprun.xml", ...].
701            specie (Element/Species): Species to calculate diffusivity for as a
702                String. E.g., "Li".
703            step_skip (int): Sampling frequency of the displacements (
704                time_step is multiplied by this number to get the real time
705                between measurements)
706            ncores (int): Numbers of cores to use for multiprocessing. Can
707                speed up vasprun parsing considerably. Defaults to None,
708                which means serial. It should be noted that if you want to
709                use multiprocessing, the number of ionic steps in all vasprun
710                .xml files should be a multiple of the ionic_step_skip.
711                Otherwise, inconsistent results may arise. Serial mode has no
712                such restrictions.
713            initial_disp (np.ndarray): Sometimes, you need to iteratively
714                compute estimates of the diffusivity. This supplies an
715                initial displacement that will be added on to the initial
716                displacements. Note that this makes sense only when
717                smoothed=False.
718            initial_structure (Structure): Like initial_disp, this is used
719                for iterative computations of estimates of the diffusivity. You
720                typically need to supply both variables. This stipulates the
721                initial structure from which the current set of displacements
722                are computed.
723            \\*\\*kwargs: kwargs supported by the :class:`DiffusionAnalyzer`_.
724                Examples include smoothed, min_obs, avg_nsteps.
725        """
726        if ncores is not None and len(filepaths) > 1:
727            p = multiprocessing.Pool(ncores)  # pylint: disable=R1732
728            vaspruns = p.imap(_get_vasprun, [(fp, step_skip) for fp in filepaths])
729            analyzer = cls.from_vaspruns(
730                vaspruns, specie=specie, initial_disp=initial_disp, initial_structure=initial_structure, **kwargs
731            )
732            p.close()
733            p.join()
734            return analyzer
735
736        def vr(filepaths):
737            offset = 0
738            for p in filepaths:
739                v = Vasprun(p, ionic_step_offset=offset, ionic_step_skip=step_skip)
740                yield v
741                # Recompute offset.
742                offset = (-(v.nionic_steps - offset)) % step_skip
743
744        return cls.from_vaspruns(
745            vr(filepaths), specie=specie, initial_disp=initial_disp, initial_structure=initial_structure, **kwargs
746        )
747
748    def as_dict(self):
749        """
750        Returns: MSONable dict
751        """
752        return {
753            "@module": self.__class__.__module__,
754            "@class": self.__class__.__name__,
755            "structure": self.structure.as_dict(),
756            "displacements": self.disp.tolist(),
757            "specie": self.specie,
758            "temperature": self.temperature,
759            "time_step": self.time_step,
760            "step_skip": self.step_skip,
761            "min_obs": self.min_obs,
762            "smoothed": self.smoothed,
763            "avg_nsteps": self.avg_nsteps,
764            "lattices": self.lattices.tolist(),
765        }
766
767    @classmethod
768    def from_dict(cls, d):
769        """
770        Args:
771            d (dict): Dict representation
772
773        Returns: DiffusionAnalyzer
774        """
775        structure = Structure.from_dict(d["structure"])
776        return cls(
777            structure,
778            np.array(d["displacements"]),
779            specie=d["specie"],
780            temperature=d["temperature"],
781            time_step=d["time_step"],
782            step_skip=d["step_skip"],
783            min_obs=d["min_obs"],
784            smoothed=d.get("smoothed", "max"),
785            avg_nsteps=d.get("avg_nsteps", 1000),
786            lattices=np.array(d.get("lattices", [d["structure"]["lattice"]["matrix"]])),
787        )
788
789
790def get_conversion_factor(structure, species, temperature):
791    """
792    Conversion factor to convert between cm^2/s diffusivity measurements and
793    mS/cm conductivity measurements based on number of atoms of diffusing
794    species. Note that the charge is based on the oxidation state of the
795    species (where available), or else the number of valence electrons
796    (usually a good guess, esp for main group ions).
797
798    Args:
799        structure (Structure): Input structure.
800        species (Element/Species): Diffusing species.
801        temperature (float): Temperature of the diffusion run in Kelvin.
802
803    Returns:
804        Conversion factor.
805        Conductivity (in mS/cm) = Conversion Factor * Diffusivity (in cm^2/s)
806    """
807    df_sp = get_el_sp(species)
808    if hasattr(df_sp, "oxi_state"):
809        z = df_sp.oxi_state
810    else:
811        z = df_sp.full_electronic_structure[-1][2]
812
813    n = structure.composition[species]
814
815    vol = structure.volume * 1e-24  # units cm^3
816    return 1000 * n / (vol * const.N_A) * z ** 2 * (const.N_A * const.e) ** 2 / (const.R * temperature)
817
818
819def _get_vasprun(args):
820    """
821    Internal method to support multiprocessing.
822    """
823    return Vasprun(args[0], ionic_step_skip=args[1], parse_dos=False, parse_eigen=False)
824
825
826def fit_arrhenius(temps, diffusivities):
827    """
828    Returns Ea, c, standard error of Ea from the Arrhenius fit:
829        D = c * exp(-Ea/kT)
830
831    Args:
832        temps ([float]): A sequence of temperatures. units: K
833        diffusivities ([float]): A sequence of diffusivities (e.g.,
834            from DiffusionAnalyzer.diffusivity). units: cm^2/s
835    """
836    t_1 = 1 / np.array(temps)
837    logd = np.log(diffusivities)
838    # Do a least squares regression of log(D) vs 1/T
839    a = np.array([t_1, np.ones(len(temps))]).T
840    w, res, _, _ = np.linalg.lstsq(a, logd, rcond=None)
841    w = np.array(w)
842    n = len(temps)
843    if n > 2:
844        std_Ea = (res[0] / (n - 2) / (n * np.var(t_1))) ** 0.5 * const.k / const.e
845    else:
846        std_Ea = None
847    return -w[0] * const.k / const.e, np.exp(w[1]), std_Ea
848
849
850def get_extrapolated_diffusivity(temps, diffusivities, new_temp):
851    """
852    Returns (Arrhenius) extrapolated diffusivity at new_temp
853
854    Args:
855        temps ([float]): A sequence of temperatures. units: K
856        diffusivities ([float]): A sequence of diffusivities (e.g.,
857            from DiffusionAnalyzer.diffusivity). units: cm^2/s
858        new_temp (float): desired temperature. units: K
859
860    Returns:
861        (float) Diffusivity at extrapolated temp in mS/cm.
862    """
863    Ea, c, _ = fit_arrhenius(temps, diffusivities)
864    return c * np.exp(-Ea / (const.k / const.e * new_temp))
865
866
867def get_extrapolated_conductivity(temps, diffusivities, new_temp, structure, species):
868    """
869    Returns extrapolated mS/cm conductivity.
870
871    Args:
872        temps ([float]): A sequence of temperatures. units: K
873        diffusivities ([float]): A sequence of diffusivities (e.g.,
874            from DiffusionAnalyzer.diffusivity). units: cm^2/s
875        new_temp (float): desired temperature. units: K
876        structure (structure): Structure used for the diffusivity calculation
877        species (string/Species): conducting species
878
879    Returns:
880        (float) Conductivity at extrapolated temp in mS/cm.
881    """
882    return get_extrapolated_diffusivity(temps, diffusivities, new_temp) * get_conversion_factor(
883        structure, species, new_temp
884    )
885
886
887def get_arrhenius_plot(temps, diffusivities, diffusivity_errors=None, **kwargs):
888    r"""
889    Returns an Arrhenius plot.
890
891    Args:
892        temps ([float]): A sequence of temperatures.
893        diffusivities ([float]): A sequence of diffusivities (e.g.,
894            from DiffusionAnalyzer.diffusivity).
895        diffusivity_errors ([float]): A sequence of errors for the
896            diffusivities. If None, no error bar is plotted.
897        \\*\\*kwargs:
898            Any keyword args supported by matplotlib.pyplot.plot.
899
900    Returns:
901        A matplotlib.pyplot object. Do plt.show() to show the plot.
902    """
903    Ea, c, _ = fit_arrhenius(temps, diffusivities)
904
905    from pymatgen.util.plotting import pretty_plot
906
907    plt = pretty_plot(12, 8)
908
909    # log10 of the arrhenius fit
910    arr = c * np.exp(-Ea / (const.k / const.e * np.array(temps)))
911
912    t_1 = 1000 / np.array(temps)
913
914    plt.plot(t_1, diffusivities, "ko", t_1, arr, "k--", markersize=10, **kwargs)
915    if diffusivity_errors is not None:
916        n = len(diffusivity_errors)
917        plt.errorbar(
918            t_1[0:n],
919            diffusivities[0:n],
920            yerr=diffusivity_errors,
921            fmt="ko",
922            ecolor="k",
923            capthick=2,
924            linewidth=2,
925        )
926    ax = plt.axes()
927    ax.set_yscale("log")
928    plt.text(
929        0.6,
930        0.85,
931        "E$_a$ = {:.0f} meV".format(Ea * 1000),
932        fontsize=30,
933        transform=plt.axes().transAxes,
934    )
935    plt.ylabel("D (cm$^2$/s)")
936    plt.xlabel("1000/T (K$^{-1}$)")
937    plt.tight_layout()
938    return plt
939