1# coding: utf-8
2# Copyright (c) Pymatgen Development Team.
3# Distributed under the terms of the MIT License.
4
5"""
6This module implements classes and methods for processing LAMMPS output
7files (log and dump).
8"""
9
10
11import glob
12import re
13from io import StringIO
14
15import numpy as np
16import pandas as pd
17from monty.io import zopen
18from monty.json import MSONable
19
20from pymatgen.io.lammps.data import LammpsBox
21
22__author__ = "Kiran Mathew, Zhi Deng"
23__copyright__ = "Copyright 2018, The Materials Virtual Lab"
24__version__ = "1.0"
25__maintainer__ = "Zhi Deng"
26__email__ = "z4deng@eng.ucsd.edu"
27__date__ = "Aug 1, 2018"
28
29
class LammpsDump(MSONable):
    """
    Object for representing dump data for a single snapshot.
    """

    def __init__(self, timestep, natoms, box, data):
        """
        Base constructor.

        Args:
            timestep (int): Current timestep.
            natoms (int): Total number of atoms in the box.
            box (LammpsBox): Simulation box.
            data (pd.DataFrame): Dumped atomic data.

        """
        self.timestep = timestep
        self.natoms = natoms
        self.box = box
        self.data = data

    @classmethod
    def from_string(cls, string):
        """
        Constructor from string parsing of a single-snapshot dump.

        Expected layout: line 1 is the timestep, line 3 the atom count,
        line 4 the "ITEM: BOX BOUNDS" header, lines 5-7 the box bounds,
        line 8 the "ITEM: ATOMS" column header, and the rest atomic data.

        Args:
            string (str): Input string.

        Returns:
            LammpsDump

        """
        lines = string.split("\n")
        timestep = int(lines[1])
        natoms = int(lines[3])
        box_arr = np.loadtxt(StringIO("\n".join(lines[5:8])))
        bounds = box_arr[:, :2]
        tilt = None
        if "xy xz yz" in lines[4]:
            # Triclinic box: LAMMPS extends the dumped bounds by the tilt
            # factors, so shrink them back to the actual cell bounds.
            tilt = box_arr[:, 2]
            x = (0, tilt[0], tilt[1], tilt[0] + tilt[1])
            y = (0, tilt[2])
            bounds -= np.array([[min(x), max(x)], [min(y), max(y)], [0, 0]])
        box = LammpsBox(bounds, tilt)
        data_head = lines[8].replace("ITEM: ATOMS", "").split()
        # sep=r"\s+" is the supported equivalent of delim_whitespace=True,
        # which is deprecated and removed in pandas 3.0.
        data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, sep=r"\s+")
        return cls(timestep, natoms, box, data)

    @classmethod
    def from_dict(cls, d):
        """
        Args:
            d (dict): Dict representation

        Returns:
            LammpsDump
        """
        items = {"timestep": d["timestep"], "natoms": d["natoms"]}
        items["box"] = LammpsBox.from_dict(d["box"])
        # Wrap the JSON payload in StringIO: passing a literal JSON string
        # to read_json is deprecated in modern pandas.
        items["data"] = pd.read_json(StringIO(d["data"]), orient="split")
        return cls(**items)

    def as_dict(self):
        """
        Returns: MSONable dict
        """
        d = {}
        d["@module"] = self.__class__.__module__
        d["@class"] = self.__class__.__name__
        d["timestep"] = self.timestep
        d["natoms"] = self.natoms
        d["box"] = self.box.as_dict()
        # orient="split" round-trips cleanly with from_dict above.
        d["data"] = self.data.to_json(orient="split")
        return d
102
103
def parse_lammps_dumps(file_pattern):
    """
    Generator that parses dump file(s).

    Args:
        file_pattern (str): Filename to parse. The timestep wildcard
            (e.g., dump.atom.'*') is supported and the files are parsed
            in the sequence of timestep.

    Yields:
        LammpsDump for each available snapshot.

    """
    files = glob.glob(file_pattern)
    if len(files) > 1:
        # Sort multiple files numerically by the timestep captured where
        # the "*" wildcard appeared. Backslashes (Windows path separators)
        # are escaped so they are not treated as regex metacharacters.
        pattern = r"%s" % file_pattern.replace("*", "([0-9]+)")
        pattern = pattern.replace("\\", "\\\\")
        files = sorted(files, key=lambda f: int(re.match(pattern, f).group(1)))

    for fname in files:
        with zopen(fname, "rt") as f:
            dump_cache = []
            for line in f:
                if line.startswith("ITEM: TIMESTEP"):
                    # A new snapshot begins; flush the previous one, if any.
                    if len(dump_cache) > 0:
                        yield LammpsDump.from_string("".join(dump_cache))
                    dump_cache = [line]
                else:
                    dump_cache.append(line)
            # Flush the trailing snapshot; skip if the file was empty,
            # which would otherwise crash from_string on an empty string.
            if dump_cache:
                yield LammpsDump.from_string("".join(dump_cache))
134
135
def parse_lammps_log(filename="log.lammps"):
    """
    Parses log file with focus on thermo data. Both one and multi line
    formats are supported. Any incomplete runs (no "Loop time" marker)
    will not be parsed.

    Notes:
        SHAKE stats printed with thermo data are not supported yet.
        They are ignored in multi line format, while they may cause
        issues with dataframe parsing in one line format.

    Args:
        filename (str): Filename to parse.

    Returns:
        [pd.DataFrame] containing thermo data for each completed run.

    """
    with zopen(filename, "rt") as f:
        lines = f.readlines()
    # Thermo sections start right after a memory-usage line (wording
    # differs between LAMMPS versions) and end at the "Loop time" line.
    begin_flag = (
        "Memory usage per processor =",
        "Per MPI rank memory allocation (min/avg/max) =",
    )
    end_flag = "Loop time of"
    begins, ends = [], []
    for i, l in enumerate(lines):
        if l.startswith(begin_flag):
            begins.append(i)
        elif l.startswith(end_flag):
            ends.append(i)

    def _parse_thermo(lines):
        # Multi-line format announces each step with a banner like
        # "---------------- Step 100 ----------------".
        multi_pattern = r"-+\s+Step\s+([0-9]+)\s+-+"
        # multi line thermo data
        if re.match(multi_pattern, lines[0]):
            timestep_marks = [i for i, l in enumerate(lines) if re.match(multi_pattern, l)]
            timesteps = np.split(lines, timestep_marks)[1:]
            dicts = []
            kv_pattern = r"([0-9A-Za-z_\[\]]+)\s+=\s+([0-9eE\.+-]+)"
            for ts in timesteps:
                data = {}
                data["Step"] = int(re.match(multi_pattern, ts[0]).group(1))
                data.update({k: float(v) for k, v in re.findall(kv_pattern, "".join(ts[1:]))})
                dicts.append(data)
            df = pd.DataFrame(dicts)
            # rearrange the sequence of columns to match the log's order
            columns = ["Step"] + [k for k, v in re.findall(kv_pattern, "".join(timesteps[0][1:]))]
            df = df[columns]
        # one line thermo data
        else:
            # sep=r"\s+" is the supported equivalent of the deprecated
            # delim_whitespace=True (removed in pandas 3.0).
            df = pd.read_csv(StringIO("".join(lines)), sep=r"\s+")
        return df

    runs = []
    # zip truncates at the shorter list, so a trailing begin with no
    # matching "Loop time" (incomplete run) is silently skipped.
    for b, e in zip(begins, ends):
        runs.append(_parse_thermo(lines[b + 1 : e]))
    return runs
194