# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.

"""
This module implements classes and methods for processing LAMMPS output
files (log and dump).
"""


import glob
import re
from io import StringIO

import numpy as np
import pandas as pd
from monty.io import zopen
from monty.json import MSONable

from pymatgen.io.lammps.data import LammpsBox

__author__ = "Kiran Mathew, Zhi Deng"
__copyright__ = "Copyright 2018, The Materials Virtual Lab"
__version__ = "1.0"
__maintainer__ = "Zhi Deng"
__email__ = "z4deng@eng.ucsd.edu"
__date__ = "Aug 1, 2018"


class LammpsDump(MSONable):
    """
    Object for representing dump data for a single snapshot.
    """

    def __init__(self, timestep, natoms, box, data):
        """
        Base constructor.

        Args:
            timestep (int): Current timestep.
            natoms (int): Total number of atoms in the box.
            box (LammpsBox): Simulation box.
            data (pd.DataFrame): Dumped atomic data.

        """
        self.timestep = timestep
        self.natoms = natoms
        self.box = box
        self.data = data

    @classmethod
    def from_string(cls, string):
        """
        Constructor from string parsing.

        The string is expected to hold exactly one snapshot, laid out as
        it is read below: the timestep on the 2nd line, the atom count on
        the 4th line, the box header on the 5th line, three lines of box
        bounds, the "ITEM: ATOMS" column header on the 9th line, and the
        per-atom rows after that.

        Args:
            string (str): Input string.

        Returns:
            LammpsDump

        """
        lines = string.split("\n")
        timestep = int(lines[1])
        natoms = int(lines[3])
        box_arr = np.loadtxt(StringIO("\n".join(lines[5:8])))
        bounds = box_arr[:, :2]
        tilt = None
        if "xy xz yz" in lines[4]:
            # The third column holds the tilt factors (xy, xz, yz). The
            # bounds written for a tilted box are shifted by the extreme
            # tilt displacements, so remove them before building the box.
            tilt = box_arr[:, 2]
            x = (0, tilt[0], tilt[1], tilt[0] + tilt[1])
            y = (0, tilt[2])
            bounds -= np.array([[min(x), max(x)], [min(y), max(y)], [0, 0]])
        box = LammpsBox(bounds, tilt)
        data_head = lines[8].replace("ITEM: ATOMS", "").split()
        # sep=r"\s+" is the non-deprecated equivalent of delim_whitespace=True.
        data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, sep=r"\s+")
        return cls(timestep, natoms, box, data)

    @classmethod
    def from_dict(cls, d):
        """
        Args:
            d (dict): Dict representation

        Returns:
            LammpsDump
        """
        items = {"timestep": d["timestep"], "natoms": d["natoms"]}
        items["box"] = LammpsBox.from_dict(d["box"])
        # Wrap the JSON text in a buffer: passing a literal JSON string to
        # pd.read_json is deprecated in recent pandas releases.
        items["data"] = pd.read_json(StringIO(d["data"]), orient="split")
        return cls(**items)

    def as_dict(self):
        """
        Returns: MSONable dict
        """
        return {
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
            "timestep": self.timestep,
            "natoms": self.natoms,
            "box": self.box.as_dict(),
            "data": self.data.to_json(orient="split"),
        }


def parse_lammps_dumps(file_pattern):
    """
    Generator that parses dump file(s).

    Args:
        file_pattern (str): Filename to parse. The timestep wildcard
            (e.g., dump.atom.'*') is supported and the files are parsed
            in the sequence of timestep.

    Yields:
        LammpsDump for each available snapshot.

    """
    files = glob.glob(file_pattern)
    if len(files) > 1:
        # Turn the glob wildcard into a capturing group so that files can
        # be ordered numerically by timestep rather than lexically.
        pattern = file_pattern.replace("*", "([0-9]+)")
        # Double the backslashes so path separators are matched literally.
        pattern = pattern.replace("\\", "\\\\")
        files = sorted(files, key=lambda f: int(re.match(pattern, f).group(1)))

    for fname in files:
        with zopen(fname, "rt") as f:
            dump_cache = []
            for line in f:
                if line.startswith("ITEM: TIMESTEP"):
                    # A new snapshot starts: flush the previous one first.
                    if dump_cache:
                        yield LammpsDump.from_string("".join(dump_cache))
                    dump_cache = [line]
                else:
                    dump_cache.append(line)
            # Flush the last snapshot; an empty file yields nothing
            # instead of failing inside from_string.
            if dump_cache:
                yield LammpsDump.from_string("".join(dump_cache))


def parse_lammps_log(filename="log.lammps"):
    """
    Parses log file with focus on thermo data. Both one and multi line
    formats are supported. Any incomplete runs (no "Loop time" marker)
    will not be parsed.

    Notes:
        SHAKE stats printed with thermo data are not supported yet.
        They are ignored in multi line format, while they may cause
        issues with dataframe parsing in one line format.

    Args:
        filename (str): Filename to parse.

    Returns:
        [pd.DataFrame] containing thermo data for each completed run.

    """
    with zopen(filename, "rt") as f:
        lines = f.readlines()
    begin_flag = (
        "Memory usage per processor =",
        "Per MPI rank memory allocation (min/avg/max) =",
    )
    end_flag = "Loop time of"

    # Pair every end marker with the most recent unmatched begin marker.
    # A begin marker that is never followed by an end marker (incomplete
    # run) is dropped and cannot shift the pairing of the other runs.
    spans = []
    begin = None
    for i, line in enumerate(lines):
        if line.startswith(begin_flag):
            begin = i
        elif line.startswith(end_flag) and begin is not None:
            spans.append((begin, i))
            begin = None

    multi_pattern = re.compile(r"-+\s+Step\s+([0-9]+)\s+-+")
    kv_pattern = re.compile(r"([0-9A-Za-z_\[\]]+)\s+=\s+([0-9eE\.+-]+)")

    def _parse_thermo(lines):
        """Build a DataFrame from the thermo lines of a single run."""
        # multi line thermo data
        if multi_pattern.match(lines[0]):
            marks = [i for i, line in enumerate(lines) if multi_pattern.match(line)]
            # Each timestep block runs from its header to the next header
            # (or to the end of the run for the last one).
            stops = marks[1:] + [len(lines)]
            timesteps = [lines[start:stop] for start, stop in zip(marks, stops)]
            dicts = []
            for ts in timesteps:
                data = {"Step": int(multi_pattern.match(ts[0]).group(1))}
                data.update({k: float(v) for k, v in kv_pattern.findall("".join(ts[1:]))})
                dicts.append(data)
            df = pd.DataFrame(dicts)
            # rearrange the sequence of columns
            columns = ["Step"] + [k for k, _ in kv_pattern.findall("".join(timesteps[0][1:]))]
            df = df[columns]
        # one line thermo data
        else:
            # sep=r"\s+" is the non-deprecated equivalent of delim_whitespace=True.
            df = pd.read_csv(StringIO("".join(lines)), sep=r"\s+")
        return df

    return [_parse_thermo(lines[b + 1 : e]) for b, e in spans]