# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.

"""
This module defines the various drones used to assimilate data.
"""

import abc
import glob
import json
import logging
import os
import warnings

from monty.io import zopen
from monty.json import MSONable

from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry
from pymatgen.io.gaussian import GaussianOutput
from pymatgen.io.vasp.inputs import Incar, Poscar, Potcar
from pymatgen.io.vasp.outputs import Dynmat, Oszicar, Vasprun

logger = logging.getLogger(__name__)


class AbstractDrone(MSONable, metaclass=abc.ABCMeta):
    """
    Abstract drone class that defines the various methods that must be
    implemented by drones. Because of the quirky nature of Python's
    multiprocessing, the intermediate data representations has to be in the
    form of python primitives. So all objects that drones work with must be
    MSONable. All drones must also implement the standard MSONable as_dict() and
    from_dict API.
    """

    @abc.abstractmethod
    def assimilate(self, path):
        """
        Assimilate data in a directory path into a pymatgen object. Because of
        the quirky nature of Python's multiprocessing, the object must support
        pymatgen's as_dict() for parallel processing.

        Args:
            path: directory path

        Returns:
            An assimilated object
        """
        return

    @abc.abstractmethod
    def get_valid_paths(self, path):
        """
        Checks if path contains valid data for assimilation, and then returns
        the valid paths. The paths returned can be a list of directory or file
        paths, depending on what kind of data you are assimilating. For
        example, if you are assimilating VASP runs, you are only interested in
        directories containing vasprun.xml files. On the other hand, if you are
        interested converting all POSCARs in a directory tree to cifs for
        example, you will want the file paths.

        Args:
            path: input path as a tuple generated from os.walk, i.e.,
                (parent, subdirs, files).

        Returns:
            List of valid dir/file paths for assimilation
        """
        return


class VaspToComputedEntryDrone(AbstractDrone):
    """
    VaspToEntryDrone assimilates directories containing vasp output to
    ComputedEntry/ComputedStructureEntry objects. There are some restrictions
    on the valid directory structures:

    1. There can be only one vasp run in each directory.
    2. Directories designated "relax1", "relax2" are considered to be 2 parts
       of an aflow style run, and only "relax2" is parsed.
    3. The drone parses only the vasprun.xml file.
    """

    def __init__(self, inc_structure=False, parameters=None, data=None):
        """
        Args:
            inc_structure (bool): Set to True if you want
                ComputedStructureEntries to be returned instead of
                ComputedEntries.
            parameters (list): Input parameters to include. It has to be one of
                the properties supported by the Vasprun object. See
                :class:`pymatgen.io.vasp.Vasprun`. If parameters is None,
                a default set of parameters that are necessary for typical
                post-processing will be set.
            data (list): Output data to include. Has to be one of the properties
                supported by the Vasprun object.
        """
        self._inc_structure = inc_structure
        # Defaults are always included; user-supplied parameters extend them.
        self._parameters = {
            "is_hubbard",
            "hubbards",
            "potcar_spec",
            "potcar_symbols",
            "run_type",
        }
        if parameters:
            self._parameters.update(parameters)
        self._data = data if data else []

    def assimilate(self, path):
        """
        Assimilate data in a directory path into a ComputedEntry object.

        Args:
            path: directory path

        Returns:
            ComputedEntry (or ComputedStructureEntry if inc_structure was set),
            or None if no vasprun.xml* file is found or it cannot be parsed.
        """
        files = os.listdir(path)
        if "relax1" in files and "relax2" in files:
            # aflow-style run: only the second relaxation is parsed.
            search_dir = os.path.join(path, "relax2")
        else:
            search_dir = path

        # glob order is unspecified, so sort to make the choice deterministic.
        vasprun_files = sorted(glob.glob(os.path.join(search_dir, "vasprun.xml*")))
        if not vasprun_files:
            logger.debug("error in %s: no vasprun.xml* file found", search_dir)
            return None

        # Since multiple files are ambiguous, we will always read
        # the one that is the last one alphabetically.
        filepath = vasprun_files[-1]
        if len(vasprun_files) > 1:
            warnings.warn("%d vasprun.xml.* found. %s is being parsed." % (len(vasprun_files), filepath))

        try:
            vasprun = Vasprun(filepath)
        except Exception as ex:
            # Best effort: an unparsable run is skipped rather than aborting
            # the whole assimilation.
            logger.debug("error in %s: %s", filepath, ex)
            return None

        entry = vasprun.get_computed_entry(self._inc_structure, parameters=self._parameters, data=self._data)

        # entry.parameters["history"] = _get_transformation_history(path)
        return entry

    def get_valid_paths(self, path):
        """
        Checks if paths contains vasprun.xml or (POSCAR+OSZICAR)

        Args:
            path: input path as a tuple generated from os.walk, i.e.,
                (parent, subdirs, files).

        Returns:
            List of valid dir/file paths for assimilation
        """
        parent, subdirs, _ = path
        if "relax1" in subdirs and "relax2" in subdirs:
            return [parent]

        # The relax1/relax2 subdirectories are handled through their parent.
        # Check both "/" and the platform separator so that paths produced by
        # os.walk on Windows are recognised as well.
        relax_suffixes = tuple(sep + name for sep in {"/", os.sep} for name in ("relax1", "relax2"))
        if parent.endswith(relax_suffixes):
            return []

        def has_match(pattern):
            return len(glob.glob(os.path.join(parent, pattern))) > 0

        if has_match("vasprun.xml*") or (has_match("POSCAR*") and has_match("OSZICAR*")):
            return [parent]
        return []

    def __str__(self):
        return " VaspToComputedEntryDrone"

    def as_dict(self):
        """
        Returns: MSONable dict
        """
        return {
            "init_args": {
                "inc_structure": self._inc_structure,
                # Sets are not JSON serializable; emit a sorted list instead.
                # from_dict() accepts it unchanged.
                "parameters": sorted(self._parameters),
                "data": list(self._data),
            },
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
        }

    @classmethod
    def from_dict(cls, d):
        """
        Args:
            d (dict): Dict Representation

        Returns:
            VaspToComputedEntryDrone
        """
        return cls(**d["init_args"])


class SimpleVaspToComputedEntryDrone(VaspToComputedEntryDrone):
    """
    A simpler VaspToComputedEntryDrone. Instead of parsing vasprun.xml, it
    parses only the INCAR, POTCAR, OSZICAR and KPOINTS files, which are much
    smaller and faster to parse. However, much fewer properties are available
    compared to the standard VaspToComputedEntryDrone.
    """

    def __init__(self, inc_structure=False):
        """
        Args:
            inc_structure (bool): Set to True if you want
                ComputedStructureEntries to be returned instead of
                ComputedEntries. Structure will be parsed from the CONTCAR.
        """
        self._inc_structure = inc_structure
        self._parameters = {"is_hubbard", "hubbards", "potcar_spec", "run_type"}

    def assimilate(self, path):
        """
        Assimilate data in a directory path into a ComputedEntry object.

        Args:
            path: directory path

        Returns:
            ComputedEntry (or ComputedStructureEntry if inc_structure was set),
            or None if the required files are missing or cannot be parsed.
        """
        files = os.listdir(path)
        try:
            files_to_parse = {}
            required = {"INCAR", "POTCAR", "CONTCAR", "OSZICAR", "POSCAR"}
            if "relax1" in files and "relax2" in files:
                # aflow-style run: inputs come from relax1 (first match),
                # outputs from relax2 (last match).
                sources = (
                    ("relax1", ("INCAR", "POTCAR", "POSCAR"), 0),
                    ("relax2", ("CONTCAR", "OSZICAR"), -1),
                )
                for subdir, names, pick in sources:
                    for filename in names:
                        matches = sorted(glob.glob(os.path.join(path, subdir, filename + "*")))
                        if matches:
                            files_to_parse[filename] = matches[pick]
            else:
                # DYNMAT is optional; everything else is required.
                for filename in sorted(required | {"DYNMAT"}):
                    matches = sorted(glob.glob(os.path.join(path, filename + "*")))
                    if not matches:
                        # Missing files are reported by the check below.
                        continue
                    if len(matches) == 1 or filename in ("INCAR", "POTCAR"):
                        files_to_parse[filename] = matches[0]
                    else:
                        # Since multiple files are ambiguous, we will always
                        # use the first one for POSCAR and the last one
                        # alphabetically for CONTCAR and OSZICAR.
                        files_to_parse[filename] = matches[0] if filename == "POSCAR" else matches[-1]
                        warnings.warn("%d files found. %s is being parsed." % (len(matches), files_to_parse[filename]))

            if not required.issubset(files_to_parse):
                raise ValueError(
                    "Unable to parse %s as not all necessary files are present! "
                    "SimpleVaspToComputedEntryDrone requires INCAR, POTCAR, CONTCAR, OSZICAR, POSCAR "
                    "to be present. Only %s detected" % (path, sorted(files_to_parse))
                )

            poscar = Poscar.from_file(files_to_parse["POSCAR"])
            contcar = Poscar.from_file(files_to_parse["CONTCAR"])
            incar = Incar.from_file(files_to_parse["INCAR"])
            potcar = Potcar.from_file(files_to_parse["POTCAR"])
            oszicar = Oszicar(files_to_parse["OSZICAR"])

            param = {"hubbards": {}}
            if "LDAUU" in incar:
                param["hubbards"] = dict(zip(poscar.site_symbols, incar["LDAUU"]))
            param["is_hubbard"] = incar.get("LDAU", True) and sum(param["hubbards"].values()) > 0
            param["run_type"] = None
            param["potcar_spec"] = potcar.spec
            energy = oszicar.final_energy
            structure = contcar.structure
            initial_vol = poscar.structure.volume
            final_vol = contcar.structure.volume
            delta_volume = final_vol / initial_vol - 1
            data = {"filename": path, "delta_volume": delta_volume}
            if "DYNMAT" in files_to_parse:
                dynmat = Dynmat(files_to_parse["DYNMAT"])
                data["phonon_frequencies"] = dynmat.get_phonon_frequencies()
            if self._inc_structure:
                return ComputedStructureEntry(structure, energy, parameters=param, data=data)
            return ComputedEntry(structure.composition, energy, parameters=param, data=data)

        except Exception as ex:
            # Best effort: a directory that cannot be parsed is skipped rather
            # than aborting the whole assimilation.
            logger.debug("error in %s: %s", path, ex)
            return None

    def __str__(self):
        return "SimpleVaspToComputedEntryDrone"

    def as_dict(self):
        """
        Returns: MSONable dict
        """
        return {
            "init_args": {"inc_structure": self._inc_structure},
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
        }

    @classmethod
    def from_dict(cls, d):
        """
        Args:
            d (dict): Dict Representation

        Returns:
            SimpleVaspToComputedEntryDrone
        """
        return cls(**d["init_args"])


class GaussianToComputedEntryDrone(AbstractDrone):
    """
    GaussianToEntryDrone assimilates directories containing Gaussian output to
    ComputedEntry/ComputedStructureEntry objects. By default, it is assumed
    that Gaussian output files have a ".log" extension.

    .. note::

        Like the GaussianOutput class, this is still in early beta.
    """

    def __init__(self, inc_structure=False, parameters=None, data=None, file_extensions=(".log",)):
        """
        Args:
            inc_structure (bool): Set to True if you want
                ComputedStructureEntries to be returned instead of
                ComputedEntries.
            parameters (list): Input parameters to include. It has to be one of
                the properties supported by the GaussianOutput object. See
                :class:`pymatgen.io.gaussian.GaussianOutput`. The parameters
                have to be one of python's primitive types, i.e., list, dict of
                strings and integers. If parameters is None, a default set of
                parameters will be set.
            data (list): Output data to include. Has to be one of the properties
                supported by the GaussianOutput object. The parameters have to
                be one of python's primitive types, i.e. list, dict of strings
                and integers. If data is None, a default set will be set.
            file_extensions (list):
                File extensions to be considered as Gaussian output files.
                Defaults to just the typical "log" extension.
        """
        self._inc_structure = inc_structure
        # Defaults are always included; user-supplied values extend them.
        self._parameters = {
            "functional",
            "basis_set",
            "charge",
            "spin_multiplicity",
            "route_parameters",
        }

        if parameters:
            self._parameters.update(parameters)

        self._data = {"stationary_type", "properly_terminated"}
        if data:
            self._data.update(data)

        self._file_extensions = file_extensions

    def assimilate(self, path):
        """
        Assimilate a Gaussian output file into a ComputedEntry object.

        Args:
            path: path of the Gaussian output file, as returned by
                get_valid_paths

        Returns:
            ComputedEntry (or ComputedStructureEntry if inc_structure was set),
            or None if the file cannot be parsed.
        """
        try:
            gaurun = GaussianOutput(path)
        except Exception as ex:
            # Best effort: an unparsable file is skipped rather than aborting
            # the whole assimilation.
            logger.debug("error in %s: %s", path, ex)
            return None

        # Each requested name is read as an attribute of the parsed output.
        param = {p: getattr(gaurun, p) for p in self._parameters}
        data = {d: getattr(gaurun, d) for d in self._data}

        if self._inc_structure:
            return ComputedStructureEntry(gaurun.final_structure, gaurun.final_energy, parameters=param, data=data)
        return ComputedEntry(
            gaurun.final_structure.composition,
            gaurun.final_energy,
            parameters=param,
            data=data,
        )

    def get_valid_paths(self, path):
        """
        Checks if path contains files with the defined extensions.

        Args:
            path: input path as a tuple generated from os.walk, i.e.,
                (parent, subdirs, files).

        Returns:
            List of valid dir/file paths for assimilation
        """
        parent, _, files = path
        return [os.path.join(parent, f) for f in files if os.path.splitext(f)[1] in self._file_extensions]

    def __str__(self):
        return " GaussianToComputedEntryDrone"

    def as_dict(self):
        """
        Returns: MSONable dict
        """
        return {
            "init_args": {
                "inc_structure": self._inc_structure,
                # Sets are not JSON serializable; emit sorted lists instead.
                # from_dict() accepts them unchanged.
                "parameters": sorted(self._parameters),
                "data": sorted(self._data),
                "file_extensions": self._file_extensions,
            },
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
        }

    @classmethod
    def from_dict(cls, d):
        """
        Args:
            d (dict): Dict Representation

        Returns:
            GaussianToComputedEntryDrone
        """
        return cls(**d["init_args"])


def _get_transformation_history(path):
    """
    Checks for a transformations.json* file and returns the history.

    Args:
        path: directory path to search

    Returns:
        The value stored under the "history" key of the first matching file,
        or None if no file is found or it cannot be read.
    """
    trans_json = glob.glob(os.path.join(path, "transformations.json*"))
    if not trans_json:
        return None
    try:
        with zopen(trans_json[0]) as f:
            return json.load(f)["history"]
    except Exception as ex:
        # Best effort: the history is optional, so failures are only logged.
        logger.debug("error in %s: %s", trans_json[0], ex)
        return None