1# coding: utf-8
2# Copyright (c) Pymatgen Development Team.
3# Distributed under the terms of the MIT License.
4
5"""
This module defines the various drones used to assimilate data.
7"""
8
9import abc
10import glob
11import json
12import logging
13import os
14import warnings
15
16from monty.io import zopen
17from monty.json import MSONable
18
19from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry
20from pymatgen.io.gaussian import GaussianOutput
21from pymatgen.io.vasp.inputs import Incar, Poscar, Potcar
22from pymatgen.io.vasp.outputs import Dynmat, Oszicar, Vasprun
23
24logger = logging.getLogger(__name__)
25
26
class AbstractDrone(MSONable, metaclass=abc.ABCMeta):
    """
    Interface that every drone has to implement.

    Because of the quirky nature of Python's multiprocessing, intermediate
    data has to be representable as Python primitives. Every object a drone
    works with must therefore be MSONable, and every drone must itself
    provide the standard MSONable as_dict() and from_dict() API.
    """

    @abc.abstractmethod
    def assimilate(self, path):
        """
        Turn the data found at a path into a pymatgen object.

        Because of the quirky nature of Python's multiprocessing, the
        returned object must support pymatgen's as_dict() so that it can be
        used in parallel processing.

        Args:
            path: directory path

        Returns:
            An assimilated object
        """
        return None

    @abc.abstractmethod
    def get_valid_paths(self, path):
        """
        Decide which paths below an os.walk entry are worth assimilating.

        Depending on the kind of data, the returned paths may be directories
        or files. A drone for VASP runs, for example, only cares about
        directories containing vasprun.xml files, whereas a drone converting
        every POSCAR in a directory tree to cif wants the file paths.

        Args:
            path: input path as a tuple generated from os.walk, i.e.,
                (parent, subdirs, files).

        Returns:
            List of valid dir/file paths for assimilation
        """
        return None
71
72
class VaspToComputedEntryDrone(AbstractDrone):
    """
    VaspToEntryDrone assimilates directories containing vasp output to
    ComputedEntry/ComputedStructureEntry objects. There are some restrictions
    on the valid directory structures:

    1. There can be only one vasp run in each directory.
    2. Directories designated "relax1", "relax2" are considered to be 2 parts
       of an aflow style run, and only "relax2" is parsed.
    3. The drone parses only the vasprun.xml file.
    """

    def __init__(self, inc_structure=False, parameters=None, data=None):
        """
        Args:
            inc_structure (bool): Set to True if you want
                ComputedStructureEntries to be returned instead of
                ComputedEntries.
            parameters (list): Additional input parameters to include. Each
                has to be one of the properties supported by the Vasprun
                object. See :class:`pymatgen.io.vasp.Vasprun`. They are added
                on top of a default set (is_hubbard, hubbards, potcar_spec,
                potcar_symbols, run_type) that is always included.
            data (list): Output data to include. Has to be one of the properties
                supported by the Vasprun object. Defaults to an empty list.
        """
        self._inc_structure = inc_structure
        self._parameters = {
            "is_hubbard",
            "hubbards",
            "potcar_spec",
            "potcar_symbols",
            "run_type",
        }
        if parameters:
            self._parameters.update(parameters)
        self._data = data if data else []

    def _find_vasprun(self, path):
        """
        Locate the vasprun.xml* file that should be parsed for a run directory.

        Args:
            path: directory path

        Returns:
            Path of the selected file, or None if no vasprun.xml* exists.
        """
        files = os.listdir(path)
        # For an aflow style run only the second relaxation is of interest.
        if "relax1" in files and "relax2" in files:
            search_dir = os.path.join(path, "relax2")
        else:
            search_dir = path

        # glob makes no ordering guarantee, so sort to get a reproducible pick.
        candidates = sorted(glob.glob(os.path.join(search_dir, "vasprun.xml*")))
        if not candidates:
            return None

        filepath = candidates[-1]
        if len(candidates) > 1:
            # Since multiple files are ambiguous, we will always read
            # the one that is the last one alphabetically.
            warnings.warn("%d vasprun.xml.* found. %s is being parsed." % (len(candidates), filepath))
        return filepath

    def assimilate(self, path):
        """
        Assimilate data in a directory path into a ComputedEntry object.

        Args:
            path: directory path

        Returns:
            ComputedEntry (or ComputedStructureEntry if inc_structure was
            set), or None if no vasprun.xml* file is found or it cannot be
            parsed.
        """
        filepath = self._find_vasprun(path)
        if filepath is None:
            # Same outcome as any other unparsable run: log and skip.
            logger.debug("error in {}: no vasprun.xml* file found".format(path))
            return None

        try:
            vasprun = Vasprun(filepath)
        except Exception as ex:
            # Best effort: a broken run must not abort the whole assimilation.
            logger.debug("error in {}: {}".format(filepath, ex))
            return None

        entry = vasprun.get_computed_entry(self._inc_structure, parameters=self._parameters, data=self._data)

        # NOTE: attaching the transformation history is currently disabled:
        # entry.parameters["history"] = _get_transformation_history(path)
        return entry

    def get_valid_paths(self, path):
        """
        Checks if paths contains vasprun.xml or (POSCAR+OSZICAR)

        A parent holding both a "relax1" and a "relax2" subdirectory is valid
        as a whole, while directories whose path ends in "relax1" or "relax2"
        are never reported on their own.

        Args:
            path: input path as a tuple generated from os.walk, i.e.,
                (parent, subdirs, files).

        Returns:
            List of valid dir/file paths for assimilation
        """
        parent, subdirs, _ = path
        if "relax1" in subdirs and "relax2" in subdirs:
            return [parent]

        # Accept "/" as well as the platform separator so that the relax
        # directories are also recognised where os.sep is not "/".
        relax_suffixes = tuple(sep + name for sep in {"/", os.sep} for name in ("relax1", "relax2"))
        if parent.endswith(relax_suffixes):
            return []

        def _has(pattern):
            return len(glob.glob(os.path.join(parent, pattern))) > 0

        if _has("vasprun.xml*") or (_has("POSCAR*") and _has("OSZICAR*")):
            return [parent]
        return []

    def __str__(self):
        return " VaspToComputedEntryDrone"

    def as_dict(self):
        """
        Returns: MSONable dict
        """
        # NOTE(review): "parameters" is a set, which the standard json encoder
        # cannot serialize - confirm that downstream consumers cope with it.
        return {
            "init_args": {
                "inc_structure": self._inc_structure,
                "parameters": self._parameters,
                "data": self._data,
            },
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
        }

    @classmethod
    def from_dict(cls, d):
        """
        Args:
            d (dict): Dict Representation

        Returns:
            VaspToComputedEntryDrone
        """
        return cls(**d["init_args"])
201
202
class SimpleVaspToComputedEntryDrone(VaspToComputedEntryDrone):
    """
    A simpler VaspToComputedEntryDrone. Instead of parsing vasprun.xml, it
    parses only the INCAR, POTCAR, POSCAR, CONTCAR and OSZICAR files (plus
    DYNMAT when present), which are much smaller and faster to parse.
    However, much fewer properties are available compared to the standard
    VaspToComputedEntryDrone.
    """

    # Files without which no entry can be built.
    _REQUIRED_FILES = ("INCAR", "POTCAR", "CONTCAR", "OSZICAR", "POSCAR")

    def __init__(self, inc_structure=False):
        """
        Args:
            inc_structure (bool): Set to True if you want
                ComputedStructureEntries to be returned instead of
                ComputedEntries. Structure will be parsed from the CONTCAR.
        """
        self._inc_structure = inc_structure
        self._parameters = {"is_hubbard", "hubbards", "potcar_spec", "run_type"}

    def _locate_files(self, path, two_step_run):
        """
        Map each VASP file type to the concrete file that should be parsed.

        Args:
            path: directory path
            two_step_run (bool): True if path holds "relax1" and "relax2"
                subdirectories, in which case inputs are taken from relax1
                and outputs from relax2.

        Returns:
            Dict of file type (e.g. "INCAR") to file path. File types for
            which nothing was found are absent from the dict.
        """
        files_to_parse = {}

        if two_step_run:
            # glob makes no ordering guarantee, so sort for a reproducible pick.
            for filename in ("INCAR", "POTCAR", "POSCAR"):
                found = sorted(glob.glob(os.path.join(path, "relax1", filename + "*")))
                if found:
                    files_to_parse[filename] = found[0]
            for filename in ("CONTCAR", "OSZICAR"):
                found = sorted(glob.glob(os.path.join(path, "relax2", filename + "*")))
                if found:
                    files_to_parse[filename] = found[-1]
            return files_to_parse

        for filename in ("INCAR", "POTCAR", "CONTCAR", "OSZICAR", "POSCAR", "DYNMAT"):
            found = sorted(glob.glob(os.path.join(path, filename + "*")))
            if not found:
                # Absence is reported by the caller for required files.
                continue
            if len(found) == 1 or filename in ("INCAR", "POTCAR"):
                files_to_parse[filename] = found[0]
            else:
                # Since multiple files are ambiguous, we will always
                # use the first one for POSCAR and the last one
                # alphabetically for CONTCAR and OSZICAR.
                files_to_parse[filename] = found[0] if filename == "POSCAR" else found[-1]
                warnings.warn("%d files found. %s is being parsed." % (len(found), files_to_parse[filename]))
        return files_to_parse

    def assimilate(self, path):
        """
        Assimilate data in a directory path into a ComputedEntry object.

        Args:
            path: directory path

        Returns:
            ComputedEntry (or ComputedStructureEntry if inc_structure was
            set), or None if required files are missing or parsing fails.
        """
        files = os.listdir(path)
        try:
            files_to_parse = self._locate_files(path, "relax1" in files and "relax2" in files)

            if not set(files_to_parse).issuperset(self._REQUIRED_FILES):
                raise ValueError(
                    "Unable to parse %s as not all necessary files are present! "
                    "SimpleVaspToComputedEntryDrone requires INCAR, POTCAR, CONTCAR, OSZICAR, POSCAR "
                    "to be present. Only %s detected" % (path, sorted(files_to_parse))
                )

            poscar = Poscar.from_file(files_to_parse["POSCAR"])
            contcar = Poscar.from_file(files_to_parse["CONTCAR"])
            incar = Incar.from_file(files_to_parse["INCAR"])
            potcar = Potcar.from_file(files_to_parse["POTCAR"])
            oszicar = Oszicar(files_to_parse["OSZICAR"])

            param = {"hubbards": {}}
            if "LDAUU" in incar:
                param["hubbards"] = dict(zip(poscar.site_symbols, incar["LDAUU"]))
            # NOTE(review): a missing LDAU tag is treated as enabled here -
            # confirm that this is the intended default.
            param["is_hubbard"] = incar.get("LDAU", True) and sum(param["hubbards"].values()) > 0
            param["run_type"] = None
            param["potcar_spec"] = potcar.spec
            energy = oszicar.final_energy
            structure = contcar.structure
            # Relative volume change between the input and the final structure.
            initial_vol = poscar.structure.volume
            final_vol = contcar.structure.volume
            delta_volume = final_vol / initial_vol - 1
            data = {"filename": path, "delta_volume": delta_volume}
            if "DYNMAT" in files_to_parse:
                dynmat = Dynmat(files_to_parse["DYNMAT"])
                data["phonon_frequencies"] = dynmat.get_phonon_frequencies()
            if self._inc_structure:
                return ComputedStructureEntry(structure, energy, parameters=param, data=data)
            return ComputedEntry(structure.composition, energy, parameters=param, data=data)

        except Exception as ex:
            # Best effort: a broken run must not abort the whole assimilation.
            logger.debug("error in {}: {}".format(path, ex))
            return None

    def __str__(self):
        return "SimpleVaspToComputedEntryDrone"

    def as_dict(self):
        """
        Returns: MSONable dict
        """
        return {
            "init_args": {"inc_structure": self._inc_structure},
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
        }

    @classmethod
    def from_dict(cls, d):
        """
        Args:
            d (dict): Dict Representation

        Returns:
            SimpleVaspToComputedEntryDrone
        """
        return cls(**d["init_args"])
316
317
class GaussianToComputedEntryDrone(AbstractDrone):
    """
    Drone that turns Gaussian output files into
    ComputedEntry/ComputedStructureEntry objects. Unless told otherwise,
    files with a ".log" extension are taken to be Gaussian output.

    .. note::

        Like the GaussianOutput class, this is still in early beta.
    """

    def __init__(self, inc_structure=False, parameters=None, data=None, file_extensions=(".log",)):
        """
        Args:
            inc_structure (bool): Set to True if you want
                ComputedStructureEntries to be returned instead of
                ComputedEntries.
            parameters (list): Extra input parameters to include, on top of
                the built-in ones (functional, basis_set, charge,
                spin_multiplicity, route_parameters). Each has to be a
                property supported by the GaussianOutput object and has to
                be one of Python's primitive types, i.e., list, dict of
                strings and integers.
            data (list): Extra output data to include, on top of the
                built-in ones (stationary_type, properly_terminated). Each
                has to be a property supported by the GaussianOutput object
                and has to be one of Python's primitive types, i.e. list,
                dict of strings and integers.
            file_extensions (list):
                File extensions to be considered as Gaussian output files.
                Defaults to just the typical "log" extension.
        """
        builtin_parameters = {
            "functional",
            "basis_set",
            "charge",
            "spin_multiplicity",
            "route_parameters",
        }
        builtin_data = {"stationary_type", "properly_terminated"}

        self._inc_structure = inc_structure
        self._parameters = builtin_parameters.union(parameters) if parameters else builtin_parameters
        self._data = builtin_data.union(data) if data else builtin_data
        self._file_extensions = file_extensions

    def assimilate(self, path):
        """
        Parse a Gaussian output file into a ComputedEntry object.

        Args:
            path: path handed to GaussianOutput

        Returns:
            ComputedEntry (or ComputedStructureEntry if inc_structure was
            set), or None if GaussianOutput fails on the path.
        """
        try:
            output = GaussianOutput(path)
        except Exception as ex:
            logger.debug("error in {}: {}".format(path, ex))
            return None

        # Pull the requested attributes straight off the parsed output.
        param = {name: getattr(output, name) for name in self._parameters}
        data = {name: getattr(output, name) for name in self._data}

        if self._inc_structure:
            return ComputedStructureEntry(output.final_structure, output.final_energy, parameters=param, data=data)
        return ComputedEntry(
            output.final_structure.composition,
            output.final_energy,
            parameters=param,
            data=data,
        )

    def get_valid_paths(self, path):
        """
        Collect the files whose extension is one of the configured ones.

        Args:
            path: input path as a tuple generated from os.walk, i.e.,
                (parent, subdirs, files).

        Returns:
            List of valid dir/file paths for assimilation
        """
        parent, _subdirs, filenames = path
        valid_paths = []
        for name in filenames:
            _root, extension = os.path.splitext(name)
            if extension in self._file_extensions:
                valid_paths.append(os.path.join(parent, name))
        return valid_paths

    def __str__(self):
        return " GaussianToComputedEntryDrone"

    def as_dict(self):
        """
        Returns: MSONable dict
        """
        init_args = {
            "inc_structure": self._inc_structure,
            "parameters": self._parameters,
            "data": self._data,
            "file_extensions": self._file_extensions,
        }
        return {
            "init_args": init_args,
            "@module": type(self).__module__,
            "@class": type(self).__name__,
        }

    @classmethod
    def from_dict(cls, d):
        """
        Args:
            d (dict): Dict Representation

        Returns:
            GaussianToComputedEntryDrone
        """
        init_args = d["init_args"]
        return cls(**init_args)
441
442
443def _get_transformation_history(path):
444    """
445    Checks for a transformations.json* file and returns the history.
446    """
447    trans_json = glob.glob(os.path.join(path, "transformations.json*"))
448    if trans_json:
449        try:
450            with zopen(trans_json[0]) as f:
451                return json.load(f)["history"]
452        except Exception:
453            return None
454    return None
455