1# coding: utf-8
2# Copyright (c) Pymatgen Development Team.
3# Distributed under the terms of the MIT License.
4
5"""
6This module implements an interface to enumlib, Gus Hart"s excellent Fortran
7code for enumerating derivative structures.
8
9This module depends on a compiled enumlib with the executables enum.x and
10makestr.x available in the path. Please download the library at
11https://github.com/msg-byu/enumlib and follow the instructions in the README to
12compile these two executables accordingly.
13
14If you use this module, please cite the following:
15
16Gus L. W. Hart and Rodney W. Forcade, "Algorithm for generating derivative
17structures," Phys. Rev. B 77 224115 (26 June 2008)
18
19Gus L. W. Hart and Rodney W. Forcade, "Generating derivative structures from
20multilattices: Application to hcp alloys," Phys. Rev. B 80 014120 (July 2009)
21
22Gus L. W. Hart, Lance J. Nelson, and Rodney W. Forcade, "Generating
23derivative structures at a fixed concentration," Comp. Mat. Sci. 59
24101-107 (March 2012)
25
26Wiley S. Morgan, Gus L. W. Hart, Rodney W. Forcade, "Generating derivative
27superstructures for systems with high configurational freedom," Comp. Mat.
28Sci. 136 144-149 (May 2017)
29"""
30
31import fractions
32import glob
33import itertools
34import logging
35import math
36import re
37import subprocess
38from threading import Timer
39
40import numpy as np
41from monty.dev import requires
42from monty.fractions import lcm
43from monty.os.path import which
44from monty.tempfile import ScratchDir
45
46from pymatgen.core.periodic_table import DummySpecies
47from pymatgen.core.sites import PeriodicSite
48from pymatgen.core.structure import Structure
49from pymatgen.io.vasp.inputs import Poscar
50from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
51
52logger = logging.getLogger(__name__)
53
54# Favor the use of the newer "enum.x" by Gus Hart instead of the older
55# "multienum.x"
56enum_cmd = which("enum.x") or which("multienum.x")
57# prefer makestr.x at present
58makestr_cmd = which("makestr.x") or which("makeStr.x") or which("makeStr.py")
59
60
61@requires(
62    enum_cmd and makestr_cmd,
63    "EnumlibAdaptor requires the executables 'enum.x' or 'multienum.x' "
64    "and 'makestr.x' or 'makeStr.py' to be in the path. Please download the "
65    "library at https://github.com/msg-byu/enumlib and follow the instructions "
66    "in the README to compile these two executables accordingly.",
67)
68class EnumlibAdaptor:
69    """
70    An adaptor for enumlib.
71
72    .. attribute:: structures
73
74        List of all enumerated structures.
75    """
76
77    amount_tol = 1e-5
78
79    def __init__(
80        self,
81        structure,
82        min_cell_size=1,
83        max_cell_size=1,
84        symm_prec=0.1,
85        enum_precision_parameter=0.001,
86        refine_structure=False,
87        check_ordered_symmetry=True,
88        timeout=None,
89    ):
90        """
91        Initializes the adapter with a structure and some parameters.
92
93        Args:
94            structure: An input structure.
95            min_cell_size (int): The minimum cell size wanted. Defaults to 1.
96            max_cell_size (int): The maximum cell size wanted. Defaults to 1.
97            symm_prec (float): Symmetry precision. Defaults to 0.1.
98            enum_precision_parameter (float): Finite precision parameter for
99                enumlib. Default of 0.001 is usually ok, but you might need to
100                tweak it for certain cells.
101            refine_structure (bool): If you are starting from a structure that
102                has been relaxed via some electronic structure code,
103                it is usually much better to start with symmetry determination
104                and then obtain a refined structure. The refined structure have
105                cell parameters and atomic positions shifted to the expected
106                symmetry positions, which makes it much less sensitive precision
107                issues in enumlib. If you are already starting from an
108                experimental cif, refinement should have already been done and
109                it is not necessary. Defaults to False.
110            check_ordered_symmetry (bool): Whether to check the symmetry of
111                the ordered sites. If the symmetry of the ordered sites is
112                lower, the lowest symmetry ordered sites is included in the
113                enumeration. This is important if the ordered sites break
114                symmetry in a way that is important getting possible
115                structures. But sometimes including ordered sites
116                slows down enumeration to the point that it cannot be
117                completed. Switch to False in those cases. Defaults to True.
118            timeout (float): If specified, will kill enumlib after specified
119                time in minutes. This can be useful for gracefully handling
120                enumerations in a high-throughput context, for some enumerations
121                which will not terminate in a realistic length of time.
122        """
123        if refine_structure:
124            finder = SpacegroupAnalyzer(structure, symm_prec)
125            self.structure = finder.get_refined_structure()
126        else:
127            self.structure = structure
128        self.min_cell_size = min_cell_size
129        self.max_cell_size = max_cell_size
130        self.symm_prec = symm_prec
131        self.enum_precision_parameter = enum_precision_parameter
132        self.check_ordered_symmetry = check_ordered_symmetry
133        self.timeout = timeout
134
135    def run(self):
136        """
137        Run the enumeration.
138        """
139        # Create a temporary directory for working.
140        with ScratchDir(".") as d:
141            logger.debug("Temp dir : {}".format(d))
142            # Generate input files
143            self._gen_input_file()
144            # Perform the actual enumeration
145            num_structs = self._run_multienum()
146            # Read in the enumeration output as structures.
147            if num_structs > 0:
148                self.structures = self._get_structures(num_structs)
149            else:
150                raise EnumError("Unable to enumerate structure.")
151
152    def _gen_input_file(self):
153        """
154        Generate the necessary struct_enum.in file for enumlib. See enumlib
155        documentation for details.
156        """
157        coord_format = "{:.6f} {:.6f} {:.6f}"
158        # Using symmetry finder, get the symmetrically distinct sites.
159        fitter = SpacegroupAnalyzer(self.structure, self.symm_prec)
160        symmetrized_structure = fitter.get_symmetrized_structure()
161        logger.debug(
162            "Spacegroup {} ({}) with {} distinct sites".format(
163                fitter.get_space_group_symbol(),
164                fitter.get_space_group_number(),
165                len(symmetrized_structure.equivalent_sites),
166            )
167        )
168
169        """
170        Enumlib doesn"t work when the number of species get too large. To
171        simplify matters, we generate the input file only with disordered sites
172        and exclude the ordered sites from the enumeration. The fact that
173        different disordered sites with the exact same species may belong to
174        different equivalent sites is dealt with by having determined the
175        spacegroup earlier and labelling the species differently.
176        """
177
178        # index_species and index_amounts store mappings between the indices
179        # used in the enum input file, and the actual species and amounts.
180        index_species = []
181        index_amounts = []
182
183        # Stores the ordered sites, which are not enumerated.
184        ordered_sites = []
185        disordered_sites = []
186        coord_str = []
187        for sites in symmetrized_structure.equivalent_sites:
188            if sites[0].is_ordered:
189                ordered_sites.append(sites)
190            else:
191                sp_label = []
192                species = dict(sites[0].species.items())
193                if sum(species.values()) < 1 - EnumlibAdaptor.amount_tol:
194                    # Let us first make add a dummy element for every single
195                    # site whose total occupancies don't sum to 1.
196                    species[DummySpecies("X")] = 1 - sum(species.values())
197                for sp, amt in species.items():
198                    if sp not in index_species:
199                        index_species.append(sp)
200                        sp_label.append(len(index_species) - 1)
201                        index_amounts.append(amt * len(sites))
202                    else:
203                        ind = index_species.index(sp)
204                        sp_label.append(ind)
205                        index_amounts[ind] += amt * len(sites)
206                sp_label = "/".join(["{}".format(i) for i in sorted(sp_label)])
207                for site in sites:
208                    coord_str.append("{} {}".format(coord_format.format(*site.coords), sp_label))
209                disordered_sites.append(sites)
210
211        def get_sg_info(ss):
212            finder = SpacegroupAnalyzer(Structure.from_sites(ss), self.symm_prec)
213            return finder.get_space_group_number()
214
215        target_sgnum = get_sg_info(symmetrized_structure.sites)
216        curr_sites = list(itertools.chain.from_iterable(disordered_sites))
217        sgnum = get_sg_info(curr_sites)
218        ordered_sites = sorted(ordered_sites, key=lambda sites: len(sites))
219        logger.debug("Disordered sites has sg # %d" % (sgnum))
220        self.ordered_sites = []
221
222        # progressively add ordered sites to our disordered sites
223        # until we match the symmetry of our input structure
224        if self.check_ordered_symmetry:
225            while sgnum != target_sgnum and len(ordered_sites) > 0:
226                sites = ordered_sites.pop(0)
227                temp_sites = list(curr_sites) + sites
228                new_sgnum = get_sg_info(temp_sites)
229                if sgnum != new_sgnum:
230                    logger.debug("Adding %s in enum. New sg # %d" % (sites[0].specie, new_sgnum))
231                    index_species.append(sites[0].specie)
232                    index_amounts.append(len(sites))
233                    sp_label = len(index_species) - 1
234                    for site in sites:
235                        coord_str.append("{} {}".format(coord_format.format(*site.coords), sp_label))
236                    disordered_sites.append(sites)
237                    curr_sites = temp_sites
238                    sgnum = new_sgnum
239                else:
240                    self.ordered_sites.extend(sites)
241
242        for sites in ordered_sites:
243            self.ordered_sites.extend(sites)
244
245        self.index_species = index_species
246
247        lattice = self.structure.lattice
248
249        output = [self.structure.formula, "bulk"]
250        for vec in lattice.matrix:
251            output.append(coord_format.format(*vec))
252        output.append("%d" % len(index_species))
253        output.append("%d" % len(coord_str))
254        output.extend(coord_str)
255
256        output.append("{} {}".format(self.min_cell_size, self.max_cell_size))
257        output.append(str(self.enum_precision_parameter))
258        output.append("full")
259
260        ndisordered = sum([len(s) for s in disordered_sites])
261        base = int(
262            ndisordered
263            * lcm(
264                *[
265                    f.limit_denominator(ndisordered * self.max_cell_size).denominator
266                    for f in map(fractions.Fraction, index_amounts)
267                ]
268            )
269        )
270
271        # This multiplicative factor of 10 is to prevent having too small bases
272        # which can lead to rounding issues in the next step.
273        # An old bug was that a base was set to 8, with a conc of 0.4:0.6. That
274        # resulted in a range that overlaps and a conc of 0.5 satisfying this
275        # enumeration. See Cu7Te5.cif test file.
276        base *= 10
277
278        # base = ndisordered #10 ** int(math.ceil(math.log10(ndisordered)))
279        # To get a reasonable number of structures, we fix concentrations to the
280        # range expected in the original structure.
281        total_amounts = sum(index_amounts)
282        for amt in index_amounts:
283            conc = amt / total_amounts
284
285            if abs(conc * base - round(conc * base)) < 1e-5:
286                output.append("{} {} {}".format(int(round(conc * base)), int(round(conc * base)), base))
287            else:
288                min_conc = int(math.floor(conc * base))
289                output.append("{} {} {}".format(min_conc - 1, min_conc + 1, base))
290        output.append("")
291        logger.debug("Generated input file:\n{}".format("\n".join(output)))
292        with open("struct_enum.in", "w") as f:
293            f.write("\n".join(output))
294
295    def _run_multienum(self):
296
297        with subprocess.Popen([enum_cmd], stdout=subprocess.PIPE, stdin=subprocess.PIPE, close_fds=True) as p:
298            if self.timeout:
299
300                timed_out = False
301                timer = Timer(self.timeout * 60, lambda p: p.kill(), [p])
302
303                try:
304                    timer.start()
305                    output = p.communicate()[0].decode("utf-8")
306                finally:
307                    if not timer.is_alive():
308                        timed_out = True
309                    timer.cancel()
310
311                if timed_out:
312                    raise TimeoutError("Enumeration took too long.")
313
314            else:
315
316                output = p.communicate()[0].decode("utf-8")
317
318        count = 0
319        start_count = False
320        for line in output.strip().split("\n"):
321            if line.strip().endswith("RunTot"):
322                start_count = True
323            elif start_count and re.match(r"\d+\s+.*", line.strip()):
324                count = int(line.split()[-1])
325        logger.debug("Enumeration resulted in {} structures".format(count))
326        return count
327
328    def _get_structures(self, num_structs):
329        structs = []
330
331        if ".py" in makestr_cmd:
332            options = ["-input", "struct_enum.out", str(1), str(num_structs)]
333        else:
334            options = ["struct_enum.out", str(0), str(num_structs - 1)]
335
336        with subprocess.Popen(
337            [makestr_cmd] + options,
338            stdout=subprocess.PIPE,
339            stdin=subprocess.PIPE,
340            close_fds=True,
341        ) as rs:
342            stdout, stderr = rs.communicate()
343        if stderr:
344            logger.warning(stderr.decode())
345
346        # sites retrieved from enumlib will lack site properties
347        # to ensure consistency, we keep track of what site properties
348        # are missing and set them to None
349        # TODO: improve this by mapping ordered structure to original
350        # disorded structure, and retrieving correct site properties
351        disordered_site_properties = {}
352
353        if len(self.ordered_sites) > 0:
354            original_latt = self.ordered_sites[0].lattice
355            # Need to strip sites of site_properties, which would otherwise
356            # result in an index error. Hence Structure is reconstructed in
357            # the next step.
358            site_properties = {}
359            for site in self.ordered_sites:
360                for k, v in site.properties.items():
361                    disordered_site_properties[k] = None
362                    if k in site_properties:
363                        site_properties[k].append(v)
364                    else:
365                        site_properties[k] = [v]
366            ordered_structure = Structure(
367                original_latt,
368                [site.species for site in self.ordered_sites],
369                [site.frac_coords for site in self.ordered_sites],
370                site_properties=site_properties,
371            )
372            inv_org_latt = np.linalg.inv(original_latt.matrix)
373
374        for file in glob.glob("vasp.*"):
375            with open(file) as f:
376                data = f.read()
377                data = re.sub(r"scale factor", "1", data)
378                data = re.sub(r"(\d+)-(\d+)", r"\1 -\2", data)
379                poscar = Poscar.from_string(data, self.index_species)
380                sub_structure = poscar.structure
381                # Enumeration may have resulted in a super lattice. We need to
382                # find the mapping from the new lattice to the old lattice, and
383                # perform supercell construction if necessary.
384                new_latt = sub_structure.lattice
385
386                sites = []
387
388                if len(self.ordered_sites) > 0:
389                    transformation = np.dot(new_latt.matrix, inv_org_latt)
390                    transformation = [[int(round(cell)) for cell in row] for row in transformation]
391                    logger.debug("Supercell matrix: {}".format(transformation))
392                    s = ordered_structure * transformation
393                    sites.extend([site.to_unit_cell() for site in s])
394                    super_latt = sites[-1].lattice
395                else:
396                    super_latt = new_latt
397
398                for site in sub_structure:
399                    if site.specie.symbol != "X":  # We exclude vacancies.
400                        sites.append(
401                            PeriodicSite(
402                                site.species,
403                                site.frac_coords,
404                                super_latt,
405                                to_unit_cell=True,
406                                properties=disordered_site_properties,
407                            )
408                        )
409                    else:
410                        logger.debug("Skipping sites that include species X.")
411                structs.append(Structure.from_sites(sorted(sites)))
412
413        logger.debug("Read in a total of {} structures.".format(num_structs))
414        return structs
415
416
417class EnumError(BaseException):
418    """
419    Error subclass for enumeration errors.
420    """
421
422    pass
423