1# coding: utf-8 2# Copyright (c) Pymatgen Development Team. 3# Distributed under the terms of the MIT License. 4 5""" 6This module implements an interface to enumlib, Gus Hart"s excellent Fortran 7code for enumerating derivative structures. 8 9This module depends on a compiled enumlib with the executables enum.x and 10makestr.x available in the path. Please download the library at 11https://github.com/msg-byu/enumlib and follow the instructions in the README to 12compile these two executables accordingly. 13 14If you use this module, please cite the following: 15 16Gus L. W. Hart and Rodney W. Forcade, "Algorithm for generating derivative 17structures," Phys. Rev. B 77 224115 (26 June 2008) 18 19Gus L. W. Hart and Rodney W. Forcade, "Generating derivative structures from 20multilattices: Application to hcp alloys," Phys. Rev. B 80 014120 (July 2009) 21 22Gus L. W. Hart, Lance J. Nelson, and Rodney W. Forcade, "Generating 23derivative structures at a fixed concentration," Comp. Mat. Sci. 59 24101-107 (March 2012) 25 26Wiley S. Morgan, Gus L. W. Hart, Rodney W. Forcade, "Generating derivative 27superstructures for systems with high configurational freedom," Comp. Mat. 28Sci. 136 144-149 (May 2017) 29""" 30 31import fractions 32import glob 33import itertools 34import logging 35import math 36import re 37import subprocess 38from threading import Timer 39 40import numpy as np 41from monty.dev import requires 42from monty.fractions import lcm 43from monty.os.path import which 44from monty.tempfile import ScratchDir 45 46from pymatgen.core.periodic_table import DummySpecies 47from pymatgen.core.sites import PeriodicSite 48from pymatgen.core.structure import Structure 49from pymatgen.io.vasp.inputs import Poscar 50from pymatgen.symmetry.analyzer import SpacegroupAnalyzer 51 52logger = logging.getLogger(__name__) 53 54# Favor the use of the newer "enum.x" by Gus Hart instead of the older 55# "multienum.x" 56enum_cmd = which("enum.x") or which("multienum.x") 57# prefer makestr.x at present 58makestr_cmd = which("makestr.x") or which("makeStr.x") or which("makeStr.py") 59 60 61@requires( 62 enum_cmd and makestr_cmd, 63 "EnumlibAdaptor requires the executables 'enum.x' or 'multienum.x' " 64 "and 'makestr.x' or 'makeStr.py' to be in the path. Please download the " 65 "library at https://github.com/msg-byu/enumlib and follow the instructions " 66 "in the README to compile these two executables accordingly.", 67) 68class EnumlibAdaptor: 69 """ 70 An adaptor for enumlib. 71 72 .. attribute:: structures 73 74 List of all enumerated structures. 75 """ 76 77 amount_tol = 1e-5 78 79 def __init__( 80 self, 81 structure, 82 min_cell_size=1, 83 max_cell_size=1, 84 symm_prec=0.1, 85 enum_precision_parameter=0.001, 86 refine_structure=False, 87 check_ordered_symmetry=True, 88 timeout=None, 89 ): 90 """ 91 Initializes the adapter with a structure and some parameters. 92 93 Args: 94 structure: An input structure. 95 min_cell_size (int): The minimum cell size wanted. Defaults to 1. 96 max_cell_size (int): The maximum cell size wanted. Defaults to 1. 97 symm_prec (float): Symmetry precision. Defaults to 0.1. 98 enum_precision_parameter (float): Finite precision parameter for 99 enumlib. Default of 0.001 is usually ok, but you might need to 100 tweak it for certain cells. 101 refine_structure (bool): If you are starting from a structure that 102 has been relaxed via some electronic structure code, 103 it is usually much better to start with symmetry determination 104 and then obtain a refined structure. The refined structure have 105 cell parameters and atomic positions shifted to the expected 106 symmetry positions, which makes it much less sensitive precision 107 issues in enumlib. If you are already starting from an 108 experimental cif, refinement should have already been done and 109 it is not necessary. Defaults to False. 110 check_ordered_symmetry (bool): Whether to check the symmetry of 111 the ordered sites. If the symmetry of the ordered sites is 112 lower, the lowest symmetry ordered sites is included in the 113 enumeration. This is important if the ordered sites break 114 symmetry in a way that is important getting possible 115 structures. But sometimes including ordered sites 116 slows down enumeration to the point that it cannot be 117 completed. Switch to False in those cases. Defaults to True. 118 timeout (float): If specified, will kill enumlib after specified 119 time in minutes. This can be useful for gracefully handling 120 enumerations in a high-throughput context, for some enumerations 121 which will not terminate in a realistic length of time. 122 """ 123 if refine_structure: 124 finder = SpacegroupAnalyzer(structure, symm_prec) 125 self.structure = finder.get_refined_structure() 126 else: 127 self.structure = structure 128 self.min_cell_size = min_cell_size 129 self.max_cell_size = max_cell_size 130 self.symm_prec = symm_prec 131 self.enum_precision_parameter = enum_precision_parameter 132 self.check_ordered_symmetry = check_ordered_symmetry 133 self.timeout = timeout 134 135 def run(self): 136 """ 137 Run the enumeration. 138 """ 139 # Create a temporary directory for working. 140 with ScratchDir(".") as d: 141 logger.debug("Temp dir : {}".format(d)) 142 # Generate input files 143 self._gen_input_file() 144 # Perform the actual enumeration 145 num_structs = self._run_multienum() 146 # Read in the enumeration output as structures. 147 if num_structs > 0: 148 self.structures = self._get_structures(num_structs) 149 else: 150 raise EnumError("Unable to enumerate structure.") 151 152 def _gen_input_file(self): 153 """ 154 Generate the necessary struct_enum.in file for enumlib. See enumlib 155 documentation for details. 156 """ 157 coord_format = "{:.6f} {:.6f} {:.6f}" 158 # Using symmetry finder, get the symmetrically distinct sites. 159 fitter = SpacegroupAnalyzer(self.structure, self.symm_prec) 160 symmetrized_structure = fitter.get_symmetrized_structure() 161 logger.debug( 162 "Spacegroup {} ({}) with {} distinct sites".format( 163 fitter.get_space_group_symbol(), 164 fitter.get_space_group_number(), 165 len(symmetrized_structure.equivalent_sites), 166 ) 167 ) 168 169 """ 170 Enumlib doesn"t work when the number of species get too large. To 171 simplify matters, we generate the input file only with disordered sites 172 and exclude the ordered sites from the enumeration. The fact that 173 different disordered sites with the exact same species may belong to 174 different equivalent sites is dealt with by having determined the 175 spacegroup earlier and labelling the species differently. 176 """ 177 178 # index_species and index_amounts store mappings between the indices 179 # used in the enum input file, and the actual species and amounts. 180 index_species = [] 181 index_amounts = [] 182 183 # Stores the ordered sites, which are not enumerated. 184 ordered_sites = [] 185 disordered_sites = [] 186 coord_str = [] 187 for sites in symmetrized_structure.equivalent_sites: 188 if sites[0].is_ordered: 189 ordered_sites.append(sites) 190 else: 191 sp_label = [] 192 species = dict(sites[0].species.items()) 193 if sum(species.values()) < 1 - EnumlibAdaptor.amount_tol: 194 # Let us first make add a dummy element for every single 195 # site whose total occupancies don't sum to 1. 196 species[DummySpecies("X")] = 1 - sum(species.values()) 197 for sp, amt in species.items(): 198 if sp not in index_species: 199 index_species.append(sp) 200 sp_label.append(len(index_species) - 1) 201 index_amounts.append(amt * len(sites)) 202 else: 203 ind = index_species.index(sp) 204 sp_label.append(ind) 205 index_amounts[ind] += amt * len(sites) 206 sp_label = "/".join(["{}".format(i) for i in sorted(sp_label)]) 207 for site in sites: 208 coord_str.append("{} {}".format(coord_format.format(*site.coords), sp_label)) 209 disordered_sites.append(sites) 210 211 def get_sg_info(ss): 212 finder = SpacegroupAnalyzer(Structure.from_sites(ss), self.symm_prec) 213 return finder.get_space_group_number() 214 215 target_sgnum = get_sg_info(symmetrized_structure.sites) 216 curr_sites = list(itertools.chain.from_iterable(disordered_sites)) 217 sgnum = get_sg_info(curr_sites) 218 ordered_sites = sorted(ordered_sites, key=lambda sites: len(sites)) 219 logger.debug("Disordered sites has sg # %d" % (sgnum)) 220 self.ordered_sites = [] 221 222 # progressively add ordered sites to our disordered sites 223 # until we match the symmetry of our input structure 224 if self.check_ordered_symmetry: 225 while sgnum != target_sgnum and len(ordered_sites) > 0: 226 sites = ordered_sites.pop(0) 227 temp_sites = list(curr_sites) + sites 228 new_sgnum = get_sg_info(temp_sites) 229 if sgnum != new_sgnum: 230 logger.debug("Adding %s in enum. New sg # %d" % (sites[0].specie, new_sgnum)) 231 index_species.append(sites[0].specie) 232 index_amounts.append(len(sites)) 233 sp_label = len(index_species) - 1 234 for site in sites: 235 coord_str.append("{} {}".format(coord_format.format(*site.coords), sp_label)) 236 disordered_sites.append(sites) 237 curr_sites = temp_sites 238 sgnum = new_sgnum 239 else: 240 self.ordered_sites.extend(sites) 241 242 for sites in ordered_sites: 243 self.ordered_sites.extend(sites) 244 245 self.index_species = index_species 246 247 lattice = self.structure.lattice 248 249 output = [self.structure.formula, "bulk"] 250 for vec in lattice.matrix: 251 output.append(coord_format.format(*vec)) 252 output.append("%d" % len(index_species)) 253 output.append("%d" % len(coord_str)) 254 output.extend(coord_str) 255 256 output.append("{} {}".format(self.min_cell_size, self.max_cell_size)) 257 output.append(str(self.enum_precision_parameter)) 258 output.append("full") 259 260 ndisordered = sum([len(s) for s in disordered_sites]) 261 base = int( 262 ndisordered 263 * lcm( 264 *[ 265 f.limit_denominator(ndisordered * self.max_cell_size).denominator 266 for f in map(fractions.Fraction, index_amounts) 267 ] 268 ) 269 ) 270 271 # This multiplicative factor of 10 is to prevent having too small bases 272 # which can lead to rounding issues in the next step. 273 # An old bug was that a base was set to 8, with a conc of 0.4:0.6. That 274 # resulted in a range that overlaps and a conc of 0.5 satisfying this 275 # enumeration. See Cu7Te5.cif test file. 276 base *= 10 277 278 # base = ndisordered #10 ** int(math.ceil(math.log10(ndisordered))) 279 # To get a reasonable number of structures, we fix concentrations to the 280 # range expected in the original structure. 281 total_amounts = sum(index_amounts) 282 for amt in index_amounts: 283 conc = amt / total_amounts 284 285 if abs(conc * base - round(conc * base)) < 1e-5: 286 output.append("{} {} {}".format(int(round(conc * base)), int(round(conc * base)), base)) 287 else: 288 min_conc = int(math.floor(conc * base)) 289 output.append("{} {} {}".format(min_conc - 1, min_conc + 1, base)) 290 output.append("") 291 logger.debug("Generated input file:\n{}".format("\n".join(output))) 292 with open("struct_enum.in", "w") as f: 293 f.write("\n".join(output)) 294 295 def _run_multienum(self): 296 297 with subprocess.Popen([enum_cmd], stdout=subprocess.PIPE, stdin=subprocess.PIPE, close_fds=True) as p: 298 if self.timeout: 299 300 timed_out = False 301 timer = Timer(self.timeout * 60, lambda p: p.kill(), [p]) 302 303 try: 304 timer.start() 305 output = p.communicate()[0].decode("utf-8") 306 finally: 307 if not timer.is_alive(): 308 timed_out = True 309 timer.cancel() 310 311 if timed_out: 312 raise TimeoutError("Enumeration took too long.") 313 314 else: 315 316 output = p.communicate()[0].decode("utf-8") 317 318 count = 0 319 start_count = False 320 for line in output.strip().split("\n"): 321 if line.strip().endswith("RunTot"): 322 start_count = True 323 elif start_count and re.match(r"\d+\s+.*", line.strip()): 324 count = int(line.split()[-1]) 325 logger.debug("Enumeration resulted in {} structures".format(count)) 326 return count 327 328 def _get_structures(self, num_structs): 329 structs = [] 330 331 if ".py" in makestr_cmd: 332 options = ["-input", "struct_enum.out", str(1), str(num_structs)] 333 else: 334 options = ["struct_enum.out", str(0), str(num_structs - 1)] 335 336 with subprocess.Popen( 337 [makestr_cmd] + options, 338 stdout=subprocess.PIPE, 339 stdin=subprocess.PIPE, 340 close_fds=True, 341 ) as rs: 342 stdout, stderr = rs.communicate() 343 if stderr: 344 logger.warning(stderr.decode()) 345 346 # sites retrieved from enumlib will lack site properties 347 # to ensure consistency, we keep track of what site properties 348 # are missing and set them to None 349 # TODO: improve this by mapping ordered structure to original 350 # disorded structure, and retrieving correct site properties 351 disordered_site_properties = {} 352 353 if len(self.ordered_sites) > 0: 354 original_latt = self.ordered_sites[0].lattice 355 # Need to strip sites of site_properties, which would otherwise 356 # result in an index error. Hence Structure is reconstructed in 357 # the next step. 358 site_properties = {} 359 for site in self.ordered_sites: 360 for k, v in site.properties.items(): 361 disordered_site_properties[k] = None 362 if k in site_properties: 363 site_properties[k].append(v) 364 else: 365 site_properties[k] = [v] 366 ordered_structure = Structure( 367 original_latt, 368 [site.species for site in self.ordered_sites], 369 [site.frac_coords for site in self.ordered_sites], 370 site_properties=site_properties, 371 ) 372 inv_org_latt = np.linalg.inv(original_latt.matrix) 373 374 for file in glob.glob("vasp.*"): 375 with open(file) as f: 376 data = f.read() 377 data = re.sub(r"scale factor", "1", data) 378 data = re.sub(r"(\d+)-(\d+)", r"\1 -\2", data) 379 poscar = Poscar.from_string(data, self.index_species) 380 sub_structure = poscar.structure 381 # Enumeration may have resulted in a super lattice. We need to 382 # find the mapping from the new lattice to the old lattice, and 383 # perform supercell construction if necessary. 384 new_latt = sub_structure.lattice 385 386 sites = [] 387 388 if len(self.ordered_sites) > 0: 389 transformation = np.dot(new_latt.matrix, inv_org_latt) 390 transformation = [[int(round(cell)) for cell in row] for row in transformation] 391 logger.debug("Supercell matrix: {}".format(transformation)) 392 s = ordered_structure * transformation 393 sites.extend([site.to_unit_cell() for site in s]) 394 super_latt = sites[-1].lattice 395 else: 396 super_latt = new_latt 397 398 for site in sub_structure: 399 if site.specie.symbol != "X": # We exclude vacancies. 400 sites.append( 401 PeriodicSite( 402 site.species, 403 site.frac_coords, 404 super_latt, 405 to_unit_cell=True, 406 properties=disordered_site_properties, 407 ) 408 ) 409 else: 410 logger.debug("Skipping sites that include species X.") 411 structs.append(Structure.from_sites(sorted(sites))) 412 413 logger.debug("Read in a total of {} structures.".format(num_structs)) 414 return structs 415 416 417class EnumError(BaseException): 418 """ 419 Error subclass for enumeration errors. 420 """ 421 422 pass 423