1# coding: utf-8 2# Copyright (c) Pymatgen Development Team. 3# Distributed under the terms of the MIT License. 4 5""" 6This module implements a Composition class to represent compositions, 7and a ChemicalPotential class to represent potentials. 8""" 9 10import collections 11import numbers 12import os 13import re 14import string 15from functools import total_ordering 16from itertools import combinations_with_replacement, product 17from typing import List, Tuple, Union, Dict 18 19from monty.fractions import gcd, gcd_float 20from monty.json import MSONable 21from monty.serialization import loadfn 22 23from pymatgen.core.periodic_table import DummySpecies, Element, Species, get_el_sp 24from pymatgen.core.units import Mass 25from pymatgen.util.string import formula_double_format, Stringify 26 27 28SpeciesLike = Union[str, Element, Species, DummySpecies] 29 30 31@total_ordering 32class Composition(collections.abc.Hashable, collections.abc.Mapping, MSONable, Stringify): 33 """ 34 Represents a Composition, which is essentially a {element:amount} mapping 35 type. Composition is written to be immutable and hashable, 36 unlike a standard Python dict. 37 38 Note that the key can be either an Element or a Species. Elements and Species 39 are treated differently. i.e., a Fe2+ is not the same as a Fe3+ Species and 40 would be put in separate keys. This differentiation is deliberate to 41 support using Composition to determine the fraction of a particular Species. 42 43 Works almost completely like a standard python dictionary, except that 44 __getitem__ is overridden to return 0 when an element is not found. 45 (somewhat like a defaultdict, except it is immutable). 46 47 Also adds more convenience methods relevant to compositions, e.g., 48 get_fraction. 49 50 It should also be noted that many Composition related functionality takes 51 in a standard string as a convenient input. For example, 52 even though the internal representation of a Fe2O3 composition is 53 {Element("Fe"): 2, Element("O"): 3}, you can obtain the amount of Fe 54 simply by comp["Fe"] instead of the more verbose comp[Element("Fe")]. 55 56 >>> comp = Composition("LiFePO4") 57 >>> comp.get_atomic_fraction(Element("Li")) 58 0.14285714285714285 59 >>> comp.num_atoms 60 7.0 61 >>> comp.reduced_formula 62 'LiFePO4' 63 >>> comp.formula 64 'Li1 Fe1 P1 O4' 65 >>> comp.get_wt_fraction(Element("Li")) 66 0.04399794666951898 67 >>> comp.num_atoms 68 7.0 69 """ 70 71 # Tolerance in distinguishing different composition amounts. 72 # 1e-8 is fairly tight, but should cut out most floating point arithmetic 73 # errors. 74 amount_tolerance = 1e-8 75 76 # Special formula handling for peroxides and certain elements. This is so 77 # that formula output does not write LiO instead of Li2O2 for example. 78 special_formulas = { 79 "LiO": "Li2O2", 80 "NaO": "Na2O2", 81 "KO": "K2O2", 82 "HO": "H2O2", 83 "CsO": "Cs2O2", 84 "RbO": "Rb2O2", 85 "O": "O2", 86 "N": "N2", 87 "F": "F2", 88 "Cl": "Cl2", 89 "H": "H2", 90 } 91 92 oxi_prob = None # prior probability of oxidation used by oxi_state_guesses 93 94 def __init__(self, *args, strict: bool = False, **kwargs): 95 r""" 96 Very flexible Composition construction, similar to the built-in Python 97 dict(). Also extended to allow simple string init. 98 99 Args: 100 Any form supported by the Python built-in {} function. 101 102 1. A dict of either {Element/Species: amount}, 103 104 {string symbol:amount}, or {atomic number:amount} or any mixture 105 of these. E.g., {Element("Li"):2 ,Element("O"):1}, 106 {"Li":2, "O":1}, {3:2, 8:1} all result in a Li2O composition. 107 2. Keyword arg initialization, similar to a dict, e.g., 108 109 Composition(Li = 2, O = 1) 110 111 In addition, the Composition constructor also allows a single 112 string as an input formula. E.g., Composition("Li2O"). 113 114 strict: Only allow valid Elements and Species in the Composition. 115 116 allow_negative: Whether to allow negative compositions. This 117 argument must be popped from the **kwargs due to *args 118 ambiguity. 119 """ 120 self.allow_negative = kwargs.pop("allow_negative", False) 121 # it's much faster to recognize a composition and use the elmap than 122 # to pass the composition to {} 123 if len(args) == 1 and isinstance(args[0], Composition): 124 elmap = args[0] 125 elif len(args) == 1 and isinstance(args[0], str): 126 elmap = self._parse_formula(args[0]) 127 else: 128 elmap = dict(*args, **kwargs) # type: ignore 129 elamt = {} 130 self._natoms = 0 131 for k, v in elmap.items(): 132 if v < -Composition.amount_tolerance and not self.allow_negative: 133 raise ValueError("Amounts in Composition cannot be " "negative!") 134 if abs(v) >= Composition.amount_tolerance: 135 elamt[get_el_sp(k)] = v 136 self._natoms += abs(v) 137 self._data = elamt 138 if strict and not self.valid: 139 raise ValueError("Composition is not valid, contains: {}".format(", ".join(map(str, self.elements)))) 140 141 def __getitem__(self, item: SpeciesLike): 142 try: 143 sp = get_el_sp(item) 144 return self._data.get(sp, 0) 145 except ValueError as ex: 146 raise TypeError( 147 "Invalid key {}, {} for Composition\n" "ValueError exception:\n{}".format(item, type(item), ex) 148 ) 149 150 def __len__(self): 151 return len(self._data) 152 153 def __iter__(self): 154 return self._data.keys().__iter__() 155 156 def __contains__(self, item): 157 try: 158 sp = get_el_sp(item) 159 return sp in self._data 160 except ValueError as ex: 161 raise TypeError( 162 "Invalid key {}, {} for Composition\n" "ValueError exception:\n{}".format(item, type(item), ex) 163 ) 164 165 def __eq__(self, other): 166 # elements with amounts < Composition.amount_tolerance don't show up 167 # in the elmap, so checking len enables us to only check one 168 # compositions elements 169 if len(self) != len(other): 170 return False 171 172 return all(abs(v - other[el]) <= Composition.amount_tolerance for el, v in self.items()) 173 174 def __ge__(self, other): 175 """ 176 Defines >= for Compositions. Should ONLY be used for defining a sort 177 order (the behavior is probably not what you'd expect) 178 """ 179 for el in sorted(set(self.elements + other.elements)): 180 if other[el] - self[el] >= Composition.amount_tolerance: 181 return False 182 if self[el] - other[el] >= Composition.amount_tolerance: 183 return True 184 return True 185 186 def __ne__(self, other): 187 return not self.__eq__(other) 188 189 def __add__(self, other): 190 """ 191 Adds two compositions. For example, an Fe2O3 composition + an FeO 192 composition gives a Fe3O4 composition. 193 """ 194 new_el_map = collections.defaultdict(float) 195 new_el_map.update(self) 196 for k, v in other.items(): 197 new_el_map[get_el_sp(k)] += v 198 return Composition(new_el_map, allow_negative=self.allow_negative) 199 200 def __sub__(self, other): 201 """ 202 Subtracts two compositions. For example, an Fe2O3 composition - an FeO 203 composition gives an FeO2 composition. 204 205 Raises: 206 ValueError if the subtracted composition is greater than the 207 original composition in any of its elements, unless allow_negative 208 is True 209 """ 210 new_el_map = collections.defaultdict(float) 211 new_el_map.update(self) 212 for k, v in other.items(): 213 new_el_map[get_el_sp(k)] -= v 214 return Composition(new_el_map, allow_negative=self.allow_negative) 215 216 def __mul__(self, other): 217 """ 218 Multiply a Composition by an integer or a float. 219 Fe2O3 * 4 -> Fe8O12 220 """ 221 if not isinstance(other, numbers.Number): 222 return NotImplemented 223 return Composition({el: self[el] * other for el in self}, allow_negative=self.allow_negative) 224 225 __rmul__ = __mul__ 226 227 def __truediv__(self, other): 228 if not isinstance(other, numbers.Number): 229 return NotImplemented 230 return Composition({el: self[el] / other for el in self}, allow_negative=self.allow_negative) 231 232 __div__ = __truediv__ 233 234 def __hash__(self): 235 """ 236 hash based on the chemical system 237 """ 238 return hash(frozenset(self._data.keys())) 239 240 @property 241 def average_electroneg(self) -> float: 242 """ 243 :return: Average electronegativity of the composition. 244 """ 245 return sum((el.X * abs(amt) for el, amt in self.items())) / self.num_atoms 246 247 @property 248 def total_electrons(self) -> float: 249 """ 250 :return: Total number of electrons in composition. 251 """ 252 return sum((el.Z * abs(amt) for el, amt in self.items())) 253 254 def almost_equals(self, other: "Composition", rtol: float = 0.1, atol: float = 1e-8) -> bool: 255 """ 256 Returns true if compositions are equal within a tolerance. 257 258 Args: 259 other (Composition): Other composition to check 260 rtol (float): Relative tolerance 261 atol (float): Absolute tolerance 262 """ 263 sps = set(self.elements + other.elements) 264 for sp in sps: 265 a = self[sp] 266 b = other[sp] 267 tol = atol + rtol * (abs(a) + abs(b)) / 2 268 if abs(b - a) > tol: 269 return False 270 return True 271 272 @property 273 def is_element(self) -> bool: 274 """ 275 True if composition is for an element. 276 """ 277 return len(self) == 1 278 279 def copy(self) -> "Composition": 280 """ 281 :return: A copy of the composition. 282 """ 283 return Composition(self, allow_negative=self.allow_negative) 284 285 @property 286 def formula(self) -> str: 287 """ 288 Returns a formula string, with elements sorted by electronegativity, 289 e.g., Li4 Fe4 P4 O16. 290 """ 291 sym_amt = self.get_el_amt_dict() 292 syms = sorted(sym_amt.keys(), key=lambda sym: get_el_sp(sym).X) 293 formula = [s + formula_double_format(sym_amt[s], False) for s in syms] 294 return " ".join(formula) 295 296 @property 297 def alphabetical_formula(self) -> str: 298 """ 299 Returns a formula string, with elements sorted by alphabetically 300 e.g., Fe4 Li4 O16 P4. 301 """ 302 sym_amt = self.get_el_amt_dict() 303 syms = sorted(sym_amt.keys()) 304 formula = [s + formula_double_format(sym_amt[s], False) for s in syms] 305 return " ".join(formula) 306 307 @property 308 def iupac_formula(self) -> str: 309 """ 310 Returns a formula string, with elements sorted by the iupac 311 electronegativity ordering defined in Table VI of "Nomenclature of 312 Inorganic Chemistry (IUPAC Recommendations 2005)". This ordering 313 effectively follows the groups and rows of the periodic table, except 314 the Lanthanides, Actanides and hydrogen. Polyanions are still determined 315 based on the true electronegativity of the elements. 316 e.g. CH2(SO4)2 317 """ 318 sym_amt = self.get_el_amt_dict() 319 syms = sorted(sym_amt.keys(), key=lambda s: get_el_sp(s).iupac_ordering) 320 formula = [s + formula_double_format(sym_amt[s], False) for s in syms] 321 return " ".join(formula) 322 323 @property 324 def element_composition(self) -> "Composition": 325 """ 326 Returns the composition replacing any species by the corresponding 327 element. 328 """ 329 return Composition(self.get_el_amt_dict(), allow_negative=self.allow_negative) 330 331 @property 332 def fractional_composition(self) -> "Composition": 333 """ 334 Returns the normalized composition which the number of species sum to 335 1. 336 337 Returns: 338 Normalized composition which the number of species sum to 1. 339 """ 340 return self / self._natoms 341 342 @property 343 def reduced_composition(self) -> "Composition": 344 """ 345 Returns the reduced composition,i.e. amounts normalized by greatest 346 common denominator. e.g., Composition("FePO4") for 347 Composition("Fe4P4O16"). 348 """ 349 return self.get_reduced_composition_and_factor()[0] 350 351 def get_reduced_composition_and_factor(self) -> Tuple["Composition", float]: 352 """ 353 Calculates a reduced composition and factor. 354 355 Returns: 356 A normalized composition and a multiplicative factor, i.e., 357 Li4Fe4P4O16 returns (Composition("LiFePO4"), 4). 358 """ 359 factor = self.get_reduced_formula_and_factor()[1] 360 return self / factor, factor 361 362 def get_reduced_formula_and_factor(self, iupac_ordering: bool = False) -> Tuple[str, float]: 363 """ 364 Calculates a reduced formula and factor. 365 366 Args: 367 iupac_ordering (bool, optional): Whether to order the 368 formula by the iupac "electronegativity" series, defined in 369 Table VI of "Nomenclature of Inorganic Chemistry (IUPAC 370 Recommendations 2005)". This ordering effectively follows 371 the groups and rows of the periodic table, except the 372 Lanthanides, Actanides and hydrogen. Note that polyanions 373 will still be determined based on the true electronegativity of 374 the elements. 375 376 Returns: 377 A pretty normalized formula and a multiplicative factor, i.e., 378 Li4Fe4P4O16 returns (LiFePO4, 4). 379 """ 380 all_int = all(abs(x - round(x)) < Composition.amount_tolerance for x in self.values()) 381 if not all_int: 382 return self.formula.replace(" ", ""), 1 383 d = {k: int(round(v)) for k, v in self.get_el_amt_dict().items()} 384 (formula, factor) = reduce_formula(d, iupac_ordering=iupac_ordering) 385 386 if formula in Composition.special_formulas: 387 formula = Composition.special_formulas[formula] 388 factor /= 2 389 390 return formula, factor 391 392 def get_integer_formula_and_factor( 393 self, max_denominator: int = 10000, iupac_ordering: bool = False 394 ) -> Tuple[str, float]: 395 """ 396 Calculates an integer formula and factor. 397 398 Args: 399 max_denominator (int): all amounts in the el:amt dict are 400 first converted to a Fraction with this maximum denominator 401 iupac_ordering (bool, optional): Whether to order the 402 formula by the iupac "electronegativity" series, defined in 403 Table VI of "Nomenclature of Inorganic Chemistry (IUPAC 404 Recommendations 2005)". This ordering effectively follows 405 the groups and rows of the periodic table, except the 406 Lanthanides, Actanides and hydrogen. Note that polyanions 407 will still be determined based on the true electronegativity of 408 the elements. 409 410 Returns: 411 A pretty normalized formula and a multiplicative factor, i.e., 412 Li0.5O0.25 returns (Li2O, 0.25). O0.25 returns (O2, 0.125) 413 """ 414 el_amt = self.get_el_amt_dict() 415 g = gcd_float(list(el_amt.values()), 1 / max_denominator) 416 417 d = {k: round(v / g) for k, v in el_amt.items()} 418 (formula, factor) = reduce_formula(d, iupac_ordering=iupac_ordering) 419 if formula in Composition.special_formulas: 420 formula = Composition.special_formulas[formula] 421 factor /= 2 422 return formula, factor * g 423 424 @property 425 def reduced_formula(self) -> str: 426 """ 427 Returns a pretty normalized formula, i.e., LiFePO4 instead of 428 Li4Fe4P4O16. 429 """ 430 return self.get_reduced_formula_and_factor()[0] 431 432 @property 433 def hill_formula(self) -> str: 434 """ 435 :return: Hill formula. The Hill system (or Hill notation) is a system 436 of writing empirical chemical formulas, molecular chemical formulas and 437 components of a condensed formula such that the number of carbon atoms 438 in a molecule is indicated first, the number of hydrogen atoms next, 439 and then the number of all other chemical elements subsequently, in 440 alphabetical order of the chemical symbols. When the formula contains 441 no carbon, all the elements, including hydrogen, are listed 442 alphabetically. 443 """ 444 c = self.element_composition 445 elements = sorted([el.symbol for el in c.keys()]) 446 if "C" in elements: 447 elements = ["C"] + [el for el in elements if el != "C"] 448 449 formula = ["%s%s" % (el, formula_double_format(c[el]) if c[el] != 1 else "") for el in elements] 450 return " ".join(formula) 451 452 @property 453 def elements(self) -> List[Union[Element, Species, DummySpecies]]: 454 """ 455 Returns view of elements in Composition. 456 """ 457 return list(self.keys()) 458 459 def __str__(self): 460 return " ".join( 461 ["{}{}".format(k, formula_double_format(v, ignore_ones=False)) for k, v in self.as_dict().items()] 462 ) 463 464 def to_pretty_string(self) -> str: 465 """ 466 :return: Same as __str__ but without spaces. 467 """ 468 return re.sub(r"\s+", "", self.__str__()) 469 470 @property 471 def num_atoms(self) -> float: 472 """ 473 Total number of atoms in Composition. For negative amounts, sum 474 of absolute values 475 """ 476 return self._natoms 477 478 @property 479 def weight(self) -> float: 480 """ 481 Total molecular weight of Composition 482 """ 483 return Mass(sum([amount * el.atomic_mass for el, amount in self.items()]), "amu") 484 485 def get_atomic_fraction(self, el: SpeciesLike) -> float: 486 """ 487 Calculate atomic fraction of an Element or Species. 488 489 Args: 490 el (Element/Species): Element or Species to get fraction for. 491 492 Returns: 493 Atomic fraction for element el in Composition 494 """ 495 return abs(self[el]) / self._natoms 496 497 def get_wt_fraction(self, el: SpeciesLike): 498 """ 499 Calculate weight fraction of an Element or Species. 500 501 Args: 502 el (Element/Species): Element or Species to get fraction for. 503 504 Returns: 505 Weight fraction for element el in Composition 506 """ 507 return get_el_sp(el).atomic_mass * abs(self[el]) / self.weight 508 509 def contains_element_type( 510 self, 511 category: str, 512 ): 513 """ 514 Check if Composition contains any elements matching a given category. 515 516 Args: 517 category (str): one of "noble_gas", "transition_metal", 518 "post_transition_metal", "rare_earth_metal", "metal", "metalloid", 519 "alkali", "alkaline", "halogen", "chalcogen", "lanthanoid", 520 "actinoid", "quadrupolar", "s-block", "p-block", "d-block", "f-block" 521 522 Returns: 523 True if any elements in Composition match category, otherwise False 524 """ 525 526 allowed_categories = ( 527 "noble_gas", 528 "transition_metal", 529 "post_transition_metal", 530 "rare_earth_metal", 531 "metal", 532 "metalloid", 533 "alkali", 534 "alkaline", 535 "halogen", 536 "chalcogen", 537 "lanthanoid", 538 "actinoid", 539 "quadrupolar", 540 "s-block", 541 "p-block", 542 "d-block", 543 "f-block", 544 ) 545 546 if category not in allowed_categories: 547 raise ValueError("Please pick a category from: {}".format(", ".join(allowed_categories))) 548 549 if "block" in category: 550 return any(category[0] in el.block for el in self.elements) 551 return any(getattr(el, "is_{}".format(category)) for el in self.elements) 552 553 def _parse_formula(self, formula): 554 """ 555 Args: 556 formula (str): A string formula, e.g. Fe2O3, Li3Fe2(PO4)3 557 558 Returns: 559 Composition with that formula. 560 561 Notes: 562 In the case of Metallofullerene formula (e.g. Y3N@C80), 563 the @ mark will be dropped and passed to parser. 564 """ 565 # for Metallofullerene like "Y3N@C80" 566 formula = formula.replace("@", "") 567 568 def get_sym_dict(f, factor): 569 sym_dict = collections.defaultdict(float) 570 for m in re.finditer(r"([A-Z][a-z]*)\s*([-*\.e\d]*)", f): 571 el = m.group(1) 572 amt = 1 573 if m.group(2).strip() != "": 574 amt = float(m.group(2)) 575 sym_dict[el] += amt * factor 576 f = f.replace(m.group(), "", 1) 577 if f.strip(): 578 raise ValueError("{} is an invalid formula!".format(f)) 579 return sym_dict 580 581 m = re.search(r"\(([^\(\)]+)\)\s*([\.e\d]*)", formula) 582 if m: 583 factor = 1 584 if m.group(2) != "": 585 factor = float(m.group(2)) 586 unit_sym_dict = get_sym_dict(m.group(1), factor) 587 expanded_sym = "".join(["{}{}".format(el, amt) for el, amt in unit_sym_dict.items()]) 588 expanded_formula = formula.replace(m.group(), expanded_sym) 589 return self._parse_formula(expanded_formula) 590 return get_sym_dict(formula, 1) 591 592 @property 593 def anonymized_formula(self) -> str: 594 """ 595 An anonymized formula. Unique species are arranged in ordering of 596 increasing amounts and assigned ascending alphabets. Useful for 597 prototyping formulas. For example, all stoichiometric perovskites have 598 anonymized_formula ABC3. 599 """ 600 reduced = self.element_composition 601 if all(x == int(x) for x in self.values()): 602 reduced /= gcd(*(int(i) for i in self.values())) 603 604 anon = "" 605 for e, amt in zip(string.ascii_uppercase, sorted(reduced.values())): 606 if amt == 1: 607 amt_str = "" 608 elif abs(amt % 1) < 1e-8: 609 amt_str = str(int(amt)) 610 else: 611 amt_str = str(amt) 612 anon += "{}{}".format(e, amt_str) 613 return anon 614 615 @property 616 def chemical_system(self) -> str: 617 """ 618 Get the chemical system of a Composition, for example "O-Si" for 619 SiO2. Chemical system is a string of a list of elements 620 sorted alphabetically and joined by dashes, by convention for use 621 in database keys. 622 """ 623 return "-".join(sorted([el.symbol for el in self.elements])) 624 625 @property 626 def valid(self) -> bool: 627 """ 628 Returns True if Composition contains valid elements or species and 629 False if the Composition contains any dummy species. 630 """ 631 return not any(isinstance(el, DummySpecies) for el in self.elements) 632 633 def __repr__(self): 634 return "Comp: " + self.formula 635 636 @classmethod 637 def from_dict(cls, d) -> "Composition": 638 """ 639 Creates a composition from a dict generated by as_dict(). Strictly not 640 necessary given that the standard constructor already takes in such an 641 input, but this method preserves the standard pymatgen API of having 642 from_dict methods to reconstitute objects generated by as_dict(). Allows 643 for easier introspection. 644 645 Args: 646 d (dict): {symbol: amount} dict. 647 """ 648 return cls(d) 649 650 def get_el_amt_dict(self) -> Dict[str, float]: 651 """ 652 Returns: 653 Dict with element symbol and (unreduced) amount e.g., 654 {"Fe": 4.0, "O":6.0} or {"Fe3+": 4.0, "O2-":6.0} 655 """ 656 d: Dict[str, float] = collections.defaultdict(float) 657 for e, a in self.items(): 658 d[e.symbol] += a 659 return d 660 661 def as_dict(self) -> dict: 662 """ 663 Returns: 664 dict with species symbol and (unreduced) amount e.g., 665 {"Fe": 4.0, "O":6.0} or {"Fe3+": 4.0, "O2-":6.0} 666 """ 667 d: Dict[str, float] = collections.defaultdict(float) 668 for e, a in self.items(): 669 d[str(e)] += a 670 return d 671 672 @property 673 def to_reduced_dict(self) -> dict: 674 """ 675 Returns: 676 Dict with element symbol and reduced amount e.g., 677 {"Fe": 2.0, "O":3.0} 678 """ 679 return self.get_reduced_composition_and_factor()[0].as_dict() 680 681 @property 682 def to_data_dict(self) -> dict: 683 """ 684 Returns: 685 A dict with many keys and values relating to Composition/Formula, 686 including reduced_cell_composition, unit_cell_composition, 687 reduced_cell_formula, elements and nelements. 688 """ 689 return { 690 "reduced_cell_composition": self.get_reduced_composition_and_factor()[0], 691 "unit_cell_composition": self.as_dict(), 692 "reduced_cell_formula": self.reduced_formula, 693 "elements": list(self.as_dict().keys()), 694 "nelements": len(self.as_dict().keys()), 695 } 696 697 def oxi_state_guesses( 698 self, 699 oxi_states_override: dict = None, 700 target_charge: float = 0, 701 all_oxi_states: bool = False, 702 max_sites: int = None, 703 ) -> List[Dict[str, float]]: 704 """ 705 Checks if the composition is charge-balanced and returns back all 706 charge-balanced oxidation state combinations. Composition must have 707 integer values. Note that more num_atoms in the composition gives 708 more degrees of freedom. e.g., if possible oxidation states of 709 element X are [2,4] and Y are [-3], then XY is not charge balanced 710 but X2Y2 is. Results are returned from most to least probable based 711 on ICSD statistics. Use max_sites to improve performance if needed. 712 713 Args: 714 oxi_states_override (dict): dict of str->list to override an 715 element's common oxidation states, e.g. {"V": [2,3,4,5]} 716 target_charge (int): the desired total charge on the structure. 717 Default is 0 signifying charge balance. 718 all_oxi_states (bool): if True, an element defaults to 719 all oxidation states in pymatgen Element.icsd_oxidation_states. 720 Otherwise, default is Element.common_oxidation_states. Note 721 that the full oxidation state list is *very* inclusive and 722 can produce nonsensical results. 723 max_sites (int): if possible, will reduce Compositions to at most 724 this many sites to speed up oxidation state guesses. If the 725 composition cannot be reduced to this many sites a ValueError 726 will be raised. Set to -1 to just reduce fully. If set to a 727 number less than -1, the formula will be fully reduced but a 728 ValueError will be thrown if the number of atoms in the reduced 729 formula is greater than abs(max_sites). 730 731 Returns: 732 A list of dicts - each dict reports an element symbol and average 733 oxidation state across all sites in that composition. If the 734 composition is not charge balanced, an empty list is returned. 735 """ 736 737 return self._get_oxid_state_guesses(all_oxi_states, max_sites, oxi_states_override, target_charge)[0] 738 739 def add_charges_from_oxi_state_guesses( 740 self, 741 oxi_states_override: dict = None, 742 target_charge: float = 0, 743 all_oxi_states: bool = False, 744 max_sites: int = None, 745 ) -> "Composition": 746 """ 747 Assign oxidation states basedon guessed oxidation states. 748 749 See `oxi_state_guesses` for an explanation of how oxidation states are 750 guessed. This operation uses the set of oxidation states for each site 751 that were determined to be most likley from the oxidation state guessing 752 routine. 753 754 Args: 755 oxi_states_override (dict): dict of str->list to override an 756 element's common oxidation states, e.g. {"V": [2,3,4,5]} 757 target_charge (int): the desired total charge on the structure. 758 Default is 0 signifying charge balance. 759 all_oxi_states (bool): if True, an element defaults to 760 all oxidation states in pymatgen Element.icsd_oxidation_states. 761 Otherwise, default is Element.common_oxidation_states. Note 762 that the full oxidation state list is *very* inclusive and 763 can produce nonsensical results. 764 max_sites (int): if possible, will reduce Compositions to at most 765 this many sites to speed up oxidation state guesses. If the 766 composition cannot be reduced to this many sites a ValueError 767 will be raised. Set to -1 to just reduce fully. If set to a 768 number less than -1, the formula will be fully reduced but a 769 ValueError will be thrown if the number of atoms in the reduced 770 formula is greater than abs(max_sites). 771 772 Returns: 773 Composition, where the elements are assigned oxidation states based 774 on the results form guessing oxidation states. If no oxidation state 775 is possible, returns a Composition where all oxidation states are 0. 776 """ 777 778 _, oxidation_states = self._get_oxid_state_guesses( 779 all_oxi_states, max_sites, oxi_states_override, target_charge 780 ) 781 782 # Special case: No charged compound is possible 783 if not oxidation_states: 784 return Composition(dict((Species(e, 0), f) for e, f in self.items())) 785 786 # Generate the species 787 species = [] 788 for el, charges in oxidation_states[0].items(): 789 species.extend([Species(el, c) for c in charges]) 790 791 # Return the new object 792 return Composition(collections.Counter(species)) 793 794 def remove_charges(self) -> "Composition": 795 """ 796 Removes the charges from any species in a Composition object. 797 798 Returns: 799 Composition object without charge decoration, for example 800 {"Fe3+": 2.0, "O2-":3.0} becomes {"Fe": 2.0, "O":3.0} 801 """ 802 d: Dict[Element, float] = collections.defaultdict(float) 803 for e, a in self.items(): 804 d[Element(e.symbol)] += a 805 return Composition(d) 806 807 def _get_oxid_state_guesses(self, all_oxi_states, max_sites, oxi_states_override, target_charge): 808 """ 809 Utility operation for guessing oxidation states. 810 811 See `oxi_state_guesses` for full details. This operation does the 812 calculation of the most likely oxidation states 813 814 Args: 815 oxi_states_override (dict): dict of str->list to override an 816 element's common oxidation states, e.g. {"V": [2,3,4,5]} 817 target_charge (int): the desired total charge on the structure. 818 Default is 0 signifying charge balance. 819 all_oxi_states (bool): if True, an element defaults to 820 all oxidation states in pymatgen Element.icsd_oxidation_states. 821 Otherwise, default is Element.common_oxidation_states. Note 822 that the full oxidation state list is *very* inclusive and 823 can produce nonsensical results. 824 max_sites (int): if possible, will reduce Compositions to at most 825 this many sites to speed up oxidation state guesses. If the 826 composition cannot be reduced to this many sites a ValueError 827 will be raised. Set to -1 to just reduce fully. If set to a 828 number less than -1, the formula will be fully reduced but a 829 ValueError will be thrown if the number of atoms in the reduced 830 formula is greater than abs(max_sites). 831 832 Returns: 833 A list of dicts - each dict reports an element symbol and average 834 oxidation state across all sites in that composition. If the 835 composition is not charge balanced, an empty list is returned. 836 A list of dicts - each dict maps the element symbol to a list of 837 oxidation states for each site of that element. For example, Fe3O4 could 838 return a list of [2,2,2,3,3,3] for the oxidation states of If the composition 839 is 840 841 """ 842 comp = self.copy() 843 # reduce Composition if necessary 844 if max_sites and max_sites < 0: 845 comp = self.reduced_composition 846 847 if max_sites < -1 and comp.num_atoms > abs(max_sites): 848 raise ValueError("Composition {} cannot accommodate max_sites " "setting!".format(comp)) 849 850 elif max_sites and comp.num_atoms > max_sites: 851 reduced_comp, reduced_factor = self.get_reduced_composition_and_factor() 852 if reduced_factor > 1: 853 reduced_comp *= max(1, int(max_sites / reduced_comp.num_atoms)) 854 comp = reduced_comp # as close to max_sites as possible 855 if comp.num_atoms > max_sites: 856 raise ValueError("Composition {} cannot accommodate max_sites " "setting!".format(comp)) 857 858 # Load prior probabilities of oxidation states, used to rank solutions 859 if not Composition.oxi_prob: 860 module_dir = os.path.join(os.path.dirname(os.path.abspath(__file__))) 861 all_data = loadfn(os.path.join(module_dir, "..", "analysis", "icsd_bv.yaml")) 862 Composition.oxi_prob = {Species.from_string(sp): data for sp, data in all_data["occurrence"].items()} 863 oxi_states_override = oxi_states_override or {} 864 # assert: Composition only has integer amounts 865 if not all(amt == int(amt) for amt in comp.values()): 866 raise ValueError("Charge balance analysis requires integer " "values in Composition!") 867 868 # for each element, determine all possible sum of oxidations 869 # (taking into account nsites for that particular element) 870 el_amt = comp.get_el_amt_dict() 871 els = el_amt.keys() 872 el_sums = [] # matrix: dim1= el_idx, dim2=possible sums 873 el_sum_scores = collections.defaultdict(set) # dict of el_idx, sum -> score 874 el_best_oxid_combo = {} # dict of el_idx, sum -> oxid combo with best score 875 for idx, el in enumerate(els): 876 el_sum_scores[idx] = {} 877 el_best_oxid_combo[idx] = {} 878 el_sums.append([]) 879 if oxi_states_override.get(el): 880 oxids = oxi_states_override[el] 881 elif all_oxi_states: 882 oxids = Element(el).oxidation_states 883 else: 884 oxids = Element(el).icsd_oxidation_states or Element(el).oxidation_states 885 886 # get all possible combinations of oxidation states 887 # and sum each combination 888 for oxid_combo in combinations_with_replacement(oxids, int(el_amt[el])): 889 890 # List this sum as a possible option 891 oxid_sum = sum(oxid_combo) 892 if oxid_sum not in el_sums[idx]: 893 el_sums[idx].append(oxid_sum) 894 895 # Determine how probable is this combo? 896 score = sum([Composition.oxi_prob.get(Species(el, o), 0) for o in oxid_combo]) 897 898 # If it is the most probable combo for a certain sum, 899 # store the combination 900 if oxid_sum not in el_sum_scores[idx] or score > el_sum_scores[idx].get(oxid_sum, 0): 901 el_sum_scores[idx][oxid_sum] = score 902 el_best_oxid_combo[idx][oxid_sum] = oxid_combo 903 904 # Determine which combination of oxidation states for each element 905 # is the most probable 906 all_sols = [] # will contain all solutions 907 all_oxid_combo = [] # will contain the best combination of oxidation states for each site 908 all_scores = [] # will contain a score for each solution 909 for x in product(*el_sums): 910 # each x is a trial of one possible oxidation sum for each element 911 if sum(x) == target_charge: # charge balance condition 912 el_sum_sol = dict(zip(els, x)) # element->oxid_sum 913 # normalize oxid_sum by amount to get avg oxid state 914 sol = {el: v / el_amt[el] for el, v in el_sum_sol.items()} 915 # add the solution to the list of solutions 916 all_sols.append(sol) 917 918 # determine the score for this solution 919 score = 0 920 for idx, v in enumerate(x): 921 score += el_sum_scores[idx][v] 922 all_scores.append(score) 923 924 # collect the combination of oxidation states for each site 925 all_oxid_combo.append(dict((e, el_best_oxid_combo[idx][v]) for idx, (e, v) in enumerate(zip(els, x)))) 926 927 # sort the solutions by highest to lowest score 928 if all_scores: 929 all_sols, all_oxid_combo = zip( 930 *[ 931 (y, x) 932 for (z, y, x) in sorted( 933 zip(all_scores, all_sols, all_oxid_combo), 934 key=lambda pair: pair[0], 935 reverse=True, 936 ) 937 ] 938 ) 939 return all_sols, all_oxid_combo 940 941 @staticmethod 942 def ranked_compositions_from_indeterminate_formula(fuzzy_formula, lock_if_strict=True): 943 """ 944 Takes in a formula where capitilization might not be correctly entered, 945 and suggests a ranked list of potential Composition matches. 946 Author: Anubhav Jain 947 948 Args: 949 fuzzy_formula (str): A formula string, such as "co2o3" or "MN", 950 that may or may not have multiple interpretations 951 lock_if_strict (bool): If true, a properly entered formula will 952 only return the one correct interpretation. For example, 953 "Co1" will only return "Co1" if true, but will return both 954 "Co1" and "C1 O1" if false. 955 956 Returns: 957 A ranked list of potential Composition matches 958 """ 959 960 # if we have an exact match and the user specifies lock_if_strict, just 961 # return the exact match! 962 if lock_if_strict: 963 # the strict composition parsing might throw an error, we can ignore 964 # it and just get on with fuzzy matching 965 try: 966 comp = Composition(fuzzy_formula) 967 return [comp] 968 except ValueError: 969 pass 970 971 all_matches = Composition._comps_from_fuzzy_formula(fuzzy_formula) 972 # remove duplicates 973 all_matches = list(set(all_matches)) 974 # sort matches by rank descending 975 all_matches = sorted(all_matches, key=lambda match: (match[1], match[0]), reverse=True) 976 all_matches = [m[0] for m in all_matches] 977 return all_matches 978 979 @staticmethod 980 def _comps_from_fuzzy_formula(fuzzy_formula, m_dict=None, m_points=0, factor=1): 981 """ 982 A recursive helper method for formula parsing that helps in 983 interpreting and ranking indeterminate formulas. 984 Author: Anubhav Jain 985 986 Args: 987 fuzzy_formula (str): A formula string, such as "co2o3" or "MN", 988 that may or may not have multiple interpretations. 989 m_dict (dict): A symbol:amt dictionary from the previously parsed 990 formula. 991 m_points: Number of points gained from the previously parsed 992 formula. 993 factor: Coefficient for this parse, e.g. (PO4)2 will feed in PO4 994 as the fuzzy_formula with a coefficient of 2. 995 996 Returns: 997 A list of tuples, with the first element being a Composition and 998 the second element being the number of points awarded that 999 Composition intepretation. 1000 """ 1001 m_dict = m_dict or {} 1002 1003 def _parse_chomp_and_rank(m, f, m_dict, m_points): 1004 """ 1005 A helper method for formula parsing that helps in interpreting and 1006 ranking indeterminate formulas 1007 Author: Anubhav Jain 1008 1009 Args: 1010 m: A regex match, with the first group being the element and 1011 the second group being the amount 1012 f: The formula part containing the match 1013 m_dict: A symbol:amt dictionary from the previously parsed 1014 formula 1015 m_points: Number of points gained from the previously parsed 1016 formula 1017 1018 Returns: 1019 A tuple of (f, m_dict, points) where m_dict now contains data 1020 from the match and the match has been removed (chomped) from 1021 the formula f. The "goodness" of the match determines the 1022 number of points returned for chomping. Returns 1023 (None, None, None) if no element could be found... 1024 """ 1025 1026 points = 0 1027 # Points awarded if the first element of the element is correctly 1028 # specified as a capital 1029 points_first_capital = 100 1030 # Points awarded if the second letter of the element is correctly 1031 # specified as lowercase 1032 points_second_lowercase = 100 1033 1034 # get element and amount from regex match 1035 el = m.group(1) 1036 if len(el) > 2 or len(el) < 1: 1037 raise ValueError("Invalid element symbol entered!") 1038 amt = float(m.group(2)) if m.group(2).strip() != "" else 1 1039 1040 # convert the element string to proper [uppercase,lowercase] format 1041 # and award points if it is already in that format 1042 char1 = el[0] 1043 char2 = el[1] if len(el) > 1 else "" 1044 1045 if char1 == char1.upper(): 1046 points += points_first_capital 1047 if char2 and char2 == char2.lower(): 1048 points += points_second_lowercase 1049 1050 el = char1.upper() + char2.lower() 1051 1052 # if it's a valid element, chomp and add to the points 1053 if Element.is_valid_symbol(el): 1054 if el in m_dict: 1055 m_dict[el] += amt * factor 1056 else: 1057 m_dict[el] = amt * factor 1058 return f.replace(m.group(), "", 1), m_dict, m_points + points 1059 1060 # else return None 1061 return None, None, None 1062 1063 fuzzy_formula = fuzzy_formula.strip() 1064 1065 if len(fuzzy_formula) == 0: 1066 # The entire formula has been parsed into m_dict. Return the 1067 # corresponding Composition and number of points 1068 if m_dict: 1069 yield (Composition.from_dict(m_dict), m_points) 1070 else: 1071 # if there is a parenthesis, remove it and match the remaining stuff 1072 # with the appropriate factor 1073 for mp in re.finditer(r"\(([^\(\)]+)\)([\.\d]*)", fuzzy_formula): 1074 mp_points = m_points 1075 mp_form = fuzzy_formula.replace(mp.group(), " ", 1) 1076 mp_dict = dict(m_dict) 1077 mp_factor = 1 if mp.group(2) == "" else float(mp.group(2)) 1078 # Match the stuff inside the parenthesis with the appropriate 1079 # factor 1080 for match in Composition._comps_from_fuzzy_formula(mp.group(1), mp_dict, mp_points, factor=mp_factor): 1081 only_me = True 1082 # Match the stuff outside the parentheses and return the 1083 # sum. 1084 1085 for match2 in Composition._comps_from_fuzzy_formula(mp_form, mp_dict, mp_points, factor=1): 1086 only_me = False 1087 yield (match[0] + match2[0], match[1] + match2[1]) 1088 # if the stuff inside the parenthesis is nothing, then just 1089 # return the stuff inside the parentheses 1090 if only_me: 1091 yield match 1092 return 1093 1094 # try to match the single-letter elements 1095 m1 = re.match(r"([A-z])([\.\d]*)", fuzzy_formula) 1096 if m1: 1097 m_points1 = m_points 1098 m_form1 = fuzzy_formula 1099 m_dict1 = dict(m_dict) 1100 (m_form1, m_dict1, m_points1) = _parse_chomp_and_rank(m1, m_form1, m_dict1, m_points1) 1101 if m_dict1: 1102 # there was a real match 1103 for match in Composition._comps_from_fuzzy_formula(m_form1, m_dict1, m_points1, factor): 1104 yield match 1105 1106 # try to match two-letter elements 1107 m2 = re.match(r"([A-z]{2})([\.\d]*)", fuzzy_formula) 1108 if m2: 1109 m_points2 = m_points 1110 m_form2 = fuzzy_formula 1111 m_dict2 = dict(m_dict) 1112 (m_form2, m_dict2, m_points2) = _parse_chomp_and_rank(m2, m_form2, m_dict2, m_points2) 1113 if m_dict2: 1114 # there was a real match 1115 for match in Composition._comps_from_fuzzy_formula(m_form2, m_dict2, m_points2, factor): 1116 yield match 1117 1118 1119def reduce_formula(sym_amt, iupac_ordering: bool = False) -> Tuple[str, float]: 1120 """ 1121 Helper method to reduce a sym_amt dict to a reduced formula and factor. 1122 1123 Args: 1124 sym_amt (dict): {symbol: amount}. 1125 iupac_ordering (bool, optional): Whether to order the 1126 formula by the iupac "electronegativity" series, defined in 1127 Table VI of "Nomenclature of Inorganic Chemistry (IUPAC 1128 Recommendations 2005)". This ordering effectively follows 1129 the groups and rows of the periodic table, except the 1130 Lanthanides, Actanides and hydrogen. Note that polyanions 1131 will still be determined based on the true electronegativity of 1132 the elements. 1133 1134 Returns: 1135 (reduced_formula, factor). 1136 """ 1137 syms = sorted(sym_amt.keys(), key=lambda x: [get_el_sp(x).X, x]) 1138 1139 syms = list(filter(lambda x: abs(sym_amt[x]) > Composition.amount_tolerance, syms)) 1140 1141 factor = 1 1142 # Enforce integers for doing gcd. 1143 if all((int(i) == i for i in sym_amt.values())): 1144 factor = abs(gcd(*(int(i) for i in sym_amt.values()))) 1145 1146 polyanion = [] 1147 # if the composition contains a poly anion 1148 if len(syms) >= 3 and get_el_sp(syms[-1]).X - get_el_sp(syms[-2]).X < 1.65: 1149 poly_sym_amt = {syms[i]: sym_amt[syms[i]] / factor for i in [-2, -1]} 1150 (poly_form, poly_factor) = reduce_formula(poly_sym_amt, iupac_ordering=iupac_ordering) 1151 1152 if poly_factor != 1: 1153 polyanion.append("({}){}".format(poly_form, int(poly_factor))) 1154 1155 syms = syms[: len(syms) - 2 if polyanion else len(syms)] 1156 1157 if iupac_ordering: 1158 syms = sorted(syms, key=lambda x: [get_el_sp(x).iupac_ordering, x]) 1159 1160 reduced_form = [] 1161 for s in syms: 1162 normamt = sym_amt[s] * 1.0 / factor 1163 reduced_form.append(s) 1164 reduced_form.append(formula_double_format(normamt)) 1165 1166 reduced_form = "".join(reduced_form + polyanion) # type: ignore 1167 return reduced_form, factor # type: ignore 1168 1169 1170class ChemicalPotential(dict, MSONable): 1171 """ 1172 Class to represent set of chemical potentials. Can be: 1173 multiplied/divided by a Number 1174 multiplied by a Composition (returns an energy) 1175 added/subtracted with other ChemicalPotentials. 1176 """ 1177 1178 def __init__(self, *args, **kwargs): 1179 """ 1180 Args: 1181 *args, **kwargs: any valid dict init arguments 1182 """ 1183 d = dict(*args, **kwargs) 1184 super().__init__((get_el_sp(k), v) for k, v in d.items()) 1185 if len(d) != len(self): 1186 raise ValueError("Duplicate potential specified") 1187 1188 def __mul__(self, other): 1189 if isinstance(other, numbers.Number): 1190 return ChemicalPotential({k: v * other for k, v in self.items()}) 1191 raise NotImplementedError() 1192 1193 __rmul__ = __mul__ 1194 1195 def __truediv__(self, other): 1196 if isinstance(other, numbers.Number): 1197 return ChemicalPotential({k: v / other for k, v in self.items()}) 1198 raise NotImplementedError() 1199 1200 __div__ = __truediv__ 1201 1202 def __sub__(self, other): 1203 if isinstance(other, ChemicalPotential): 1204 els = set(self.keys()).union(other.keys()) 1205 return ChemicalPotential({e: self.get(e, 0) - other.get(e, 0) for e in els}) 1206 raise NotImplementedError() 1207 1208 def __add__(self, other): 1209 if isinstance(other, ChemicalPotential): 1210 els = set(self.keys()).union(other.keys()) 1211 return ChemicalPotential({e: self.get(e, 0) + other.get(e, 0) for e in els}) 1212 raise NotImplementedError() 1213 1214 def get_energy(self, composition: Composition, strict: bool = True) -> float: 1215 """ 1216 Calculates the energy of a composition. 1217 1218 Args: 1219 composition (Composition): input composition 1220 strict (bool): Whether all potentials must be specified 1221 """ 1222 if strict and set(composition.keys()) > set(self.keys()): 1223 s = set(composition.keys()) - set(self.keys()) 1224 raise ValueError("Potentials not specified for {}".format(s)) 1225 return sum(self.get(k, 0) * v for k, v in composition.items()) 1226 1227 def __repr__(self): 1228 return "ChemPots: " + super().__repr__() 1229 1230 1231class CompositionError(Exception): 1232 """Exception class for composition errors""" 1233 1234 1235if __name__ == "__main__": 1236 import doctest 1237 1238 doctest.testmod() 1239