1# coding: utf-8 2# Copyright (c) Pymatgen Development Team. 3# Distributed under the terms of the MIT License. 4 5""" 6This module provides classes used to define a non-periodic molecule and a 7periodic structure. 8""" 9 10import collections 11import functools 12import itertools 13import json 14import math 15import os 16import random 17import re 18import sys 19import warnings 20from abc import ABCMeta, abstractmethod 21from fnmatch import fnmatch 22from typing import ( 23 Any, 24 Callable, 25 Dict, 26 Iterable, 27 Iterator, 28 List, 29 Optional, 30 Sequence, 31 Set, 32 Tuple, 33 Union, 34) 35 36import numpy as np 37from monty.dev import deprecated 38from monty.io import zopen 39from monty.json import MSONable 40from tabulate import tabulate 41 42from pymatgen.core import yaml 43from pymatgen.core.bonds import CovalentBond, get_bond_length 44from pymatgen.core.composition import Composition 45from pymatgen.core.lattice import Lattice, get_points_in_spheres 46from pymatgen.core.operations import SymmOp 47from pymatgen.core.periodic_table import DummySpecies, Element, Species, get_el_sp 48from pymatgen.core.sites import PeriodicSite, Site 49from pymatgen.core.units import Length, Mass 50from pymatgen.util.coord import all_distances, get_angle, lattice_points_in_supercell 51from pymatgen.util.typing import ArrayLike, CompositionLike, SpeciesLike 52 53if sys.version_info >= (3, 8): 54 from typing import Literal 55else: 56 from typing_extensions import Literal 57 58 59class Neighbor(Site): 60 """ 61 Simple Site subclass to contain a neighboring atom that skips all the unnecessary checks for speed. Can be 62 used as a fixed-length tuple of size 3 to retain backwards compatibility with past use cases. 63 64 (site, nn_distance, index). 65 66 In future, usage should be to call attributes, e.g., Neighbor.index, Neighbor.distance, etc. 67 """ 68 69 def __init__( 70 self, 71 species: Composition, 72 coords: np.ndarray, 73 properties: dict = None, 74 nn_distance: float = 0.0, 75 index: int = 0, 76 ): 77 """ 78 :param species: Same as Site 79 :param coords: Same as Site, but must be fractional. 80 :param properties: Same as Site 81 :param nn_distance: Distance to some other Site. 82 :param index: Index within structure. 83 """ 84 self.coords = coords 85 self._species = species 86 self.properties = properties or {} 87 self.nn_distance = nn_distance 88 self.index = index 89 90 def __len__(self): 91 """ 92 Make neighbor Tuple-like to retain backwards compatibility. 93 """ 94 return 3 95 96 def __getitem__(self, i: int): # type: ignore 97 """ 98 Make neighbor Tuple-like to retain backwards compatibility. 99 100 :param i: 101 :return: 102 """ 103 return (self, self.nn_distance, self.index)[i] 104 105 106class PeriodicNeighbor(PeriodicSite): 107 """ 108 Simple PeriodicSite subclass to contain a neighboring atom that skips all 109 the unnecessary checks for speed. Can be used as a fixed-length tuple of 110 size 4 to retain backwards compatibility with past use cases. 111 112 (site, distance, index, image). 113 114 In future, usage should be to call attributes, e.g., PeriodicNeighbor.index, 115 PeriodicNeighbor.distance, etc. 116 """ 117 118 def __init__( 119 self, 120 species: Composition, 121 coords: np.ndarray, 122 lattice: Lattice, 123 properties: dict = None, 124 nn_distance: float = 0.0, 125 index: int = 0, 126 image: tuple = (0, 0, 0), 127 ): 128 """ 129 :param species: Same as PeriodicSite 130 :param coords: Same as PeriodicSite, but must be fractional. 131 :param lattice: Same as PeriodicSite 132 :param properties: Same as PeriodicSite 133 :param nn_distance: Distance to some other Site. 134 :param index: Index within structure. 135 :param image: PeriodicImage 136 """ 137 self._lattice = lattice 138 self._frac_coords = coords 139 self._species = species 140 self.properties = properties or {} 141 self.nn_distance = nn_distance 142 self.index = index 143 self.image = image 144 145 @property # type: ignore 146 def coords(self) -> np.ndarray: # type: ignore 147 """ 148 :return: Cartesian coords. 149 """ 150 return self._lattice.get_cartesian_coords(self._frac_coords) 151 152 def __len__(self): 153 """ 154 Make neighbor Tuple-like to retain backwards compatibility. 155 """ 156 return 4 157 158 def __getitem__(self, i: int): # type: ignore 159 """ 160 Make neighbor Tuple-like to retain backwards compatibility. 161 162 :param i: 163 :return: 164 """ 165 return (self, self.nn_distance, self.index, self.image)[i] 166 167 168class SiteCollection(collections.abc.Sequence, metaclass=ABCMeta): 169 """ 170 Basic SiteCollection. Essentially a sequence of Sites or PeriodicSites. 171 This serves as a base class for Molecule (a collection of Site, i.e., no 172 periodicity) and Structure (a collection of PeriodicSites, i.e., 173 periodicity). Not meant to be instantiated directly. 174 """ 175 176 # Tolerance in Angstrom for determining if sites are too close. 177 DISTANCE_TOLERANCE = 0.5 178 179 @property 180 @abstractmethod 181 def sites(self): 182 """ 183 Returns a tuple of sites. 184 """ 185 186 @abstractmethod 187 def get_distance(self, i: int, j: int) -> float: 188 """ 189 Returns distance between sites at index i and j. 190 191 Args: 192 i: Index of first site 193 j: Index of second site 194 195 Returns: 196 Distance between sites at index i and index j. 197 """ 198 199 @property 200 def distance_matrix(self) -> np.ndarray: 201 """ 202 Returns the distance matrix between all sites in the structure. For 203 periodic structures, this is overwritten to return the nearest image 204 distance. 205 """ 206 return all_distances(self.cart_coords, self.cart_coords) 207 208 @property 209 def species(self) -> List[Composition]: 210 """ 211 Only works for ordered structures. 212 Disordered structures will raise an AttributeError. 213 214 Returns: 215 ([Species]) List of species at each site of the structure. 216 """ 217 return [site.specie for site in self] 218 219 @property 220 def species_and_occu(self) -> List[Composition]: 221 """ 222 List of species and occupancies at each site of the structure. 223 """ 224 return [site.species for site in self] 225 226 @property 227 def ntypesp(self) -> int: 228 """Number of types of atoms.""" 229 return len(self.types_of_species) 230 231 @property 232 def types_of_species(self) -> Tuple[Union[Element, Species, DummySpecies]]: 233 """ 234 List of types of specie. 235 """ 236 # Cannot use set since we want a deterministic algorithm. 237 types = [] # type: List[Union[Element, Species, DummySpecies]] 238 for site in self: 239 for sp, v in site.species.items(): 240 if v != 0: 241 types.append(sp) 242 return tuple(sorted(set(types))) # type: ignore 243 244 @property 245 def types_of_specie(self) -> Tuple[Union[Element, Species, DummySpecies]]: 246 """ 247 Specie->Species rename. Maintained for backwards compatibility. 248 """ 249 return self.types_of_species 250 251 def group_by_types(self) -> Iterator[Union[Site, PeriodicSite]]: 252 """Iterate over species grouped by type""" 253 for t in self.types_of_species: 254 for site in self: 255 if site.specie == t: 256 yield site 257 258 def indices_from_symbol(self, symbol: str) -> Tuple[int, ...]: 259 """ 260 Returns a tuple with the sequential indices of the sites 261 that contain an element with the given chemical symbol. 262 """ 263 return tuple((i for i, specie in enumerate(self.species) if specie.symbol == symbol)) 264 265 @property 266 def symbol_set(self) -> Tuple[str]: 267 """ 268 Tuple with the set of chemical symbols. 269 Note that len(symbol_set) == len(types_of_specie) 270 """ 271 return tuple(sorted(specie.symbol for specie in self.types_of_species)) # type: ignore 272 273 @property # type: ignore 274 def atomic_numbers(self) -> Tuple[int]: 275 """List of atomic numbers.""" 276 return tuple(site.specie.Z for site in self) # type: ignore 277 278 @property 279 def site_properties(self) -> Dict[str, List]: 280 """ 281 Returns the site properties as a dict of sequences. E.g., 282 {"magmom": (5,-5), "charge": (-4,4)}. 283 """ 284 props = {} # type: Dict[str, List] 285 prop_keys = set() # type: Set[str] 286 for site in self: 287 prop_keys.update(site.properties.keys()) 288 289 for k in prop_keys: 290 props[k] = [site.properties.get(k, None) for site in self] 291 return props 292 293 def __contains__(self, site): 294 return site in self.sites 295 296 def __iter__(self): 297 return self.sites.__iter__() 298 299 def __getitem__(self, ind): 300 return self.sites[ind] 301 302 def __len__(self): 303 return len(self.sites) 304 305 def __hash__(self): 306 # for now, just use the composition hash code. 307 return self.composition.__hash__() 308 309 @property 310 def num_sites(self) -> int: 311 """ 312 Number of sites. 313 """ 314 return len(self) 315 316 @property 317 def cart_coords(self): 318 """ 319 Returns a np.array of the cartesian coordinates of sites in the 320 structure. 321 """ 322 return np.array([site.coords for site in self]) 323 324 @property 325 def formula(self) -> str: 326 """ 327 (str) Returns the formula. 328 """ 329 return self.composition.formula 330 331 @property 332 def composition(self) -> Composition: 333 """ 334 (Composition) Returns the composition 335 """ 336 elmap = collections.defaultdict(float) # type: Dict[Species, float] 337 for site in self: 338 for species, occu in site.species.items(): 339 elmap[species] += occu 340 return Composition(elmap) 341 342 @property 343 def charge(self) -> float: 344 """ 345 Returns the net charge of the structure based on oxidation states. If 346 Elements are found, a charge of 0 is assumed. 347 """ 348 charge = 0 349 for site in self: 350 for specie, amt in site.species.items(): 351 charge += getattr(specie, "oxi_state", 0) * amt 352 return charge 353 354 @property 355 def is_ordered(self) -> bool: 356 """ 357 Checks if structure is ordered, meaning no partial occupancies in any 358 of the sites. 359 """ 360 return all((site.is_ordered for site in self)) 361 362 def get_angle(self, i: int, j: int, k: int) -> float: 363 """ 364 Returns angle specified by three sites. 365 366 Args: 367 i: Index of first site. 368 j: Index of second site. 369 k: Index of third site. 370 371 Returns: 372 Angle in degrees. 373 """ 374 v1 = self[i].coords - self[j].coords 375 v2 = self[k].coords - self[j].coords 376 return get_angle(v1, v2, units="degrees") 377 378 def get_dihedral(self, i: int, j: int, k: int, l: int) -> float: 379 """ 380 Returns dihedral angle specified by four sites. 381 382 Args: 383 i: Index of first site 384 j: Index of second site 385 k: Index of third site 386 l: Index of fourth site 387 388 Returns: 389 Dihedral angle in degrees. 390 """ 391 v1 = self[k].coords - self[l].coords 392 v2 = self[j].coords - self[k].coords 393 v3 = self[i].coords - self[j].coords 394 v23 = np.cross(v2, v3) 395 v12 = np.cross(v1, v2) 396 return math.degrees(math.atan2(np.linalg.norm(v2) * np.dot(v1, v23), np.dot(v12, v23))) 397 398 def is_valid(self, tol: float = DISTANCE_TOLERANCE) -> bool: 399 """ 400 True if SiteCollection does not contain atoms that are too close 401 together. Note that the distance definition is based on type of 402 SiteCollection. Cartesian distances are used for non-periodic 403 Molecules, while PBC is taken into account for periodic structures. 404 405 Args: 406 tol (float): Distance tolerance. Default is 0.5A. 407 408 Returns: 409 (bool) True if SiteCollection does not contain atoms that are too 410 close together. 411 """ 412 if len(self.sites) == 1: 413 return True 414 all_dists = self.distance_matrix[np.triu_indices(len(self), 1)] 415 return bool(np.min(all_dists) > tol) 416 417 @abstractmethod 418 def to(self, fmt: str = None, filename: str = None): 419 """ 420 Generates well-known string representations of SiteCollections (e.g., 421 molecules / structures). Should return a string type or write to a file. 422 """ 423 424 @classmethod 425 @abstractmethod 426 def from_str(cls, input_string: str, fmt: Any): 427 """ 428 Reads in SiteCollection from a string. 429 """ 430 431 @classmethod 432 @abstractmethod 433 def from_file(cls, filename: str): 434 """ 435 Reads in SiteCollection from a filename. 436 """ 437 438 def add_site_property(self, property_name: str, values: List): 439 """ 440 Adds a property to a site. 441 442 Args: 443 property_name (str): The name of the property to add. 444 values (list): A sequence of values. Must be same length as 445 number of sites. 446 """ 447 if len(values) != len(self.sites): 448 raise ValueError("Values must be same length as sites.") 449 for site, val in zip(self.sites, values): 450 site.properties[property_name] = val 451 452 def remove_site_property(self, property_name: str): 453 """ 454 Removes a property to a site. 455 456 Args: 457 property_name (str): The name of the property to remove. 458 """ 459 for site in self.sites: 460 del site.properties[property_name] 461 462 def replace_species(self, species_mapping: Dict[SpeciesLike, SpeciesLike]): 463 """ 464 Swap species. 465 466 Args: 467 species_mapping (dict): dict of species to swap. Species can be 468 elements too. E.g., {Element("Li"): Element("Na")} performs 469 a Li for Na substitution. The second species can be a 470 sp_and_occu dict. For example, a site with 0.5 Si that is 471 passed the mapping {Element('Si): {Element('Ge'):0.75, 472 Element('C'):0.25} } will have .375 Ge and .125 C. 473 """ 474 475 sp_mapping = {get_el_sp(k): v for k, v in species_mapping.items()} 476 sp_to_replace = set(sp_mapping.keys()) 477 sp_in_structure = set(self.composition.keys()) 478 if not sp_in_structure.issuperset(sp_to_replace): 479 warnings.warn( 480 "Some species to be substituted are not present in " 481 "structure. Pls check your input. Species to be " 482 "substituted = %s; Species in structure = %s" % (sp_to_replace, sp_in_structure) 483 ) 484 485 for site in self.sites: 486 if sp_to_replace.intersection(site.species): 487 c = Composition() 488 for sp, amt in site.species.items(): 489 new_sp = sp_mapping.get(sp, sp) 490 try: 491 c += Composition(new_sp) * amt 492 except Exception: 493 c += {new_sp: amt} 494 site.species = c 495 496 def add_oxidation_state_by_element(self, oxidation_states: Dict[str, float]): 497 """ 498 Add oxidation states. 499 500 Args: 501 oxidation_states (dict): Dict of oxidation states. 502 E.g., {"Li":1, "Fe":2, "P":5, "O":-2} 503 """ 504 try: 505 for site in self.sites: 506 new_sp = {} 507 for el, occu in site.species.items(): 508 sym = el.symbol 509 new_sp[Species(sym, oxidation_states[sym])] = occu 510 site.species = Composition(new_sp) 511 except KeyError: 512 raise ValueError("Oxidation state of all elements must be " "specified in the dictionary.") 513 514 def add_oxidation_state_by_site(self, oxidation_states: List[float]): 515 """ 516 Add oxidation states to a structure by site. 517 518 Args: 519 oxidation_states (list): List of oxidation states. 520 E.g., [1, 1, 1, 1, 2, 2, 2, 2, 5, 5, 5, 5, -2, -2, -2, -2] 521 """ 522 if len(oxidation_states) != len(self.sites): 523 raise ValueError("Oxidation states of all sites must be " "specified.") 524 for site, ox in zip(self.sites, oxidation_states): 525 new_sp = {} 526 for el, occu in site.species.items(): 527 sym = el.symbol 528 new_sp[Species(sym, ox)] = occu 529 site.species = Composition(new_sp) 530 531 def remove_oxidation_states(self): 532 """ 533 Removes oxidation states from a structure. 534 """ 535 for site in self.sites: 536 new_sp = collections.defaultdict(float) 537 for el, occu in site.species.items(): 538 sym = el.symbol 539 new_sp[Element(sym)] += occu 540 site.species = Composition(new_sp) 541 542 def add_oxidation_state_by_guess(self, **kwargs): 543 """ 544 Decorates the structure with oxidation state, guessing 545 using Composition.oxi_state_guesses() 546 547 Args: 548 **kwargs: parameters to pass into oxi_state_guesses() 549 """ 550 oxid_guess = self.composition.oxi_state_guesses(**kwargs) 551 oxid_guess = oxid_guess or [{e.symbol: 0 for e in self.composition}] 552 self.add_oxidation_state_by_element(oxid_guess[0]) 553 554 def add_spin_by_element(self, spins: Dict[str, float]): 555 """ 556 Add spin states to a structure. 557 558 Args: 559 spins (dict): Dict of spins associated with elements or species, 560 e.g. {"Ni":+5} or {"Ni2+":5} 561 """ 562 for site in self.sites: 563 new_sp = {} 564 for sp, occu in site.species.items(): 565 sym = sp.symbol 566 oxi_state = getattr(sp, "oxi_state", None) 567 new_sp[ 568 Species( 569 sym, 570 oxidation_state=oxi_state, 571 properties={"spin": spins.get(str(sp), spins.get(sym, None))}, 572 ) 573 ] = occu 574 site.species = Composition(new_sp) 575 576 def add_spin_by_site(self, spins: List[float]): 577 """ 578 Add spin states to a structure by site. 579 580 Args: 581 spins (list): List of spins 582 E.g., [+5, -5, 0, 0] 583 """ 584 if len(spins) != len(self.sites): 585 raise ValueError("Spin of all sites must be " "specified in the dictionary.") 586 587 for site, spin in zip(self.sites, spins): 588 new_sp = {} 589 for sp, occu in site.species.items(): 590 sym = sp.symbol 591 oxi_state = getattr(sp, "oxi_state", None) 592 new_sp[Species(sym, oxidation_state=oxi_state, properties={"spin": spin})] = occu 593 site.species = Composition(new_sp) 594 595 def remove_spin(self): 596 """ 597 Removes spin states from a structure. 598 """ 599 for site in self.sites: 600 new_sp = collections.defaultdict(float) 601 for sp, occu in site.species.items(): 602 oxi_state = getattr(sp, "oxi_state", None) 603 new_sp[Species(sp.symbol, oxidation_state=oxi_state)] += occu 604 site.species = new_sp 605 606 def extract_cluster(self, target_sites: List[Site], **kwargs): 607 r""" 608 Extracts a cluster of atoms based on bond lengths 609 610 Args: 611 target_sites ([Site]): List of initial sites to nucleate cluster. 612 **kwargs: kwargs passed through to CovalentBond.is_bonded. 613 614 Returns: 615 [Site/PeriodicSite] Cluster of atoms. 616 """ 617 cluster = list(target_sites) 618 others = [site for site in self if site not in cluster] 619 size = 0 620 while len(cluster) > size: 621 size = len(cluster) 622 new_others = [] 623 for site in others: 624 for site2 in cluster: 625 if CovalentBond.is_bonded(site, site2, **kwargs): 626 cluster.append(site) 627 break 628 else: 629 new_others.append(site) 630 others = new_others 631 return cluster 632 633 634class IStructure(SiteCollection, MSONable): 635 """ 636 Basic immutable Structure object with periodicity. Essentially a sequence 637 of PeriodicSites having a common lattice. IStructure is made to be 638 (somewhat) immutable so that they can function as keys in a dict. To make 639 modifications, use the standard Structure object instead. Structure 640 extends Sequence and Hashable, which means that in many cases, 641 it can be used like any Python sequence. Iterating through a 642 structure is equivalent to going through the sites in sequence. 643 """ 644 645 def __init__( 646 self, 647 lattice: Union[ArrayLike, Lattice], 648 species: Sequence[CompositionLike], 649 coords: Sequence[ArrayLike], 650 charge: float = None, 651 validate_proximity: bool = False, 652 to_unit_cell: bool = False, 653 coords_are_cartesian: bool = False, 654 site_properties: dict = None, 655 ): 656 """ 657 Create a periodic structure. 658 659 Args: 660 lattice (Lattice/3x3 array): The lattice, either as a 661 :class:`pymatgen.core.lattice.Lattice` or 662 simply as any 2D array. Each row should correspond to a lattice 663 vector. E.g., [[10,0,0], [20,10,0], [0,0,30]] specifies a 664 lattice with lattice vectors [10,0,0], [20,10,0] and [0,0,30]. 665 species ([Species]): Sequence of species on each site. Can take in 666 flexible input, including: 667 668 i. A sequence of element / species specified either as string 669 symbols, e.g. ["Li", "Fe2+", "P", ...] or atomic numbers, 670 e.g., (3, 56, ...) or actual Element or Species objects. 671 672 ii. List of dict of elements/species and occupancies, e.g., 673 [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of 674 disordered structures. 675 coords (Nx3 array): list of fractional/cartesian coordinates of 676 each species. 677 charge (int): overall charge of the structure. Defaults to behavior 678 in SiteCollection where total charge is the sum of the oxidation 679 states. 680 validate_proximity (bool): Whether to check if there are sites 681 that are less than 0.01 Ang apart. Defaults to False. 682 to_unit_cell (bool): Whether to map all sites into the unit cell, 683 i.e., fractional coords between 0 and 1. Defaults to False. 684 coords_are_cartesian (bool): Set to True if you are providing 685 coordinates in cartesian coordinates. Defaults to False. 686 site_properties (dict): Properties associated with the sites as a 687 dict of sequences, e.g., {"magmom":[5,5,5,5]}. The sequences 688 have to be the same length as the atomic species and 689 fractional_coords. Defaults to None for no properties. 690 """ 691 if len(species) != len(coords): 692 raise StructureError( 693 "The list of atomic species must be of the" " same length as the list of fractional" " coordinates." 694 ) 695 696 if isinstance(lattice, Lattice): 697 self._lattice = lattice 698 else: 699 self._lattice = Lattice(lattice) 700 701 sites = [] 702 for i, sp in enumerate(species): 703 prop = None 704 if site_properties: 705 prop = {k: v[i] for k, v in site_properties.items()} 706 707 sites.append( 708 PeriodicSite( 709 sp, 710 coords[i], 711 self._lattice, 712 to_unit_cell, 713 coords_are_cartesian=coords_are_cartesian, 714 properties=prop, 715 ) 716 ) 717 self._sites: Tuple[PeriodicSite, ...] = tuple(sites) 718 if validate_proximity and not self.is_valid(): 719 raise StructureError(("Structure contains sites that are ", "less than 0.01 Angstrom apart!")) 720 self._charge = charge 721 722 @classmethod 723 def from_sites( 724 cls, 725 sites: List[PeriodicSite], 726 charge: float = None, 727 validate_proximity: bool = False, 728 to_unit_cell: bool = False, 729 ): 730 """ 731 Convenience constructor to make a Structure from a list of sites. 732 733 Args: 734 sites: Sequence of PeriodicSites. Sites must have the same 735 lattice. 736 charge: Charge of structure. 737 validate_proximity (bool): Whether to check if there are sites 738 that are less than 0.01 Ang apart. Defaults to False. 739 to_unit_cell (bool): Whether to translate sites into the unit 740 cell. 741 742 Returns: 743 (Structure) Note that missing properties are set as None. 744 """ 745 if len(sites) < 1: 746 raise ValueError("You need at least one site to construct a %s" % cls) 747 prop_keys = [] # type: List[str] 748 props = {} 749 lattice = sites[0].lattice 750 for i, site in enumerate(sites): 751 if site.lattice != lattice: 752 raise ValueError("Sites must belong to the same lattice") 753 for k, v in site.properties.items(): 754 if k not in prop_keys: 755 prop_keys.append(k) 756 props[k] = [None] * len(sites) 757 props[k][i] = v 758 for k, v in props.items(): 759 if any((vv is None for vv in v)): 760 warnings.warn("Not all sites have property %s. Missing values " "are set to None." % k) 761 return cls( 762 lattice, 763 [site.species for site in sites], 764 [site.frac_coords for site in sites], 765 charge=charge, 766 site_properties=props, 767 validate_proximity=validate_proximity, 768 to_unit_cell=to_unit_cell, 769 ) 770 771 @classmethod 772 def from_spacegroup( 773 cls, 774 sg: str, 775 lattice: Union[List, np.ndarray, Lattice], 776 species: Sequence[Union[str, Element, Species, DummySpecies, Composition]], 777 coords: Sequence[Sequence[float]], 778 site_properties: Dict[str, Sequence] = None, 779 coords_are_cartesian: bool = False, 780 tol: float = 1e-5, 781 ) -> Union["IStructure", "Structure"]: 782 """ 783 Generate a structure using a spacegroup. Note that only symmetrically 784 distinct species and coords should be provided. All equivalent sites 785 are generated from the spacegroup operations. 786 787 Args: 788 sg (str/int): The spacegroup. If a string, it will be interpreted 789 as one of the notations supported by 790 pymatgen.symmetry.groups.Spacegroup. E.g., "R-3c" or "Fm-3m". 791 If an int, it will be interpreted as an international number. 792 lattice (Lattice/3x3 array): The lattice, either as a 793 :class:`pymatgen.core.lattice.Lattice` or 794 simply as any 2D array. Each row should correspond to a lattice 795 vector. E.g., [[10,0,0], [20,10,0], [0,0,30]] specifies a 796 lattice with lattice vectors [10,0,0], [20,10,0] and [0,0,30]. 797 Note that no attempt is made to check that the lattice is 798 compatible with the spacegroup specified. This may be 799 introduced in a future version. 800 species ([Species]): Sequence of species on each site. Can take in 801 flexible input, including: 802 803 i. A sequence of element / species specified either as string 804 symbols, e.g. ["Li", "Fe2+", "P", ...] or atomic numbers, 805 e.g., (3, 56, ...) or actual Element or Species objects. 806 807 ii. List of dict of elements/species and occupancies, e.g., 808 [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of 809 disordered structures. 810 coords (Nx3 array): list of fractional/cartesian coordinates of 811 each species. 812 coords_are_cartesian (bool): Set to True if you are providing 813 coordinates in cartesian coordinates. Defaults to False. 814 site_properties (dict): Properties associated with the sites as a 815 dict of sequences, e.g., {"magmom":[5,5,5,5]}. The sequences 816 have to be the same length as the atomic species and 817 fractional_coords. Defaults to None for no properties. 818 tol (float): A fractional tolerance to deal with numerical 819 precision issues in determining if orbits are the same. 820 """ 821 from pymatgen.symmetry.groups import SpaceGroup 822 823 try: 824 i = int(sg) 825 sgp = SpaceGroup.from_int_number(i) 826 except ValueError: 827 sgp = SpaceGroup(sg) 828 829 if isinstance(lattice, Lattice): 830 latt = lattice 831 else: 832 latt = Lattice(lattice) 833 834 if not sgp.is_compatible(latt): 835 raise ValueError( 836 "Supplied lattice with parameters %s is incompatible with " 837 "supplied spacegroup %s!" % (latt.parameters, sgp.symbol) 838 ) 839 840 if len(species) != len(coords): 841 raise ValueError( 842 "Supplied species and coords lengths (%d vs %d) are " "different!" % (len(species), len(coords)) 843 ) 844 845 frac_coords = ( 846 np.array(coords, dtype=np.float_) if not coords_are_cartesian else latt.get_fractional_coords(coords) 847 ) 848 849 props = {} if site_properties is None else site_properties 850 851 all_sp = [] # type: List[Union[str, Element, Species, DummySpecies, Composition]] 852 all_coords = [] # type: List[List[float]] 853 all_site_properties = collections.defaultdict(list) # type: Dict[str, List] 854 for i, (sp, c) in enumerate(zip(species, frac_coords)): 855 cc = sgp.get_orbit(c, tol=tol) 856 all_sp.extend([sp] * len(cc)) 857 all_coords.extend(cc) 858 for k, v in props.items(): 859 all_site_properties[k].extend([v[i]] * len(cc)) 860 861 return cls(latt, all_sp, all_coords, site_properties=all_site_properties) 862 863 @classmethod 864 def from_magnetic_spacegroup( 865 cls, 866 msg: Union[str, "MagneticSpaceGroup"], # type: ignore # noqa: F821 867 lattice: Union[List, np.ndarray, Lattice], 868 species: Sequence[Union[str, Element, Species, DummySpecies, Composition]], 869 coords: Sequence[Sequence[float]], 870 site_properties: Dict[str, Sequence], 871 coords_are_cartesian: bool = False, 872 tol: float = 1e-5, 873 ) -> Union["IStructure", "Structure"]: 874 """ 875 Generate a structure using a magnetic spacegroup. Note that only 876 symmetrically distinct species, coords and magmoms should be provided.] 877 All equivalent sites are generated from the spacegroup operations. 878 879 Args: 880 msg (str/list/:class:`pymatgen.symmetry.maggroups.MagneticSpaceGroup`): 881 The magnetic spacegroup. 882 If a string, it will be interpreted as one of the notations 883 supported by MagneticSymmetryGroup, e.g., "R-3'c" or "Fm'-3'm". 884 If a list of two ints, it will be interpreted as the number of 885 the spacegroup in its Belov, Neronova and Smirnova (BNS) setting. 886 lattice (Lattice/3x3 array): The lattice, either as a 887 :class:`pymatgen.core.lattice.Lattice` or 888 simply as any 2D array. Each row should correspond to a lattice 889 vector. E.g., [[10,0,0], [20,10,0], [0,0,30]] specifies a 890 lattice with lattice vectors [10,0,0], [20,10,0] and [0,0,30]. 891 Note that no attempt is made to check that the lattice is 892 compatible with the spacegroup specified. This may be 893 introduced in a future version. 894 species ([Species]): Sequence of species on each site. Can take in 895 flexible input, including: 896 i. A sequence of element / species specified either as string 897 symbols, e.g. ["Li", "Fe2+", "P", ...] or atomic numbers, 898 e.g., (3, 56, ...) or actual Element or Species objects. 899 900 ii. List of dict of elements/species and occupancies, e.g., 901 [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of 902 disordered structures. 903 coords (Nx3 array): list of fractional/cartesian coordinates of 904 each species. 905 site_properties (dict): Properties associated with the sites as a 906 dict of sequences, e.g., {"magmom":[5,5,5,5]}. The sequences 907 have to be the same length as the atomic species and 908 fractional_coords. Unlike Structure.from_spacegroup(), 909 this argument is mandatory, since magnetic moment information 910 has to be included. Note that the *direction* of the supplied 911 magnetic moment relative to the crystal is important, even if 912 the resulting structure is used for collinear calculations. 913 coords_are_cartesian (bool): Set to True if you are providing 914 coordinates in cartesian coordinates. Defaults to False. 915 tol (float): A fractional tolerance to deal with numerical 916 precision issues in determining if orbits are the same. 917 """ 918 from pymatgen.electronic_structure.core import Magmom 919 from pymatgen.symmetry.maggroups import MagneticSpaceGroup 920 921 if "magmom" not in site_properties: 922 raise ValueError("Magnetic moments have to be defined.") 923 924 magmoms = [Magmom(m) for m in site_properties["magmom"]] 925 926 if not isinstance(msg, MagneticSpaceGroup): 927 msg = MagneticSpaceGroup(msg) # type: ignore 928 929 if isinstance(lattice, Lattice): 930 latt = lattice 931 else: 932 latt = Lattice(lattice) 933 934 if not msg.is_compatible(latt): 935 raise ValueError( 936 "Supplied lattice with parameters %s is incompatible with " 937 "supplied spacegroup %s!" % (latt.parameters, msg.sg_symbol) 938 ) 939 940 if len(species) != len(coords): 941 raise ValueError( 942 "Supplied species and coords lengths (%d vs %d) are " "different!" % (len(species), len(coords)) 943 ) 944 945 if len(species) != len(magmoms): 946 raise ValueError( 947 "Supplied species and magmom lengths (%d vs %d) are " "different!" % (len(species), len(magmoms)) 948 ) 949 950 frac_coords = coords if not coords_are_cartesian else latt.get_fractional_coords(coords) 951 952 all_sp = [] # type: List[Union[str, Element, Species, DummySpecies, Composition]] 953 all_coords = [] # type: List[List[float]] 954 all_magmoms = [] # type: List[float] 955 all_site_properties = collections.defaultdict(list) # type: Dict[str, List] 956 for i, (sp, c, m) in enumerate(zip(species, frac_coords, magmoms)): # type: ignore 957 cc, mm = msg.get_orbit(c, m, tol=tol) 958 all_sp.extend([sp] * len(cc)) 959 all_coords.extend(cc) 960 all_magmoms.extend(mm) 961 for k, v in site_properties.items(): 962 if k != "magmom": 963 all_site_properties[k].extend([v[i]] * len(cc)) 964 965 all_site_properties["magmom"] = all_magmoms 966 967 return cls(latt, all_sp, all_coords, site_properties=all_site_properties) 968 969 @property 970 def charge(self) -> float: 971 """ 972 Overall charge of the structure 973 """ 974 if self._charge is None: 975 return super().charge 976 return self._charge 977 978 @property 979 def distance_matrix(self) -> np.ndarray: 980 """ 981 Returns the distance matrix between all sites in the structure. For 982 periodic structures, this should return the nearest image distance. 983 """ 984 return self.lattice.get_all_distances(self.frac_coords, self.frac_coords) 985 986 @property 987 def sites(self) -> Tuple[PeriodicSite, ...]: 988 """ 989 Returns an iterator for the sites in the Structure. 990 """ 991 return self._sites 992 993 @property 994 def lattice(self) -> Lattice: 995 """ 996 Lattice of the structure. 997 """ 998 return self._lattice 999 1000 @property 1001 def density(self) -> float: 1002 """ 1003 Returns the density in units of g/cc 1004 """ 1005 m = Mass(self.composition.weight, "amu") 1006 return m.to("g") / (self.volume * Length(1, "ang").to("cm") ** 3) 1007 1008 def get_space_group_info(self, symprec=1e-2, angle_tolerance=5.0) -> Tuple[str, int]: 1009 """ 1010 Convenience method to quickly get the spacegroup of a structure. 1011 1012 Args: 1013 symprec (float): Same definition as in SpacegroupAnalyzer. 1014 Defaults to 1e-2. 1015 angle_tolerance (float): Same definition as in SpacegroupAnalyzer. 1016 Defaults to 5 degrees. 1017 1018 Returns: 1019 spacegroup_symbol, international_number 1020 """ 1021 # Import within method needed to avoid cyclic dependency. 1022 from pymatgen.symmetry.analyzer import SpacegroupAnalyzer 1023 1024 a = SpacegroupAnalyzer(self, symprec=symprec, angle_tolerance=angle_tolerance) 1025 return a.get_space_group_symbol(), a.get_space_group_number() 1026 1027 def matches(self, other, anonymous=False, **kwargs): 1028 """ 1029 Check whether this structure is similar to another structure. 1030 Basically a convenience method to call structure matching. 1031 1032 Args: 1033 other (IStructure/Structure): Another structure. 1034 **kwargs: Same **kwargs as in 1035 :class:`pymatgen.analysis.structure_matcher.StructureMatcher`. 1036 1037 Returns: 1038 (bool) True is the structures are similar under some affine 1039 transformation. 1040 """ 1041 from pymatgen.analysis.structure_matcher import StructureMatcher 1042 1043 m = StructureMatcher(**kwargs) 1044 if not anonymous: 1045 return m.fit(self, other) 1046 return m.fit_anonymous(self, other) 1047 1048 def __eq__(self, other): 1049 if other is self: 1050 return True 1051 if other is None: 1052 return False 1053 if len(self) != len(other): 1054 return False 1055 if self.lattice != other.lattice: 1056 return False 1057 for site in self: 1058 if site not in other: 1059 return False 1060 return True 1061 1062 def __ne__(self, other): 1063 return not self.__eq__(other) 1064 1065 def __hash__(self): 1066 # For now, just use the composition hash code. 1067 return self.composition.__hash__() 1068 1069 def __mul__(self, scaling_matrix): 1070 """ 1071 Makes a supercell. Allowing to have sites outside the unit cell 1072 1073 Args: 1074 scaling_matrix: A scaling matrix for transforming the lattice 1075 vectors. Has to be all integers. Several options are possible: 1076 1077 a. A full 3x3 scaling matrix defining the linear combination 1078 the old lattice vectors. E.g., [[2,1,0],[0,3,0],[0,0, 1079 1]] generates a new structure with lattice vectors a' = 1080 2a + b, b' = 3b, c' = c where a, b, and c are the lattice 1081 vectors of the original structure. 1082 b. An sequence of three scaling factors. E.g., [2, 1, 1] 1083 specifies that the supercell should have dimensions 2a x b x 1084 c. 1085 c. A number, which simply scales all lattice vectors by the 1086 same factor. 1087 1088 Returns: 1089 Supercell structure. Note that a Structure is always returned, 1090 even if the input structure is a subclass of Structure. This is 1091 to avoid different arguments signatures from causing problems. If 1092 you prefer a subclass to return its own type, you need to override 1093 this method in the subclass. 1094 """ 1095 scale_matrix = np.array(scaling_matrix, np.int16) 1096 if scale_matrix.shape != (3, 3): 1097 scale_matrix = np.array(scale_matrix * np.eye(3), np.int16) 1098 new_lattice = Lattice(np.dot(scale_matrix, self._lattice.matrix)) 1099 1100 f_lat = lattice_points_in_supercell(scale_matrix) 1101 c_lat = new_lattice.get_cartesian_coords(f_lat) 1102 1103 new_sites = [] 1104 for site in self: 1105 for v in c_lat: 1106 s = PeriodicSite( 1107 site.species, 1108 site.coords + v, 1109 new_lattice, 1110 properties=site.properties, 1111 coords_are_cartesian=True, 1112 to_unit_cell=False, 1113 skip_checks=True, 1114 ) 1115 new_sites.append(s) 1116 1117 new_charge = self._charge * np.linalg.det(scale_matrix) if self._charge else None 1118 return Structure.from_sites(new_sites, charge=new_charge) 1119 1120 def __rmul__(self, scaling_matrix): 1121 """ 1122 Similar to __mul__ to preserve commutativeness. 1123 """ 1124 return self.__mul__(scaling_matrix) 1125 1126 @property 1127 def frac_coords(self): 1128 """ 1129 Fractional coordinates as a Nx3 numpy array. 1130 """ 1131 return np.array([site.frac_coords for site in self._sites]) 1132 1133 @property 1134 def volume(self): 1135 """ 1136 Returns the volume of the structure. 1137 """ 1138 return self._lattice.volume 1139 1140 def get_distance(self, i: int, j: int, jimage=None) -> float: 1141 """ 1142 Get distance between site i and j assuming periodic boundary 1143 conditions. If the index jimage of two sites atom j is not specified it 1144 selects the jimage nearest to the i atom and returns the distance and 1145 jimage indices in terms of lattice vector translations if the index 1146 jimage of atom j is specified it returns the distance between the i 1147 atom and the specified jimage atom. 1148 1149 Args: 1150 i (int): Index of first site 1151 j (int): Index of second site 1152 jimage: Number of lattice translations in each lattice direction. 1153 Default is None for nearest image. 1154 1155 Returns: 1156 distance 1157 """ 1158 return self[i].distance(self[j], jimage) 1159 1160 def get_sites_in_sphere( 1161 self, 1162 pt: ArrayLike, 1163 r: float, 1164 include_index: bool = False, 1165 include_image: bool = False, 1166 ) -> List[PeriodicNeighbor]: 1167 """ 1168 Find all sites within a sphere from the point, including a site (if any) 1169 sitting on the point itself. This includes sites in other periodic 1170 images. 1171 1172 Algorithm: 1173 1174 1. place sphere of radius r in crystal and determine minimum supercell 1175 (parallelpiped) which would contain a sphere of radius r. for this 1176 we need the projection of a_1 on a unit vector perpendicular 1177 to a_2 & a_3 (i.e. the unit vector in the direction b_1) to 1178 determine how many a_1"s it will take to contain the sphere. 1179 1180 Nxmax = r * length_of_b_1 / (2 Pi) 1181 1182 2. keep points falling within r. 1183 1184 Args: 1185 pt (3x1 array): cartesian coordinates of center of sphere. 1186 r (float): Radius of sphere. 1187 include_index (bool): Whether the non-supercell site index 1188 is included in the returned data 1189 include_image (bool): Whether to include the supercell image 1190 is included in the returned data 1191 1192 Returns: 1193 [:class:`pymatgen.core.structure.PeriodicNeighbor`] 1194 """ 1195 site_fcoords = np.mod(self.frac_coords, 1) 1196 neighbors = [] # type: List[PeriodicNeighbor] 1197 for fcoord, dist, i, img in self._lattice.get_points_in_sphere(site_fcoords, pt, r): 1198 nnsite = PeriodicNeighbor( 1199 self[i].species, 1200 fcoord, 1201 self._lattice, 1202 properties=self[i].properties, 1203 nn_distance=dist, 1204 image=img, # type: ignore 1205 index=i, 1206 ) 1207 neighbors.append(nnsite) 1208 return neighbors 1209 1210 def get_neighbors( 1211 self, 1212 site: PeriodicSite, 1213 r: float, 1214 include_index: bool = False, 1215 include_image: bool = False, 1216 ) -> List[PeriodicNeighbor]: 1217 """ 1218 Get all neighbors to a site within a sphere of radius r. Excludes the 1219 site itself. 1220 1221 Args: 1222 site (Site): Which is the center of the sphere. 1223 r (float): Radius of sphere. 1224 include_index (bool): Deprecated. Now, the non-supercell site index 1225 is always included in the returned data. 1226 include_image (bool): Deprecated. Now the supercell image 1227 is always included in the returned data. 1228 1229 Returns: 1230 [:class:`pymatgen.core.structure.PeriodicNeighbor`] 1231 """ 1232 return self.get_all_neighbors(r, include_index=include_index, include_image=include_image, sites=[site])[0] 1233 1234 @deprecated(get_neighbors, "This is retained purely for checking purposes.") 1235 def get_neighbors_old(self, site, r, include_index=False, include_image=False): 1236 """ 1237 Get all neighbors to a site within a sphere of radius r. Excludes the 1238 site itself. 1239 1240 Args: 1241 site (Site): Which is the center of the sphere. 1242 r (float): Radius of sphere. 1243 include_index (bool): Whether the non-supercell site index 1244 is included in the returned data 1245 include_image (bool): Whether to include the supercell image 1246 is included in the returned data 1247 1248 Returns: 1249 [:class:`pymatgen.core.structure.PeriodicNeighbor`] 1250 """ 1251 nn = self.get_sites_in_sphere(site.coords, r, include_index=include_index, include_image=include_image) 1252 return [d for d in nn if site != d[0]] 1253 1254 def _get_neighbor_list_py( 1255 self, 1256 r: float, 1257 sites: List[PeriodicSite] = None, 1258 numerical_tol: float = 1e-8, 1259 exclude_self: bool = True, 1260 ) -> Tuple[np.ndarray, ...]: 1261 """ 1262 A python version of getting neighbor_list. The returned values are a tuple of 1263 numpy arrays (center_indices, points_indices, offset_vectors, distances). 1264 Atom `center_indices[i]` has neighbor atom `points_indices[i]` that is 1265 translated by `offset_vectors[i]` lattice vectors, and the distance is 1266 `distances[i]`. 1267 1268 Args: 1269 r (float): Radius of sphere 1270 sites (list of Sites or None): sites for getting all neighbors, 1271 default is None, which means neighbors will be obtained for all 1272 sites. This is useful in the situation where you are interested 1273 only in one subspecies type, and makes it a lot faster. 1274 numerical_tol (float): This is a numerical tolerance for distances. 1275 Sites which are < numerical_tol are determined to be conincident 1276 with the site. Sites which are r + numerical_tol away is deemed 1277 to be within r from the site. The default of 1e-8 should be 1278 ok in most instances. 1279 exclude_self (bool): whether to exclude atom neighboring with itself within 1280 numerical tolerance distance, default to True 1281 Returns: (center_indices, points_indices, offset_vectors, distances) 1282 """ 1283 neighbors = self.get_all_neighbors_py( 1284 r=r, include_index=True, include_image=True, sites=sites, numerical_tol=1e-8 1285 ) 1286 center_indices = [] 1287 points_indices = [] 1288 offsets = [] 1289 distances = [] 1290 for i, nns in enumerate(neighbors): 1291 if len(nns) > 0: 1292 for n in nns: 1293 if exclude_self and (i == n.index) and (n.nn_distance <= numerical_tol): 1294 continue 1295 center_indices.append(i) 1296 points_indices.append(n.index) 1297 offsets.append(n.image) 1298 distances.append(n.nn_distance) 1299 return tuple( 1300 ( 1301 np.array(center_indices), 1302 np.array(points_indices), 1303 np.array(offsets), 1304 np.array(distances), 1305 ) 1306 ) 1307 1308 def get_neighbor_list( 1309 self, 1310 r: float, 1311 sites: Sequence[PeriodicSite] = None, 1312 numerical_tol: float = 1e-8, 1313 exclude_self: bool = True, 1314 ) -> Tuple[np.ndarray, ...]: 1315 """ 1316 Get neighbor lists using numpy array representations without constructing 1317 Neighbor objects. If the cython extension is installed, this method will 1318 be orders of magnitude faster than `get_all_neighbors_old` and 2-3x faster 1319 than `get_all_neighbors`. 1320 The returned values are a tuple of numpy arrays 1321 (center_indices, points_indices, offset_vectors, distances). 1322 Atom `center_indices[i]` has neighbor atom `points_indices[i]` that is 1323 translated by `offset_vectors[i]` lattice vectors, and the distance is 1324 `distances[i]`. 1325 1326 Args: 1327 r (float): Radius of sphere 1328 sites (list of Sites or None): sites for getting all neighbors, 1329 default is None, which means neighbors will be obtained for all 1330 sites. This is useful in the situation where you are interested 1331 only in one subspecies type, and makes it a lot faster. 1332 numerical_tol (float): This is a numerical tolerance for distances. 1333 Sites which are < numerical_tol are determined to be conincident 1334 with the site. Sites which are r + numerical_tol away is deemed 1335 to be within r from the site. The default of 1e-8 should be 1336 ok in most instances. 1337 exclude_self (bool): whether to exclude atom neighboring with itself within 1338 numerical tolerance distance, default to True 1339 Returns: (center_indices, points_indices, offset_vectors, distances) 1340 1341 """ 1342 try: 1343 from pymatgen.optimization.neighbors import ( 1344 find_points_in_spheres, # type: ignore 1345 ) 1346 except ImportError: 1347 return self._get_neighbor_list_py(r, sites, exclude_self=exclude_self) # type: ignore 1348 else: 1349 if sites is None: 1350 sites = self.sites 1351 site_coords = np.array([site.coords for site in sites], dtype=float) 1352 cart_coords = np.ascontiguousarray(np.array(self.cart_coords), dtype=float) 1353 lattice_matrix = np.ascontiguousarray(np.array(self.lattice.matrix), dtype=float) 1354 r = float(r) 1355 center_indices, points_indices, images, distances = find_points_in_spheres( 1356 cart_coords, 1357 site_coords, 1358 r=r, 1359 pbc=np.array([1, 1, 1], dtype=int), 1360 lattice=lattice_matrix, 1361 tol=numerical_tol, 1362 ) 1363 cond = np.array([True] * len(center_indices)) 1364 if exclude_self: 1365 self_pair = (center_indices == points_indices) & (distances <= numerical_tol) 1366 cond = ~self_pair 1367 return tuple( 1368 ( 1369 center_indices[cond], 1370 points_indices[cond], 1371 images[cond], 1372 distances[cond], 1373 ) 1374 ) 1375 1376 def get_all_neighbors( 1377 self, 1378 r: float, 1379 include_index: bool = False, 1380 include_image: bool = False, 1381 sites: Sequence[PeriodicSite] = None, 1382 numerical_tol: float = 1e-8, 1383 ) -> List[List[PeriodicNeighbor]]: 1384 """ 1385 Get neighbors for each atom in the unit cell, out to a distance r 1386 Returns a list of list of neighbors for each site in structure. 1387 Use this method if you are planning on looping over all sites in the 1388 crystal. If you only want neighbors for a particular site, use the 1389 method get_neighbors as it may not have to build such a large supercell 1390 However if you are looping over all sites in the crystal, this method 1391 is more efficient since it only performs one pass over a large enough 1392 supercell to contain all possible atoms out to a distance r. 1393 The return type is a [(site, dist) ...] since most of the time, 1394 subsequent processing requires the distance. 1395 1396 A note about periodic images: Before computing the neighbors, this 1397 operation translates all atoms to within the unit cell (having 1398 fractional coordinates within [0,1)). This means that the "image" of a 1399 site does not correspond to how much it has been translates from its 1400 current position, but which image of the unit cell it resides. 1401 1402 Args: 1403 r (float): Radius of sphere. 1404 include_index (bool): Deprecated. Now, the non-supercell site index 1405 is always included in the returned data. 1406 include_image (bool): Deprecated. Now the supercell image 1407 is always included in the returned data. 1408 sites (list of Sites or None): sites for getting all neighbors, 1409 default is None, which means neighbors will be obtained for all 1410 sites. This is useful in the situation where you are interested 1411 only in one subspecies type, and makes it a lot faster. 1412 numerical_tol (float): This is a numerical tolerance for distances. 1413 Sites which are < numerical_tol are determined to be conincident 1414 with the site. Sites which are r + numerical_tol away is deemed 1415 to be within r from the site. The default of 1e-8 should be 1416 ok in most instances. 1417 1418 Returns: 1419 [[:class:`pymatgen.core.structure.PeriodicNeighbor`], ..] 1420 """ 1421 if sites is None: 1422 sites = self.sites 1423 center_indices, points_indices, images, distances = self.get_neighbor_list( 1424 r=r, sites=sites, numerical_tol=numerical_tol 1425 ) 1426 if len(points_indices) < 1: 1427 return [[]] * len(sites) 1428 f_coords = self.frac_coords[points_indices] + images 1429 neighbor_dict: Dict[int, List] = collections.defaultdict(list) 1430 lattice = self.lattice 1431 atol = Site.position_atol 1432 all_sites = self.sites 1433 for cindex, pindex, image, f_coord, d in zip(center_indices, points_indices, images, f_coords, distances): 1434 psite = all_sites[pindex] 1435 csite = sites[cindex] 1436 if ( 1437 d > numerical_tol 1438 or 1439 # This simply compares the psite and csite. The reason why manual comparison is done is 1440 # for speed. This does not check the lattice since they are always equal. Also, the or construct 1441 # returns True immediately once one of the conditions are satisfied. 1442 psite.species != csite.species 1443 or (not np.allclose(psite.coords, csite.coords, atol=atol)) 1444 or (not psite.properties == csite.properties) 1445 ): 1446 neighbor_dict[cindex].append( 1447 PeriodicNeighbor( 1448 species=psite.species, 1449 coords=f_coord, 1450 lattice=lattice, 1451 properties=psite.properties, 1452 nn_distance=d, 1453 index=pindex, 1454 image=tuple(image), 1455 ) 1456 ) 1457 1458 neighbors: List[List[PeriodicNeighbor]] = [] 1459 1460 for i in range(len(sites)): 1461 neighbors.append(neighbor_dict[i]) 1462 return neighbors 1463 1464 def get_all_neighbors_py( 1465 self, 1466 r: float, 1467 include_index: bool = False, 1468 include_image: bool = False, 1469 sites: Sequence[PeriodicSite] = None, 1470 numerical_tol: float = 1e-8, 1471 ) -> List[List[PeriodicNeighbor]]: 1472 """ 1473 Get neighbors for each atom in the unit cell, out to a distance r 1474 Returns a list of list of neighbors for each site in structure. 1475 Use this method if you are planning on looping over all sites in the 1476 crystal. If you only want neighbors for a particular site, use the 1477 method get_neighbors as it may not have to build such a large supercell 1478 However if you are looping over all sites in the crystal, this method 1479 is more efficient since it only performs one pass over a large enough 1480 supercell to contain all possible atoms out to a distance r. 1481 The return type is a [(site, dist) ...] since most of the time, 1482 subsequent processing requires the distance. 1483 1484 A note about periodic images: Before computing the neighbors, this 1485 operation translates all atoms to within the unit cell (having 1486 fractional coordinates within [0,1)). This means that the "image" of a 1487 site does not correspond to how much it has been translates from its 1488 current position, but which image of the unit cell it resides. 1489 1490 Args: 1491 r (float): Radius of sphere. 1492 include_index (bool): Deprecated. Now, the non-supercell site index 1493 is always included in the returned data. 1494 include_image (bool): Deprecated. Now the supercell image 1495 is always included in the returned data. 1496 sites (list of Sites or None): sites for getting all neighbors, 1497 default is None, which means neighbors will be obtained for all 1498 sites. This is useful in the situation where you are interested 1499 only in one subspecies type, and makes it a lot faster. 1500 numerical_tol (float): This is a numerical tolerance for distances. 1501 Sites which are < numerical_tol are determined to be conincident 1502 with the site. Sites which are r + numerical_tol away is deemed 1503 to be within r from the site. The default of 1e-8 should be 1504 ok in most instances. 1505 1506 Returns: 1507 [[:class:`pymatgen.core.structure.PeriodicNeighbor`],...] 1508 """ 1509 1510 if sites is None: 1511 sites = self.sites 1512 site_coords = np.array([site.coords for site in sites]) 1513 point_neighbors = get_points_in_spheres( 1514 self.cart_coords, 1515 site_coords, 1516 r=r, 1517 pbc=True, 1518 numerical_tol=numerical_tol, 1519 lattice=self.lattice, 1520 ) 1521 neighbors: List[List[PeriodicNeighbor]] = [] 1522 for point_neighbor, site in zip(point_neighbors, sites): 1523 nns: List[PeriodicNeighbor] = [] 1524 if len(point_neighbor) < 1: 1525 neighbors.append([]) 1526 continue 1527 for n in point_neighbor: 1528 coord, d, index, image = n 1529 if (d > numerical_tol) or (self[index] != site): 1530 neighbor = PeriodicNeighbor( 1531 species=self[index].species, 1532 coords=coord, 1533 lattice=self.lattice, 1534 properties=self[index].properties, 1535 nn_distance=d, 1536 index=index, 1537 image=tuple(image), 1538 ) 1539 nns.append(neighbor) 1540 neighbors.append(nns) 1541 return neighbors 1542 1543 @deprecated(get_all_neighbors, "This is retained purely for checking purposes.") 1544 def get_all_neighbors_old(self, r, include_index=False, include_image=False, include_site=True): 1545 """ 1546 Get neighbors for each atom in the unit cell, out to a distance r 1547 Returns a list of list of neighbors for each site in structure. 1548 Use this method if you are planning on looping over all sites in the 1549 crystal. If you only want neighbors for a particular site, use the 1550 method get_neighbors as it may not have to build such a large supercell 1551 However if you are looping over all sites in the crystal, this method 1552 is more efficient since it only performs one pass over a large enough 1553 supercell to contain all possible atoms out to a distance r. 1554 The return type is a [(site, dist) ...] since most of the time, 1555 subsequent processing requires the distance. 1556 1557 A note about periodic images: Before computing the neighbors, this 1558 operation translates all atoms to within the unit cell (having 1559 fractional coordinates within [0,1)). This means that the "image" of a 1560 site does not correspond to how much it has been translates from its 1561 current position, but which image of the unit cell it resides. 1562 1563 Args: 1564 r (float): Radius of sphere. 1565 include_index (bool): Whether to include the non-supercell site 1566 in the returned data 1567 include_image (bool): Whether to include the supercell image 1568 in the returned data 1569 include_site (bool): Whether to include the site in the returned 1570 data. Defaults to True. 1571 1572 Returns: 1573 [:class:`pymatgen.core.structure.PeriodicNeighbor`] 1574 """ 1575 # Use same algorithm as get_sites_in_sphere to determine supercell but 1576 # loop over all atoms in crystal 1577 recp_len = np.array(self.lattice.reciprocal_lattice.abc) 1578 maxr = np.ceil((r + 0.15) * recp_len / (2 * math.pi)) 1579 nmin = np.floor(np.min(self.frac_coords, axis=0)) - maxr 1580 nmax = np.ceil(np.max(self.frac_coords, axis=0)) + maxr 1581 1582 all_ranges = [np.arange(x, y) for x, y in zip(nmin, nmax)] 1583 latt = self._lattice 1584 matrix = latt.matrix 1585 neighbors = [[] for _ in range(len(self._sites))] 1586 all_fcoords = np.mod(self.frac_coords, 1) 1587 coords_in_cell = np.dot(all_fcoords, matrix) 1588 site_coords = self.cart_coords 1589 1590 indices = np.arange(len(self)) 1591 1592 for image in itertools.product(*all_ranges): 1593 coords = np.dot(image, matrix) + coords_in_cell 1594 all_dists = all_distances(coords, site_coords) 1595 all_within_r = np.bitwise_and(all_dists <= r, all_dists > 1e-8) 1596 1597 for (j, d, within_r) in zip(indices, all_dists, all_within_r): 1598 if include_site: 1599 nnsite = PeriodicSite( 1600 self[j].species, 1601 coords[j], 1602 latt, 1603 properties=self[j].properties, 1604 coords_are_cartesian=True, 1605 skip_checks=True, 1606 ) 1607 1608 for i in indices[within_r]: 1609 item = [] 1610 if include_site: 1611 item.append(nnsite) 1612 item.append(d[i]) 1613 if include_index: 1614 item.append(j) 1615 # Add the image, if requested 1616 if include_image: 1617 item.append(image) 1618 neighbors[i].append(item) 1619 return neighbors 1620 1621 def get_neighbors_in_shell( 1622 self, origin: ArrayLike, r: float, dr: float, include_index: bool = False, include_image: bool = False 1623 ) -> List[PeriodicNeighbor]: 1624 """ 1625 Returns all sites in a shell centered on origin (coords) between radii 1626 r-dr and r+dr. 1627 1628 Args: 1629 origin (3x1 array): Cartesian coordinates of center of sphere. 1630 r (float): Inner radius of shell. 1631 dr (float): Width of shell. 1632 include_index (bool): Deprecated. Now, the non-supercell site index 1633 is always included in the returned data. 1634 include_image (bool): Deprecated. Now the supercell image 1635 is always included in the returned data. 1636 1637 Returns: 1638 [NearestNeighbor] where Nearest Neighbor is a named tuple containing 1639 (site, distance, index, image). 1640 """ 1641 outer = self.get_sites_in_sphere(origin, r + dr, include_index=include_index, include_image=include_image) 1642 inner = r - dr 1643 return [t for t in outer if t.nn_distance > inner] 1644 1645 def get_sorted_structure( 1646 self, key: Optional[Callable] = None, reverse: bool = False 1647 ) -> Union["IStructure", "Structure"]: 1648 """ 1649 Get a sorted copy of the structure. The parameters have the same 1650 meaning as in list.sort. By default, sites are sorted by the 1651 electronegativity of the species. 1652 1653 Args: 1654 key: Specifies a function of one argument that is used to extract 1655 a comparison key from each list element: key=str.lower. The 1656 default value is None (compare the elements directly). 1657 reverse (bool): If set to True, then the list elements are sorted 1658 as if each comparison were reversed. 1659 """ 1660 sites = sorted(self, key=key, reverse=reverse) 1661 return self.__class__.from_sites(sites, charge=self._charge) 1662 1663 def get_reduced_structure( 1664 self, reduction_algo: Literal["niggli", "LLL"] = "niggli" 1665 ) -> Union["IStructure", "Structure"]: 1666 """ 1667 Get a reduced structure. 1668 1669 Args: 1670 reduction_algo ("niggli" | "LLL"): The lattice reduction algorithm to use. 1671 Defaults to "niggli". 1672 """ 1673 if reduction_algo == "niggli": 1674 reduced_latt = self._lattice.get_niggli_reduced_lattice() 1675 elif reduction_algo == "LLL": 1676 reduced_latt = self._lattice.get_lll_reduced_lattice() 1677 else: 1678 raise ValueError("Invalid reduction algo : {}".format(reduction_algo)) 1679 1680 if reduced_latt != self.lattice: 1681 return self.__class__( # type: ignore 1682 reduced_latt, 1683 self.species_and_occu, 1684 self.cart_coords, 1685 coords_are_cartesian=True, 1686 to_unit_cell=True, 1687 site_properties=self.site_properties, 1688 charge=self._charge, 1689 ) 1690 return self.copy() 1691 1692 def copy(self, site_properties=None, sanitize=False): 1693 """ 1694 Convenience method to get a copy of the structure, with options to add 1695 site properties. 1696 1697 Args: 1698 site_properties (dict): Properties to add or override. The 1699 properties are specified in the same way as the constructor, 1700 i.e., as a dict of the form {property: [values]}. The 1701 properties should be in the order of the *original* structure 1702 if you are performing sanitization. 1703 sanitize (bool): If True, this method will return a sanitized 1704 structure. Sanitization performs a few things: (i) The sites are 1705 sorted by electronegativity, (ii) a LLL lattice reduction is 1706 carried out to obtain a relatively orthogonalized cell, 1707 (iii) all fractional coords for sites are mapped into the 1708 unit cell. 1709 1710 Returns: 1711 A copy of the Structure, with optionally new site_properties and 1712 optionally sanitized. 1713 """ 1714 props = self.site_properties 1715 if site_properties: 1716 props.update(site_properties) 1717 if not sanitize: 1718 return self.__class__( 1719 self._lattice, 1720 self.species_and_occu, 1721 self.frac_coords, 1722 charge=self._charge, 1723 site_properties=props, 1724 ) 1725 reduced_latt = self._lattice.get_lll_reduced_lattice() 1726 new_sites = [] 1727 for i, site in enumerate(self): 1728 frac_coords = reduced_latt.get_fractional_coords(site.coords) 1729 site_props = {} 1730 for p, v in props.items(): 1731 site_props[p] = v[i] 1732 new_sites.append( 1733 PeriodicSite( 1734 site.species, 1735 frac_coords, 1736 reduced_latt, 1737 to_unit_cell=True, 1738 properties=site_props, 1739 skip_checks=True, 1740 ) 1741 ) 1742 new_sites = sorted(new_sites) 1743 return self.__class__.from_sites(new_sites, charge=self._charge) 1744 1745 def interpolate( 1746 self, 1747 end_structure: Union["IStructure", "Structure"], 1748 nimages: Union[int, Iterable] = 10, 1749 interpolate_lattices: bool = False, 1750 pbc: bool = True, 1751 autosort_tol: float = 0, 1752 ) -> List[Union["IStructure", "Structure"]]: 1753 """ 1754 Interpolate between this structure and end_structure. Useful for 1755 construction of NEB inputs. 1756 1757 Args: 1758 end_structure (Structure): structure to interpolate between this 1759 structure and end. 1760 nimages (int,list): No. of interpolation images or a list of 1761 interpolation images. Defaults to 10 images. 1762 interpolate_lattices (bool): Whether to interpolate the lattices. 1763 Interpolates the lengths and angles (rather than the matrix) 1764 so orientation may be affected. 1765 pbc (bool): Whether to use periodic boundary conditions to find 1766 the shortest path between endpoints. 1767 autosort_tol (float): A distance tolerance in angstrom in 1768 which to automatically sort end_structure to match to the 1769 closest points in this particular structure. This is usually 1770 what you want in a NEB calculation. 0 implies no sorting. 1771 Otherwise, a 0.5 value usually works pretty well. 1772 1773 Returns: 1774 List of interpolated structures. The starting and ending 1775 structures included as the first and last structures respectively. 1776 A total of (nimages + 1) structures are returned. 1777 """ 1778 # Check length of structures 1779 if len(self) != len(end_structure): 1780 raise ValueError("Structures have different lengths!") 1781 1782 if not (interpolate_lattices or self.lattice == end_structure.lattice): 1783 raise ValueError("Structures with different lattices!") 1784 1785 if not isinstance(nimages, collections.abc.Iterable): 1786 images = np.arange(nimages + 1) / nimages 1787 else: 1788 images = nimages 1789 1790 # Check that both structures have the same species 1791 for i, site in enumerate(self): 1792 if site.species != end_structure[i].species: 1793 raise ValueError( 1794 "Different species!\nStructure 1:\n" + str(self) + "\nStructure 2\n" + str(end_structure) 1795 ) 1796 1797 start_coords = np.array(self.frac_coords) 1798 end_coords = np.array(end_structure.frac_coords) 1799 1800 if autosort_tol: 1801 dist_matrix = self.lattice.get_all_distances(start_coords, end_coords) 1802 site_mappings = collections.defaultdict(list) # type: Dict[int, List[int]] 1803 unmapped_start_ind = [] 1804 for i, row in enumerate(dist_matrix): 1805 ind = np.where(row < autosort_tol)[0] 1806 if len(ind) == 1: 1807 site_mappings[i].append(ind[0]) 1808 else: 1809 unmapped_start_ind.append(i) 1810 1811 if len(unmapped_start_ind) > 1: 1812 raise ValueError( 1813 "Unable to reliably match structures " 1814 "with auto_sort_tol = %f. unmapped indices " 1815 "= %s" % (autosort_tol, unmapped_start_ind) 1816 ) 1817 1818 sorted_end_coords = np.zeros_like(end_coords) 1819 matched = [] 1820 for i, j in site_mappings.items(): 1821 if len(j) > 1: 1822 raise ValueError( 1823 "Unable to reliably match structures " 1824 "with auto_sort_tol = %f. More than one " 1825 "site match!" % autosort_tol 1826 ) 1827 sorted_end_coords[i] = end_coords[j[0]] 1828 matched.append(j[0]) 1829 1830 if len(unmapped_start_ind) == 1: 1831 i = unmapped_start_ind[0] 1832 j = list(set(range(len(start_coords))).difference(matched))[0] # type: ignore 1833 sorted_end_coords[i] = end_coords[j] 1834 1835 end_coords = sorted_end_coords 1836 1837 vec = end_coords - start_coords 1838 if pbc: 1839 vec -= np.round(vec) 1840 sp = self.species_and_occu 1841 structs = [] 1842 1843 if interpolate_lattices: 1844 # interpolate lattice matrices using polar decomposition 1845 from scipy.linalg import polar 1846 1847 # u is unitary (rotation), p is stretch 1848 u, p = polar(np.dot(end_structure.lattice.matrix.T, np.linalg.inv(self.lattice.matrix.T))) 1849 lvec = p - np.identity(3) 1850 lstart = self.lattice.matrix.T 1851 1852 for x in images: 1853 if interpolate_lattices: 1854 l_a = np.dot(np.identity(3) + x * lvec, lstart).T 1855 lat = Lattice(l_a) 1856 else: 1857 lat = self.lattice 1858 fcoords = start_coords + x * vec 1859 structs.append(self.__class__(lat, sp, fcoords, site_properties=self.site_properties)) # type: ignore 1860 return structs 1861 1862 def get_miller_index_from_site_indexes(self, site_ids, round_dp=4, verbose=True): 1863 """ 1864 Get the Miller index of a plane from a set of sites indexes. 1865 1866 A minimum of 3 sites are required. If more than 3 sites are given 1867 the best plane that minimises the distance to all points will be 1868 calculated. 1869 1870 Args: 1871 site_ids (list of int): A list of site indexes to consider. A 1872 minimum of three site indexes are required. If more than three 1873 sites are provided, the best plane that minimises the distance 1874 to all sites will be calculated. 1875 round_dp (int, optional): The number of decimal places to round the 1876 miller index to. 1877 verbose (bool, optional): Whether to print warnings. 1878 1879 Returns: 1880 (tuple): The Miller index. 1881 """ 1882 return self.lattice.get_miller_index_from_coords( 1883 self.frac_coords[site_ids], 1884 coords_are_cartesian=False, 1885 round_dp=round_dp, 1886 verbose=verbose, 1887 ) 1888 1889 def get_primitive_structure( 1890 self, tolerance: float = 0.25, use_site_props: bool = False, constrain_latt: Union[List, Dict] = None 1891 ): 1892 """ 1893 This finds a smaller unit cell than the input. Sometimes it doesn"t 1894 find the smallest possible one, so this method is recursively called 1895 until it is unable to find a smaller cell. 1896 1897 NOTE: if the tolerance is greater than 1/2 the minimum inter-site 1898 distance in the primitive cell, the algorithm will reject this lattice. 1899 1900 Args: 1901 tolerance (float), Angstroms: Tolerance for each coordinate of a 1902 particular site. For example, [0.1, 0, 0.1] in cartesian 1903 coordinates will be considered to be on the same coordinates 1904 as [0, 0, 0] for a tolerance of 0.25. Defaults to 0.25. 1905 use_site_props (bool): Whether to account for site properties in 1906 differntiating sites. 1907 constrain_latt (list/dict): List of lattice parameters we want to 1908 preserve, e.g. ["alpha", "c"] or dict with the lattice 1909 parameter names as keys and values we want the parameters to 1910 be e.g. {"alpha": 90, "c": 2.5}. 1911 1912 Returns: 1913 The most primitive structure found. 1914 """ 1915 if constrain_latt is None: 1916 constrain_latt = [] 1917 1918 def site_label(site): 1919 if not use_site_props: 1920 return site.species_string 1921 d = [site.species_string] 1922 for k in sorted(site.properties.keys()): 1923 d.append(k + "=" + str(site.properties[k])) 1924 return ", ".join(d) 1925 1926 # group sites by species string 1927 sites = sorted(self._sites, key=site_label) 1928 1929 grouped_sites = [list(a[1]) for a in itertools.groupby(sites, key=site_label)] 1930 grouped_fcoords = [np.array([s.frac_coords for s in g]) for g in grouped_sites] 1931 1932 # min_vecs are approximate periodicities of the cell. The exact 1933 # periodicities from the supercell matrices are checked against these 1934 # first 1935 min_fcoords = min(grouped_fcoords, key=lambda x: len(x)) 1936 min_vecs = min_fcoords - min_fcoords[0] 1937 1938 # fractional tolerance in the supercell 1939 super_ftol = np.divide(tolerance, self.lattice.abc) 1940 super_ftol_2 = super_ftol * 2 1941 1942 def pbc_coord_intersection(fc1, fc2, tol): 1943 """ 1944 Returns the fractional coords in fc1 that have coordinates 1945 within tolerance to some coordinate in fc2 1946 """ 1947 d = fc1[:, None, :] - fc2[None, :, :] 1948 d -= np.round(d) 1949 np.abs(d, d) 1950 return fc1[np.any(np.all(d < tol, axis=-1), axis=-1)] 1951 1952 # here we reduce the number of min_vecs by enforcing that every 1953 # vector in min_vecs approximately maps each site onto a similar site. 1954 # The subsequent processing is O(fu^3 * min_vecs) = O(n^4) if we do no 1955 # reduction. 1956 # This reduction is O(n^3) so usually is an improvement. Using double 1957 # the tolerance because both vectors are approximate 1958 for g in sorted(grouped_fcoords, key=lambda x: len(x)): 1959 for f in g: 1960 min_vecs = pbc_coord_intersection(min_vecs, g - f, super_ftol_2) 1961 1962 def get_hnf(fu): 1963 """ 1964 Returns all possible distinct supercell matrices given a 1965 number of formula units in the supercell. Batches the matrices 1966 by the values in the diagonal (for less numpy overhead). 1967 Computational complexity is O(n^3), and difficult to improve. 1968 Might be able to do something smart with checking combinations of a 1969 and b first, though unlikely to reduce to O(n^2). 1970 """ 1971 1972 def factors(n): 1973 for i in range(1, n + 1): 1974 if n % i == 0: 1975 yield i 1976 1977 for det in factors(fu): 1978 if det == 1: 1979 continue 1980 for a in factors(det): 1981 for e in factors(det // a): 1982 g = det // a // e 1983 yield det, np.array( 1984 [ 1985 [[a, b, c], [0, e, f], [0, 0, g]] 1986 for b, c, f in itertools.product(range(a), range(a), range(e)) 1987 ] 1988 ) 1989 1990 # we cant let sites match to their neighbors in the supercell 1991 grouped_non_nbrs = [] 1992 for gfcoords in grouped_fcoords: 1993 fdist = gfcoords[None, :, :] - gfcoords[:, None, :] 1994 fdist -= np.round(fdist) 1995 np.abs(fdist, fdist) 1996 non_nbrs = np.any(fdist > 2 * super_ftol[None, None, :], axis=-1) 1997 # since we want sites to match to themselves 1998 np.fill_diagonal(non_nbrs, True) 1999 grouped_non_nbrs.append(non_nbrs) 2000 2001 num_fu = functools.reduce(math.gcd, map(len, grouped_sites)) 2002 for size, ms in get_hnf(num_fu): 2003 inv_ms = np.linalg.inv(ms) 2004 2005 # find sets of lattice vectors that are are present in min_vecs 2006 dist = inv_ms[:, :, None, :] - min_vecs[None, None, :, :] 2007 dist -= np.round(dist) 2008 np.abs(dist, dist) 2009 is_close = np.all(dist < super_ftol, axis=-1) 2010 any_close = np.any(is_close, axis=-1) 2011 inds = np.all(any_close, axis=-1) 2012 2013 for inv_m, m in zip(inv_ms[inds], ms[inds]): 2014 new_m = np.dot(inv_m, self.lattice.matrix) 2015 ftol = np.divide(tolerance, np.sqrt(np.sum(new_m ** 2, axis=1))) 2016 2017 valid = True 2018 new_coords = [] 2019 new_sp = [] 2020 new_props = collections.defaultdict(list) 2021 for gsites, gfcoords, non_nbrs in zip(grouped_sites, grouped_fcoords, grouped_non_nbrs): 2022 all_frac = np.dot(gfcoords, m) 2023 2024 # calculate grouping of equivalent sites, represented by 2025 # adjacency matrix 2026 fdist = all_frac[None, :, :] - all_frac[:, None, :] 2027 fdist = np.abs(fdist - np.round(fdist)) 2028 close_in_prim = np.all(fdist < ftol[None, None, :], axis=-1) 2029 groups = np.logical_and(close_in_prim, non_nbrs) 2030 2031 # check that groups are correct 2032 if not np.all(np.sum(groups, axis=0) == size): 2033 valid = False 2034 break 2035 2036 # check that groups are all cliques 2037 for g in groups: 2038 if not np.all(groups[g][:, g]): 2039 valid = False 2040 break 2041 if not valid: 2042 break 2043 2044 # add the new sites, averaging positions 2045 added = np.zeros(len(gsites)) 2046 new_fcoords = all_frac % 1 2047 for i, group in enumerate(groups): 2048 if not added[i]: 2049 added[group] = True 2050 inds = np.where(group)[0] 2051 coords = new_fcoords[inds[0]] 2052 for n, j in enumerate(inds[1:]): 2053 offset = new_fcoords[j] - coords 2054 coords += (offset - np.round(offset)) / (n + 2) 2055 new_sp.append(gsites[inds[0]].species) 2056 for k in gsites[inds[0]].properties: 2057 new_props[k].append(gsites[inds[0]].properties[k]) 2058 new_coords.append(coords) 2059 2060 if valid: 2061 inv_m = np.linalg.inv(m) 2062 new_l = Lattice(np.dot(inv_m, self.lattice.matrix)) 2063 s = Structure( 2064 new_l, 2065 new_sp, 2066 new_coords, 2067 site_properties=new_props, 2068 coords_are_cartesian=False, 2069 ) 2070 2071 # Default behavior 2072 p = s.get_primitive_structure( 2073 tolerance=tolerance, 2074 use_site_props=use_site_props, 2075 constrain_latt=constrain_latt, 2076 ).get_reduced_structure() 2077 if not constrain_latt: 2078 return p 2079 2080 # Only return primitive structures that 2081 # satisfy the restriction condition 2082 p_latt, s_latt = p.lattice, self.lattice 2083 if type(constrain_latt).__name__ == "list": 2084 if all(getattr(p_latt, p) == getattr(s_latt, p) for p in constrain_latt): 2085 return p 2086 elif type(constrain_latt).__name__ == "dict": 2087 if all(getattr(p_latt, p) == constrain_latt[p] for p in constrain_latt.keys()): # type: ignore 2088 return p 2089 2090 return self.copy() 2091 2092 def __repr__(self): 2093 outs = ["Structure Summary", repr(self.lattice)] 2094 if self._charge: 2095 if self._charge >= 0: 2096 outs.append("Overall Charge: +{}".format(self._charge)) 2097 else: 2098 outs.append("Overall Charge: -{}".format(self._charge)) 2099 for s in self: 2100 outs.append(repr(s)) 2101 return "\n".join(outs) 2102 2103 def __str__(self): 2104 outs = [ 2105 "Full Formula ({s})".format(s=self.composition.formula), 2106 "Reduced Formula: {}".format(self.composition.reduced_formula), 2107 ] 2108 2109 def to_s(x): 2110 return "%0.6f" % x 2111 2112 outs.append("abc : " + " ".join([to_s(i).rjust(10) for i in self.lattice.abc])) 2113 outs.append("angles: " + " ".join([to_s(i).rjust(10) for i in self.lattice.angles])) 2114 if self._charge: 2115 if self._charge >= 0: 2116 outs.append("Overall Charge: +{}".format(self._charge)) 2117 else: 2118 outs.append("Overall Charge: -{}".format(self._charge)) 2119 outs.append("Sites ({i})".format(i=len(self))) 2120 data = [] 2121 props = self.site_properties 2122 keys = sorted(props.keys()) 2123 for i, site in enumerate(self): 2124 row = [str(i), site.species_string] 2125 row.extend([to_s(j) for j in site.frac_coords]) 2126 for k in keys: 2127 row.append(props[k][i]) 2128 data.append(row) 2129 outs.append( 2130 tabulate( 2131 data, 2132 headers=["#", "SP", "a", "b", "c"] + keys, 2133 ) 2134 ) 2135 return "\n".join(outs) 2136 2137 def get_orderings(self, mode: Literal["enum", "sqs"] = "enum", **kwargs) -> List["Structure"]: 2138 r""" 2139 Returns list of orderings for a disordered structure. If structure 2140 does not contain disorder, the default structure is returned. 2141 2142 Args: 2143 mode ("enum" | "sqs"): Either "enum" or "sqs". If enum, 2144 the enumlib will be used to return all distinct 2145 orderings. If sqs, mcsqs will be used to return 2146 an sqs structure. 2147 kwargs: kwargs passed to either 2148 pymatgen.command_line..enumlib_caller.EnumlibAdaptor 2149 or pymatgen.command_line.mcsqs_caller.run_mcsqs. 2150 For run_mcsqs, a default cluster search of 2 cluster interactions 2151 with 1NN distance and 3 cluster interactions with 2NN distance 2152 is set. 2153 2154 Returns: 2155 List[Structure] 2156 """ 2157 if self.is_ordered: 2158 return [self] 2159 if mode.startswith("enum"): 2160 from pymatgen.command_line.enumlib_caller import EnumlibAdaptor 2161 2162 adaptor = EnumlibAdaptor(self, **kwargs) 2163 adaptor.run() 2164 return adaptor.structures 2165 if mode == "sqs": 2166 from pymatgen.command_line.mcsqs_caller import run_mcsqs 2167 2168 if "clusters" not in kwargs: 2169 disordered_sites = [site for site in self if not site.is_ordered] 2170 subset_structure = Structure.from_sites(disordered_sites) 2171 dist_matrix = subset_structure.distance_matrix 2172 dists = sorted(set(dist_matrix.ravel())) 2173 unique_dists = [] 2174 for i in range(1, len(dists)): 2175 if dists[i] - dists[i - 1] > 0.1: 2176 unique_dists.append(dists[i]) 2177 clusters = {(i + 2): d + 0.01 for i, d in enumerate(unique_dists) if i < 2} 2178 kwargs["clusters"] = clusters 2179 return [run_mcsqs(self, **kwargs).bestsqs] 2180 raise ValueError() 2181 2182 def as_dict(self, verbosity=1, fmt=None, **kwargs): 2183 """ 2184 Dict representation of Structure. 2185 2186 Args: 2187 verbosity (int): Verbosity level. Default of 1 includes both 2188 direct and cartesian coordinates for all sites, lattice 2189 parameters, etc. Useful for reading and for insertion into a 2190 database. Set to 0 for an extremely lightweight version 2191 that only includes sufficient information to reconstruct the 2192 object. 2193 fmt (str): Specifies a format for the dict. Defaults to None, 2194 which is the default format used in pymatgen. Other options 2195 include "abivars". 2196 **kwargs: Allow passing of other kwargs needed for certain 2197 formats, e.g., "abivars". 2198 2199 Returns: 2200 JSON serializable dict representation. 2201 """ 2202 if fmt == "abivars": 2203 """Returns a dictionary with the ABINIT variables.""" 2204 from pymatgen.io.abinit.abiobjects import structure_to_abivars 2205 2206 return structure_to_abivars(self, **kwargs) 2207 2208 latt_dict = self._lattice.as_dict(verbosity=verbosity) 2209 del latt_dict["@module"] 2210 del latt_dict["@class"] 2211 2212 d = { 2213 "@module": self.__class__.__module__, 2214 "@class": self.__class__.__name__, 2215 "charge": self._charge, 2216 "lattice": latt_dict, 2217 "sites": [], 2218 } 2219 for site in self: 2220 site_dict = site.as_dict(verbosity=verbosity) 2221 del site_dict["lattice"] 2222 del site_dict["@module"] 2223 del site_dict["@class"] 2224 d["sites"].append(site_dict) 2225 return d 2226 2227 def as_dataframe(self): 2228 """ 2229 Returns a Pandas dataframe of the sites. Structure level attributes are stored in DataFrame.attrs. Example: 2230 2231 Species a b c x y z magmom 2232 0 (Si) 0.0 0.0 0.000000e+00 0.0 0.000000e+00 0.000000e+00 5 2233 1 (Si) 0.0 0.0 1.000000e-07 0.0 -2.217138e-07 3.135509e-07 -5 2234 """ 2235 data = [] 2236 site_properties = self.site_properties 2237 prop_keys = list(site_properties.keys()) 2238 for site in self: 2239 row = [site.species] + list(site.frac_coords) + list(site.coords) 2240 for k in prop_keys: 2241 row.append(site.properties.get(k)) 2242 data.append(row) 2243 import pandas as pd 2244 2245 df = pd.DataFrame(data, columns=["Species", "a", "b", "c", "x", "y", "z"] + prop_keys) 2246 df.attrs["Reduced Formula"] = self.composition.reduced_formula 2247 df.attrs["Lattice"] = self.lattice 2248 return df 2249 2250 @classmethod 2251 def from_dict(cls, d, fmt=None): 2252 """ 2253 Reconstitute a Structure object from a dict representation of Structure 2254 created using as_dict(). 2255 2256 Args: 2257 d (dict): Dict representation of structure. 2258 2259 Returns: 2260 Structure object 2261 """ 2262 if fmt == "abivars": 2263 from pymatgen.io.abinit.abiobjects import structure_from_abivars 2264 2265 return structure_from_abivars(cls=cls, **d) 2266 2267 lattice = Lattice.from_dict(d["lattice"]) 2268 sites = [PeriodicSite.from_dict(sd, lattice) for sd in d["sites"]] 2269 charge = d.get("charge", None) 2270 return cls.from_sites(sites, charge=charge) 2271 2272 def to(self, fmt: str = None, filename=None, **kwargs) -> Optional[str]: 2273 r""" 2274 Outputs the structure to a file or string. 2275 2276 Args: 2277 fmt (str): Format to output to. Defaults to JSON unless filename 2278 is provided. If fmt is specifies, it overrides whatever the 2279 filename is. Options include "cif", "poscar", "cssr", "json". 2280 Non-case sensitive. 2281 filename (str): If provided, output will be written to a file. If 2282 fmt is not specified, the format is determined from the 2283 filename. Defaults is None, i.e. string output. 2284 **kwargs: Kwargs passthru to relevant methods. E.g., This allows 2285 the passing of parameters like symprec to the 2286 CifWriter.__init__ method for generation of symmetric cifs. 2287 2288 Returns: 2289 (str) if filename is None. None otherwise. 2290 """ 2291 filename = filename or "" 2292 fmt = "" if fmt is None else fmt.lower() 2293 fname = os.path.basename(filename) 2294 2295 if fmt == "cif" or fnmatch(fname.lower(), "*.cif*"): 2296 from pymatgen.io.cif import CifWriter 2297 2298 writer = CifWriter(self, **kwargs) 2299 elif fmt == "mcif" or fnmatch(fname.lower(), "*.mcif*"): 2300 from pymatgen.io.cif import CifWriter 2301 2302 writer = CifWriter(self, write_magmoms=True, **kwargs) 2303 elif fmt == "poscar" or fnmatch(fname, "*POSCAR*"): 2304 from pymatgen.io.vasp import Poscar 2305 2306 writer = Poscar(self, **kwargs) 2307 elif fmt == "cssr" or fnmatch(fname.lower(), "*.cssr*"): 2308 from pymatgen.io.cssr import Cssr 2309 2310 writer = Cssr(self) # type: ignore 2311 elif fmt == "json" or fnmatch(fname.lower(), "*.json"): 2312 s = json.dumps(self.as_dict()) 2313 if filename: 2314 with zopen(filename, "wt") as f: 2315 f.write("%s" % s) 2316 return s 2317 elif fmt == "xsf" or fnmatch(fname.lower(), "*.xsf*"): 2318 from pymatgen.io.xcrysden import XSF 2319 2320 s = XSF(self).to_string() 2321 if filename: 2322 with zopen(fname, "wt", encoding="utf8") as f: 2323 f.write(s) 2324 return s 2325 elif ( 2326 fmt == "mcsqs" or fnmatch(fname, "*rndstr.in*") or fnmatch(fname, "*lat.in*") or fnmatch(fname, "*bestsqs*") 2327 ): 2328 from pymatgen.io.atat import Mcsqs 2329 2330 s = Mcsqs(self).to_string() 2331 if filename: 2332 with zopen(fname, "wt", encoding="ascii") as f: 2333 f.write(s) 2334 return s 2335 elif fmt == "prismatic" or fnmatch(fname, "*prismatic*"): 2336 from pymatgen.io.prismatic import Prismatic 2337 2338 s = Prismatic(self).to_string() 2339 return s 2340 elif fmt == "yaml" or fnmatch(fname, "*.yaml*") or fnmatch(fname, "*.yml*"): 2341 if filename: 2342 with zopen(filename, "wt") as f: 2343 yaml.safe_dump(self.as_dict(), f) 2344 return None 2345 return yaml.safe_dump(self.as_dict()) 2346 elif fmt == "fleur-inpgen" or fnmatch(fname, "*.in*"): 2347 from pymatgen.io.fleur import FleurInput 2348 2349 writer = FleurInput(self, **kwargs) 2350 else: 2351 raise ValueError("Invalid format: `%s`" % str(fmt)) 2352 2353 if filename: 2354 writer.write_file(filename) 2355 return None 2356 return writer.__str__() 2357 2358 @classmethod 2359 def from_str( 2360 cls, 2361 input_string: str, 2362 fmt: Literal["cif", "poscar", "cssr", "json", "yaml", "xsf", "mcsqs"], 2363 primitive=False, 2364 sort=False, 2365 merge_tol=0.0, 2366 ): 2367 """ 2368 Reads a structure from a string. 2369 2370 Args: 2371 input_string (str): String to parse. 2372 fmt (str): A file format specification. One of "cif", "poscar", "cssr", 2373 "json", "yaml", "xsf", "mcsqs". 2374 primitive (bool): Whether to find a primitive cell. Defaults to 2375 False. 2376 sort (bool): Whether to sort the sites in accordance to the default 2377 ordering criteria, i.e., electronegativity. 2378 merge_tol (float): If this is some positive number, sites that 2379 are within merge_tol from each other will be merged. Usually 2380 0.01 should be enough to deal with common numerical issues. 2381 2382 Returns: 2383 IStructure / Structure 2384 """ 2385 2386 fmt_low = fmt.lower() 2387 if fmt_low == "cif": 2388 from pymatgen.io.cif import CifParser 2389 2390 parser = CifParser.from_string(input_string) 2391 s = parser.get_structures(primitive=primitive)[0] 2392 elif fmt_low == "poscar": 2393 from pymatgen.io.vasp import Poscar 2394 2395 s = Poscar.from_string(input_string, False, read_velocities=False).structure 2396 elif fmt_low == "cssr": 2397 from pymatgen.io.cssr import Cssr 2398 2399 cssr = Cssr.from_string(input_string) 2400 s = cssr.structure 2401 elif fmt_low == "json": 2402 d = json.loads(input_string) 2403 s = Structure.from_dict(d) 2404 elif fmt_low == "yaml": 2405 d = yaml.safe_load(input_string) 2406 s = Structure.from_dict(d) 2407 elif fmt_low == "xsf": 2408 from pymatgen.io.xcrysden import XSF 2409 2410 s = XSF.from_string(input_string).structure 2411 elif fmt_low == "mcsqs": 2412 from pymatgen.io.atat import Mcsqs 2413 2414 s = Mcsqs.structure_from_string(input_string) 2415 elif fmt == "fleur-inpgen": 2416 from pymatgen.io.fleur import FleurInput 2417 2418 s = FleurInput.from_string(input_string, inpgen_input=True).structure 2419 elif fmt == "fleur": 2420 from pymatgen.io.fleur import FleurInput 2421 2422 s = FleurInput.from_string(input_string, inpgen_input=False).structure 2423 else: 2424 raise ValueError(f"Unrecognized format `{fmt}`!") 2425 2426 if sort: 2427 s = s.get_sorted_structure() 2428 if merge_tol: 2429 s.merge_sites(merge_tol) 2430 return cls.from_sites(s) 2431 2432 @classmethod 2433 def from_file(cls, filename, primitive=False, sort=False, merge_tol=0.0): 2434 """ 2435 Reads a structure from a file. For example, anything ending in 2436 a "cif" is assumed to be a Crystallographic Information Format file. 2437 Supported formats include CIF, POSCAR/CONTCAR, CHGCAR, LOCPOT, 2438 vasprun.xml, CSSR, Netcdf and pymatgen's JSON serialized structures. 2439 2440 Args: 2441 filename (str): The filename to read from. 2442 primitive (bool): Whether to convert to a primitive cell 2443 Only available for cifs. Defaults to False. 2444 sort (bool): Whether to sort sites. Default to False. 2445 merge_tol (float): If this is some positive number, sites that 2446 are within merge_tol from each other will be merged. Usually 2447 0.01 should be enough to deal with common numerical issues. 2448 2449 Returns: 2450 Structure. 2451 """ 2452 filename = str(filename) 2453 if filename.endswith(".nc"): 2454 # Read Structure from a netcdf file. 2455 from pymatgen.io.abinit.netcdf import structure_from_ncdata 2456 2457 s = structure_from_ncdata(filename, cls=cls) 2458 if sort: 2459 s = s.get_sorted_structure() 2460 return s 2461 2462 from pymatgen.io.exciting import ExcitingInput 2463 from pymatgen.io.lmto import LMTOCtrl 2464 from pymatgen.io.vasp import Chgcar, Vasprun 2465 2466 fname = os.path.basename(filename) 2467 with zopen(filename, "rt") as f: 2468 contents = f.read() 2469 if fnmatch(fname.lower(), "*.cif*") or fnmatch(fname.lower(), "*.mcif*"): 2470 return cls.from_str(contents, fmt="cif", primitive=primitive, sort=sort, merge_tol=merge_tol) 2471 if fnmatch(fname, "*POSCAR*") or fnmatch(fname, "*CONTCAR*") or fnmatch(fname, "*.vasp"): 2472 s = cls.from_str( 2473 contents, 2474 fmt="poscar", 2475 primitive=primitive, 2476 sort=sort, 2477 merge_tol=merge_tol, 2478 ) 2479 2480 elif fnmatch(fname, "CHGCAR*") or fnmatch(fname, "LOCPOT*"): 2481 s = Chgcar.from_file(filename).structure 2482 elif fnmatch(fname, "vasprun*.xml*"): 2483 s = Vasprun(filename).final_structure 2484 elif fnmatch(fname.lower(), "*.cssr*"): 2485 return cls.from_str( 2486 contents, 2487 fmt="cssr", 2488 primitive=primitive, 2489 sort=sort, 2490 merge_tol=merge_tol, 2491 ) 2492 elif fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"): 2493 return cls.from_str( 2494 contents, 2495 fmt="json", 2496 primitive=primitive, 2497 sort=sort, 2498 merge_tol=merge_tol, 2499 ) 2500 elif fnmatch(fname, "*.yaml*"): 2501 return cls.from_str( 2502 contents, 2503 fmt="yaml", 2504 primitive=primitive, 2505 sort=sort, 2506 merge_tol=merge_tol, 2507 ) 2508 elif fnmatch(fname, "*.xsf"): 2509 return cls.from_str(contents, fmt="xsf", primitive=primitive, sort=sort, merge_tol=merge_tol) 2510 elif fnmatch(fname, "input*.xml"): 2511 return ExcitingInput.from_file(fname).structure 2512 elif fnmatch(fname, "*rndstr.in*") or fnmatch(fname, "*lat.in*") or fnmatch(fname, "*bestsqs*"): 2513 return cls.from_str( 2514 contents, 2515 fmt="mcsqs", 2516 primitive=primitive, 2517 sort=sort, 2518 merge_tol=merge_tol, 2519 ) 2520 elif fnmatch(fname, "CTRL*"): 2521 return LMTOCtrl.from_file(filename=filename).structure 2522 elif fnmatch(fname, "inp*.xml") or fnmatch(fname, "*.in*") or fnmatch(fname, "inp_*"): 2523 from pymatgen.io.fleur import FleurInput 2524 2525 s = FleurInput.from_file(filename).structure 2526 else: 2527 raise ValueError("Unrecognized file extension!") 2528 if sort: 2529 s = s.get_sorted_structure() 2530 if merge_tol: 2531 s.merge_sites(merge_tol) 2532 2533 s.__class__ = cls 2534 return s 2535 2536 2537class IMolecule(SiteCollection, MSONable): 2538 """ 2539 Basic immutable Molecule object without periodicity. Essentially a 2540 sequence of sites. IMolecule is made to be immutable so that they can 2541 function as keys in a dict. For a mutable molecule, 2542 use the :class:Molecule. 2543 2544 Molecule extends Sequence and Hashable, which means that in many cases, 2545 it can be used like any Python sequence. Iterating through a molecule is 2546 equivalent to going through the sites in sequence. 2547 """ 2548 2549 def __init__( 2550 self, 2551 species: Sequence[CompositionLike], 2552 coords: Sequence[ArrayLike], 2553 charge: float = 0.0, 2554 spin_multiplicity: float = None, 2555 validate_proximity: bool = False, 2556 site_properties: dict = None, 2557 ): 2558 """ 2559 Creates a Molecule. 2560 2561 Args: 2562 species: list of atomic species. Possible kinds of input include a 2563 list of dict of elements/species and occupancies, a List of 2564 elements/specie specified as actual Element/Species, Strings 2565 ("Fe", "Fe2+") or atomic numbers (1,56). 2566 coords (3x1 array): list of cartesian coordinates of each species. 2567 charge (float): Charge for the molecule. Defaults to 0. 2568 spin_multiplicity (int): Spin multiplicity for molecule. 2569 Defaults to None, which means that the spin multiplicity is 2570 set to 1 if the molecule has no unpaired electrons and to 2 2571 if there are unpaired electrons. 2572 validate_proximity (bool): Whether to check if there are sites 2573 that are less than 1 Ang apart. Defaults to False. 2574 site_properties (dict): Properties associated with the sites as 2575 a dict of sequences, e.g., {"magmom":[5,5,5,5]}. The 2576 sequences have to be the same length as the atomic species 2577 and fractional_coords. Defaults to None for no properties. 2578 """ 2579 if len(species) != len(coords): 2580 raise StructureError( 2581 ( 2582 "The list of atomic species must be of the", 2583 " same length as the list of fractional ", 2584 "coordinates.", 2585 ) 2586 ) 2587 2588 sites = [] 2589 for i, _ in enumerate(species): 2590 prop = None 2591 if site_properties: 2592 prop = {k: v[i] for k, v in site_properties.items()} 2593 sites.append(Site(species[i], coords[i], properties=prop)) 2594 2595 self._sites = tuple(sites) 2596 if validate_proximity and not self.is_valid(): 2597 raise StructureError(("Molecule contains sites that are ", "less than 0.01 Angstrom apart!")) 2598 2599 self._charge = charge 2600 nelectrons = 0.0 2601 for site in sites: 2602 for sp, amt in site.species.items(): 2603 if not isinstance(sp, DummySpecies): 2604 nelectrons += sp.Z * amt # type: ignore 2605 nelectrons -= charge 2606 self._nelectrons = nelectrons 2607 if spin_multiplicity: 2608 if (nelectrons + spin_multiplicity) % 2 != 1: 2609 raise ValueError( 2610 "Charge of %d and spin multiplicity of %d is" 2611 " not possible for this molecule" % (self._charge, spin_multiplicity) 2612 ) 2613 self._spin_multiplicity = spin_multiplicity 2614 else: 2615 self._spin_multiplicity = 1 if nelectrons % 2 == 0 else 2 2616 2617 @property 2618 def charge(self) -> float: 2619 """ 2620 Charge of molecule 2621 """ 2622 return self._charge 2623 2624 @property 2625 def spin_multiplicity(self) -> float: 2626 """ 2627 Spin multiplicity of molecule. 2628 """ 2629 return self._spin_multiplicity 2630 2631 @property 2632 def nelectrons(self) -> float: 2633 """ 2634 Number of electrons in the molecule. 2635 """ 2636 return self._nelectrons 2637 2638 @property 2639 def center_of_mass(self) -> float: 2640 """ 2641 Center of mass of molecule. 2642 """ 2643 center = np.zeros(3) 2644 total_weight = 0 2645 for site in self: 2646 wt = site.species.weight 2647 center += site.coords * wt 2648 total_weight += wt 2649 return center / total_weight # type: ignore 2650 2651 @property 2652 def sites(self) -> Tuple[Site, ...]: 2653 """ 2654 Returns a tuple of sites in the Molecule. 2655 """ 2656 return self._sites 2657 2658 @classmethod 2659 def from_sites( 2660 cls, sites: Sequence[Site], charge: float = 0, spin_multiplicity: float = None, validate_proximity: bool = False 2661 ) -> Union["IMolecule", "Molecule"]: 2662 """ 2663 Convenience constructor to make a Molecule from a list of sites. 2664 2665 Args: 2666 sites ([Site]): Sequence of Sites. 2667 charge (int): Charge of molecule. Defaults to 0. 2668 spin_multiplicity (int): Spin multicipity. Defaults to None, 2669 in which it is determined automatically. 2670 validate_proximity (bool): Whether to check that atoms are too 2671 close. 2672 """ 2673 props = collections.defaultdict(list) 2674 for site in sites: 2675 for k, v in site.properties.items(): 2676 props[k].append(v) 2677 return cls( 2678 [site.species for site in sites], 2679 [site.coords for site in sites], 2680 charge=charge, 2681 spin_multiplicity=spin_multiplicity, 2682 validate_proximity=validate_proximity, 2683 site_properties=props, 2684 ) 2685 2686 def break_bond(self, ind1: int, ind2: int, tol: float = 0.2) -> Tuple[Union["IMolecule", "Molecule"], ...]: 2687 """ 2688 Returns two molecules based on breaking the bond between atoms at index 2689 ind1 and ind2. 2690 2691 Args: 2692 ind1 (int): Index of first site. 2693 ind2 (int): Index of second site. 2694 tol (float): Relative tolerance to test. Basically, the code 2695 checks if the distance between the sites is less than (1 + 2696 tol) * typical bond distances. Defaults to 0.2, i.e., 2697 20% longer. 2698 2699 Returns: 2700 Two Molecule objects representing the two clusters formed from 2701 breaking the bond. 2702 """ 2703 clusters = [[self._sites[ind1]], [self._sites[ind2]]] 2704 2705 sites = [site for i, site in enumerate(self._sites) if i not in (ind1, ind2)] 2706 2707 def belongs_to_cluster(site, cluster): 2708 for test_site in cluster: 2709 if CovalentBond.is_bonded(site, test_site, tol=tol): 2710 return True 2711 return False 2712 2713 while len(sites) > 0: 2714 unmatched = [] 2715 for site in sites: 2716 for cluster in clusters: 2717 if belongs_to_cluster(site, cluster): 2718 cluster.append(site) 2719 break 2720 else: 2721 unmatched.append(site) 2722 2723 if len(unmatched) == len(sites): 2724 raise ValueError("Not all sites are matched!") 2725 sites = unmatched 2726 2727 return tuple(self.__class__.from_sites(cluster) for cluster in clusters) 2728 2729 def get_covalent_bonds(self, tol: float = 0.2) -> List[CovalentBond]: 2730 """ 2731 Determines the covalent bonds in a molecule. 2732 2733 Args: 2734 tol (float): The tol to determine bonds in a structure. See 2735 CovalentBond.is_bonded. 2736 2737 Returns: 2738 List of bonds 2739 """ 2740 bonds = [] 2741 for site1, site2 in itertools.combinations(self._sites, 2): 2742 if CovalentBond.is_bonded(site1, site2, tol): 2743 bonds.append(CovalentBond(site1, site2)) 2744 return bonds 2745 2746 def __eq__(self, other): 2747 if other is None: 2748 return False 2749 if len(self) != len(other): 2750 return False 2751 if self.charge != other.charge: 2752 return False 2753 if self.spin_multiplicity != other.spin_multiplicity: 2754 return False 2755 for site in self: 2756 if site not in other: 2757 return False 2758 return True 2759 2760 def __ne__(self, other): 2761 return not self.__eq__(other) 2762 2763 def __hash__(self): 2764 # For now, just use the composition hash code. 2765 return self.composition.__hash__() 2766 2767 def __repr__(self): 2768 outs = ["Molecule Summary"] 2769 for s in self: 2770 outs.append(s.__repr__()) 2771 return "\n".join(outs) 2772 2773 def __str__(self): 2774 outs = [ 2775 "Full Formula (%s)" % self.composition.formula, 2776 "Reduced Formula: " + self.composition.reduced_formula, 2777 "Charge = %s, Spin Mult = %s" % (self._charge, self._spin_multiplicity), 2778 "Sites (%d)" % len(self), 2779 ] 2780 for i, site in enumerate(self): 2781 outs.append( 2782 " ".join( 2783 [ 2784 str(i), 2785 site.species_string, 2786 " ".join([("%0.6f" % j).rjust(12) for j in site.coords]), 2787 ] 2788 ) 2789 ) 2790 return "\n".join(outs) 2791 2792 def as_dict(self): 2793 """ 2794 Json-serializable dict representation of Molecule 2795 """ 2796 d = { 2797 "@module": self.__class__.__module__, 2798 "@class": self.__class__.__name__, 2799 "charge": self._charge, 2800 "spin_multiplicity": self._spin_multiplicity, 2801 "sites": [], 2802 } 2803 for site in self: 2804 site_dict = site.as_dict() 2805 del site_dict["@module"] 2806 del site_dict["@class"] 2807 d["sites"].append(site_dict) 2808 return d 2809 2810 @classmethod 2811 def from_dict(cls, d) -> dict: 2812 """ 2813 Reconstitute a Molecule object from a dict representation created using 2814 as_dict(). 2815 2816 Args: 2817 d (dict): dict representation of Molecule. 2818 2819 Returns: 2820 Molecule object 2821 """ 2822 sites = [Site.from_dict(sd) for sd in d["sites"]] 2823 charge = d.get("charge", 0) 2824 spin_multiplicity = d.get("spin_multiplicity") 2825 return cls.from_sites(sites, charge=charge, spin_multiplicity=spin_multiplicity) 2826 2827 def get_distance(self, i: int, j: int) -> float: 2828 """ 2829 Get distance between site i and j. 2830 2831 Args: 2832 i (int): Index of first site 2833 j (int): Index of second site 2834 2835 Returns: 2836 Distance between the two sites. 2837 """ 2838 return self[i].distance(self[j]) 2839 2840 def get_sites_in_sphere(self, pt: ArrayLike, r: float) -> List[Neighbor]: 2841 """ 2842 Find all sites within a sphere from a point. 2843 2844 Args: 2845 pt (3x1 array): Cartesian coordinates of center of sphere 2846 r (float): Radius of sphere. 2847 2848 Returns: 2849 [:class:`pymatgen.core.structure.Neighbor`] 2850 """ 2851 neighbors = [] 2852 for i, site in enumerate(self._sites): 2853 dist = site.distance_from_point(pt) 2854 if dist <= r: 2855 neighbors.append(Neighbor(site.species, site.coords, site.properties, dist, i)) 2856 return neighbors 2857 2858 def get_neighbors(self, site: Site, r: float) -> List[Neighbor]: 2859 """ 2860 Get all neighbors to a site within a sphere of radius r. Excludes the 2861 site itself. 2862 2863 Args: 2864 site (Site): Site at the center of the sphere. 2865 r (float): Radius of sphere. 2866 2867 Returns: 2868 [:class:`pymatgen.core.structure.Neighbor`] 2869 """ 2870 nns = self.get_sites_in_sphere(site.coords, r) 2871 return [nn for nn in nns if nn != site] 2872 2873 def get_neighbors_in_shell(self, origin: ArrayLike, r: float, dr: float) -> List[Neighbor]: 2874 """ 2875 Returns all sites in a shell centered on origin (coords) between radii 2876 r-dr and r+dr. 2877 2878 Args: 2879 origin (3x1 array): Cartesian coordinates of center of sphere. 2880 r (float): Inner radius of shell. 2881 dr (float): Width of shell. 2882 2883 Returns: 2884 [:class:`pymatgen.core.structure.Neighbor`] 2885 """ 2886 outer = self.get_sites_in_sphere(origin, r + dr) 2887 inner = r - dr 2888 return [nn for nn in outer if nn.nn_distance > inner] 2889 2890 def get_boxed_structure( 2891 self, 2892 a: float, 2893 b: float, 2894 c: float, 2895 images: ArrayLike = (1, 1, 1), 2896 random_rotation: bool = False, 2897 min_dist: float = 1.0, 2898 cls=None, 2899 offset: ArrayLike = None, 2900 no_cross: bool = False, 2901 reorder: bool = True, 2902 ) -> Union["IStructure", "Structure"]: 2903 """ 2904 Creates a Structure from a Molecule by putting the Molecule in the 2905 center of a orthorhombic box. Useful for creating Structure for 2906 calculating molecules using periodic codes. 2907 2908 Args: 2909 a (float): a-lattice parameter. 2910 b (float): b-lattice parameter. 2911 c (float): c-lattice parameter. 2912 images: No. of boxed images in each direction. Defaults to 2913 (1, 1, 1), meaning single molecule with 1 lattice parameter 2914 in each direction. 2915 random_rotation (bool): Whether to apply a random rotation to 2916 each molecule. This jumbles all the molecules so that they 2917 are not exact images of each other. 2918 min_dist (float): The minimum distance that atoms should be from 2919 each other. This is only used if random_rotation is True. 2920 The randomized rotations are searched such that no two atoms 2921 are less than min_dist from each other. 2922 cls: The Structure class to instantiate (defaults to pymatgen 2923 structure) 2924 offset: Translation to offset molecule from center of mass coords 2925 no_cross: Whether to forbid molecule coords from extending beyond 2926 boundary of box. 2927 reorder: Whether to reorder the sites to be in electronegativity 2928 order. 2929 2930 Returns: 2931 Structure containing molecule in a box. 2932 """ 2933 if offset is None: 2934 offset = np.array([0, 0, 0]) 2935 2936 coords = np.array(self.cart_coords) 2937 x_range = max(coords[:, 0]) - min(coords[:, 0]) 2938 y_range = max(coords[:, 1]) - min(coords[:, 1]) 2939 z_range = max(coords[:, 2]) - min(coords[:, 2]) 2940 2941 if a <= x_range or b <= y_range or c <= z_range: 2942 raise ValueError("Box is not big enough to contain Molecule.") 2943 lattice = Lattice.from_parameters(a * images[0], b * images[1], c * images[2], 90, 90, 90) # type: ignore 2944 nimages = images[0] * images[1] * images[2] # type: ignore 2945 all_coords: List[ArrayLike] = [] 2946 2947 centered_coords = self.cart_coords - self.center_of_mass + offset 2948 2949 for i, j, k in itertools.product( 2950 list(range(images[0])), list(range(images[1])), list(range(images[2])) # type: ignore 2951 ): 2952 box_center = [(i + 0.5) * a, (j + 0.5) * b, (k + 0.5) * c] # type: ignore 2953 if random_rotation: 2954 while True: 2955 op = SymmOp.from_origin_axis_angle( 2956 (0, 0, 0), 2957 axis=np.random.rand(3), 2958 angle=random.uniform(-180, 180), 2959 ) 2960 m = op.rotation_matrix 2961 new_coords = np.dot(m, centered_coords.T).T + box_center 2962 if no_cross: 2963 x_max, x_min = max(new_coords[:, 0]), min(new_coords[:, 0]) 2964 y_max, y_min = max(new_coords[:, 1]), min(new_coords[:, 1]) 2965 z_max, z_min = max(new_coords[:, 2]), min(new_coords[:, 2]) 2966 if x_max > a or x_min < 0 or y_max > b or y_min < 0 or z_max > c or z_min < 0: 2967 raise ValueError("Molecule crosses boundary of box.") 2968 if len(all_coords) == 0: 2969 break 2970 distances = lattice.get_all_distances( 2971 lattice.get_fractional_coords(new_coords), 2972 lattice.get_fractional_coords(all_coords), 2973 ) 2974 if np.amin(distances) > min_dist: 2975 break 2976 else: 2977 new_coords = centered_coords + box_center 2978 if no_cross: 2979 x_max, x_min = max(new_coords[:, 0]), min(new_coords[:, 0]) 2980 y_max, y_min = max(new_coords[:, 1]), min(new_coords[:, 1]) 2981 z_max, z_min = max(new_coords[:, 2]), min(new_coords[:, 2]) 2982 if x_max > a or x_min < 0 or y_max > b or y_min < 0 or z_max > c or z_min < 0: 2983 raise ValueError("Molecule crosses boundary of box.") 2984 all_coords.extend(new_coords) 2985 sprops = {k: v * nimages for k, v in self.site_properties.items()} # type: ignore 2986 2987 if cls is None: 2988 cls = Structure 2989 2990 if reorder: 2991 return cls( 2992 lattice, 2993 self.species * nimages, # type: ignore 2994 all_coords, 2995 coords_are_cartesian=True, 2996 site_properties=sprops, 2997 ).get_sorted_structure() 2998 2999 return cls( 3000 lattice, 3001 self.species * nimages, # type: ignore 3002 coords, 3003 coords_are_cartesian=True, 3004 site_properties=sprops, 3005 ) 3006 3007 def get_centered_molecule(self) -> Union["IMolecule", "Molecule"]: 3008 """ 3009 Returns a Molecule centered at the center of mass. 3010 3011 Returns: 3012 Molecule centered with center of mass at origin. 3013 """ 3014 center = self.center_of_mass 3015 new_coords = np.array(self.cart_coords) - center 3016 return self.__class__( # type: ignore 3017 self.species_and_occu, 3018 new_coords, # type: ignore 3019 charge=self._charge, 3020 spin_multiplicity=self._spin_multiplicity, 3021 site_properties=self.site_properties, 3022 ) 3023 3024 def to(self, fmt=None, filename=None): 3025 """ 3026 Outputs the molecule to a file or string. 3027 3028 Args: 3029 fmt (str): Format to output to. Defaults to JSON unless filename 3030 is provided. If fmt is specifies, it overrides whatever the 3031 filename is. Options include "xyz", "gjf", "g03", "json". If 3032 you have OpenBabel installed, any of the formats supported by 3033 OpenBabel. Non-case sensitive. 3034 filename (str): If provided, output will be written to a file. If 3035 fmt is not specified, the format is determined from the 3036 filename. Defaults is None, i.e. string output. 3037 3038 Returns: 3039 (str) if filename is None. None otherwise. 3040 """ 3041 from pymatgen.io.babel import BabelMolAdaptor 3042 from pymatgen.io.gaussian import GaussianInput 3043 from pymatgen.io.xyz import XYZ 3044 3045 fmt = "" if fmt is None else fmt.lower() 3046 fname = os.path.basename(filename or "") 3047 if fmt == "xyz" or fnmatch(fname.lower(), "*.xyz*"): 3048 writer = XYZ(self) 3049 elif any(fmt == r or fnmatch(fname.lower(), "*.{}*".format(r)) for r in ["gjf", "g03", "g09", "com", "inp"]): 3050 writer = GaussianInput(self) 3051 elif fmt == "json" or fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"): 3052 if filename: 3053 with zopen(filename, "wt", encoding="utf8") as f: 3054 return json.dump(self.as_dict(), f) 3055 else: 3056 return json.dumps(self.as_dict()) 3057 elif fmt == "yaml" or fnmatch(fname, "*.yaml*"): 3058 if filename: 3059 with zopen(fname, "wt", encoding="utf8") as f: 3060 return yaml.safe_dump(self.as_dict(), f) 3061 else: 3062 return yaml.safe_dump(self.as_dict()) 3063 3064 else: 3065 m = re.search(r"\.(pdb|mol|mdl|sdf|sd|ml2|sy2|mol2|cml|mrv)", fname.lower()) 3066 if (not fmt) and m: 3067 fmt = m.group(1) 3068 writer = BabelMolAdaptor(self) 3069 return writer.write_file(filename, file_format=fmt) 3070 3071 if filename: 3072 writer.write_file(filename) 3073 return str(writer) 3074 3075 @classmethod 3076 def from_str(cls, input_string: str, fmt: str): 3077 """ 3078 Reads the molecule from a string. 3079 3080 Args: 3081 input_string (str): String to parse. 3082 fmt (str): Format to output to. Defaults to JSON unless filename 3083 is provided. If fmt is specifies, it overrides whatever the 3084 filename is. Options include "xyz", "gjf", "g03", "json". If 3085 you have OpenBabel installed, any of the formats supported by 3086 OpenBabel. Non-case sensitive. 3087 3088 Returns: 3089 IMolecule or Molecule. 3090 """ 3091 from pymatgen.io.gaussian import GaussianInput 3092 from pymatgen.io.xyz import XYZ 3093 3094 if fmt.lower() == "xyz": 3095 m = XYZ.from_string(input_string).molecule 3096 elif fmt in ["gjf", "g03", "g09", "com", "inp"]: 3097 m = GaussianInput.from_string(input_string).molecule 3098 elif fmt == "json": 3099 d = json.loads(input_string) 3100 return cls.from_dict(d) 3101 elif fmt == "yaml": 3102 d = yaml.safe_load(input_string) 3103 return cls.from_dict(d) 3104 else: 3105 from pymatgen.io.babel import BabelMolAdaptor 3106 3107 m = BabelMolAdaptor.from_string(input_string, file_format=fmt).pymatgen_mol 3108 return cls.from_sites(m) 3109 3110 @classmethod 3111 def from_file(cls, filename): 3112 """ 3113 Reads a molecule from a file. Supported formats include xyz, 3114 gaussian input (gjf|g03|g09|com|inp), Gaussian output (.out|and 3115 pymatgen's JSON serialized molecules. Using openbabel, 3116 many more extensions are supported but requires openbabel to be 3117 installed. 3118 3119 Args: 3120 filename (str): The filename to read from. 3121 3122 Returns: 3123 Molecule 3124 """ 3125 filename = str(filename) 3126 from pymatgen.io.gaussian import GaussianOutput 3127 3128 with zopen(filename) as f: 3129 contents = f.read() 3130 fname = filename.lower() 3131 if fnmatch(fname, "*.xyz*"): 3132 return cls.from_str(contents, fmt="xyz") 3133 if any(fnmatch(fname.lower(), "*.{}*".format(r)) for r in ["gjf", "g03", "g09", "com", "inp"]): 3134 return cls.from_str(contents, fmt="g09") 3135 if any(fnmatch(fname.lower(), "*.{}*".format(r)) for r in ["out", "lis", "log"]): 3136 return GaussianOutput(filename).final_structure 3137 if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"): 3138 return cls.from_str(contents, fmt="json") 3139 if fnmatch(fname, "*.yaml*"): 3140 return cls.from_str(contents, fmt="yaml") 3141 from pymatgen.io.babel import BabelMolAdaptor 3142 3143 m = re.search(r"\.(pdb|mol|mdl|sdf|sd|ml2|sy2|mol2|cml|mrv)", filename.lower()) 3144 if m: 3145 new = BabelMolAdaptor.from_file(filename, m.group(1)).pymatgen_mol 3146 new.__class__ = cls 3147 return new 3148 raise ValueError("Cannot determine file type.") 3149 3150 3151class Structure(IStructure, collections.abc.MutableSequence): 3152 """ 3153 Mutable version of structure. 3154 """ 3155 3156 __hash__ = None # type: ignore 3157 3158 def __init__( 3159 self, 3160 lattice: Union[ArrayLike, Lattice], 3161 species: Sequence[CompositionLike], 3162 coords: Sequence[ArrayLike], 3163 charge: float = None, 3164 validate_proximity: bool = False, 3165 to_unit_cell: bool = False, 3166 coords_are_cartesian: bool = False, 3167 site_properties: dict = None, 3168 ): 3169 """ 3170 Create a periodic structure. 3171 3172 Args: 3173 lattice: The lattice, either as a pymatgen.core.lattice.Lattice or 3174 simply as any 2D array. Each row should correspond to a lattice 3175 vector. E.g., [[10,0,0], [20,10,0], [0,0,30]] specifies a 3176 lattice with lattice vectors [10,0,0], [20,10,0] and [0,0,30]. 3177 species: List of species on each site. Can take in flexible input, 3178 including: 3179 3180 i. A sequence of element / species specified either as string 3181 symbols, e.g. ["Li", "Fe2+", "P", ...] or atomic numbers, 3182 e.g., (3, 56, ...) or actual Element or Species objects. 3183 3184 ii. List of dict of elements/species and occupancies, e.g., 3185 [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of 3186 disordered structures. 3187 coords (Nx3 array): list of fractional/cartesian coordinates of 3188 each species. 3189 charge (int): overall charge of the structure. Defaults to behavior 3190 in SiteCollection where total charge is the sum of the oxidation 3191 states. 3192 validate_proximity (bool): Whether to check if there are sites 3193 that are less than 0.01 Ang apart. Defaults to False. 3194 to_unit_cell (bool): Whether to map all sites into the unit cell, 3195 i.e., fractional coords between 0 and 1. Defaults to False. 3196 coords_are_cartesian (bool): Set to True if you are providing 3197 coordinates in cartesian coordinates. Defaults to False. 3198 site_properties (dict): Properties associated with the sites as a 3199 dict of sequences, e.g., {"magmom":[5,5,5,5]}. The sequences 3200 have to be the same length as the atomic species and 3201 fractional_coords. Defaults to None for no properties. 3202 """ 3203 super().__init__( 3204 lattice, 3205 species, 3206 coords, 3207 charge=charge, 3208 validate_proximity=validate_proximity, 3209 to_unit_cell=to_unit_cell, 3210 coords_are_cartesian=coords_are_cartesian, 3211 site_properties=site_properties, 3212 ) 3213 3214 self._sites: List[PeriodicSite] = list(self._sites) # type: ignore 3215 3216 def __setitem__( # type: ignore 3217 self, i: Union[int, slice, Sequence[int], SpeciesLike], site: Union[SpeciesLike, PeriodicSite, Sequence] 3218 ): 3219 """ 3220 Modify a site in the structure. 3221 3222 Args: 3223 i (int, [int], slice, Species-like): Indices to change. You can 3224 specify these as an int, a list of int, or a species-like 3225 string. 3226 site (PeriodicSite/Species/Sequence): Three options exist. You 3227 can provide a PeriodicSite directly (lattice will be 3228 checked). Or more conveniently, you can provide a 3229 specie-like object or a tuple of up to length 3. 3230 3231 Examples: 3232 s[0] = "Fe" 3233 s[0] = Element("Fe") 3234 both replaces the species only. 3235 s[0] = "Fe", [0.5, 0.5, 0.5] 3236 Replaces site and *fractional* coordinates. Any properties 3237 are inherited from current site. 3238 s[0] = "Fe", [0.5, 0.5, 0.5], {"spin": 2} 3239 Replaces site and *fractional* coordinates and properties. 3240 3241 s[(0, 2, 3)] = "Fe" 3242 Replaces sites 0, 2 and 3 with Fe. 3243 3244 s[0::2] = "Fe" 3245 Replaces all even index sites with Fe. 3246 3247 s["Mn"] = "Fe" 3248 Replaces all Mn in the structure with Fe. This is 3249 a short form for the more complex replace_species. 3250 3251 s["Mn"] = "Fe0.5Co0.5" 3252 Replaces all Mn in the structure with Fe: 0.5, Co: 0.5, i.e., 3253 creates a disordered structure! 3254 """ 3255 3256 if isinstance(i, int): 3257 indices = [i] 3258 elif isinstance(i, (str, Element, Species)): 3259 self.replace_species({i: site}) # type: ignore 3260 return 3261 elif isinstance(i, slice): 3262 to_mod = self[i] 3263 indices = [ii for ii, s in enumerate(self._sites) if s in to_mod] 3264 else: 3265 indices = list(i) 3266 3267 for ii in indices: 3268 if isinstance(site, PeriodicSite): 3269 if site.lattice != self._lattice: 3270 raise ValueError("PeriodicSite added must have same lattice " "as Structure!") 3271 if len(indices) != 1: 3272 raise ValueError("Site assignments makes sense only for " "single int indices!") 3273 self._sites[ii] = site # type: ignore 3274 else: 3275 if isinstance(site, str) or (not isinstance(site, collections.abc.Sequence)): 3276 self._sites[ii].species = site # type: ignore 3277 else: 3278 self._sites[ii].species = site[0] # type: ignore 3279 if len(site) > 1: 3280 self._sites[ii].frac_coords = site[1] # type: ignore 3281 if len(site) > 2: 3282 self._sites[ii].properties = site[2] # type: ignore 3283 3284 def __delitem__(self, i): 3285 """ 3286 Deletes a site from the Structure. 3287 """ 3288 self._sites.__delitem__(i) 3289 3290 @property 3291 def lattice(self) -> Lattice: 3292 """ 3293 :return: Lattice assciated with structure. 3294 """ 3295 return self._lattice 3296 3297 @lattice.setter 3298 def lattice(self, lattice: Union[ArrayLike, Lattice]): 3299 if not isinstance(lattice, Lattice): 3300 lattice = Lattice(lattice) 3301 self._lattice = lattice 3302 for site in self._sites: 3303 site.lattice = lattice 3304 3305 def append( # type: ignore 3306 self, 3307 species: CompositionLike, 3308 coords: ArrayLike, 3309 coords_are_cartesian: bool = False, 3310 validate_proximity: bool = False, 3311 properties: dict = None, 3312 ): 3313 """ 3314 Append a site to the structure. 3315 3316 Args: 3317 species: Species of inserted site 3318 coords (3x1 array): Coordinates of inserted site 3319 coords_are_cartesian (bool): Whether coordinates are cartesian. 3320 Defaults to False. 3321 validate_proximity (bool): Whether to check if inserted site is 3322 too close to an existing site. Defaults to False. 3323 properties (dict): Properties of the site. 3324 3325 Returns: 3326 New structure with inserted site. 3327 """ 3328 return self.insert( 3329 len(self), 3330 species, 3331 coords, 3332 coords_are_cartesian=coords_are_cartesian, 3333 validate_proximity=validate_proximity, 3334 properties=properties, 3335 ) 3336 3337 def insert( # type: ignore 3338 self, 3339 i: int, 3340 species: CompositionLike, 3341 coords: ArrayLike, 3342 coords_are_cartesian: bool = False, 3343 validate_proximity: bool = False, 3344 properties: dict = None, 3345 ): 3346 """ 3347 Insert a site to the structure. 3348 3349 Args: 3350 i (int): Index to insert site 3351 species (species-like): Species of inserted site 3352 coords (3x1 array): Coordinates of inserted site 3353 coords_are_cartesian (bool): Whether coordinates are cartesian. 3354 Defaults to False. 3355 validate_proximity (bool): Whether to check if inserted site is 3356 too close to an existing site. Defaults to False. 3357 properties (dict): Properties associated with the site. 3358 3359 Returns: 3360 New structure with inserted site. 3361 """ 3362 if not coords_are_cartesian: 3363 new_site = PeriodicSite(species, coords, self._lattice, properties=properties) 3364 else: 3365 frac_coords = self._lattice.get_fractional_coords(coords) 3366 new_site = PeriodicSite(species, frac_coords, self._lattice, properties=properties) 3367 3368 if validate_proximity: 3369 for site in self: 3370 if site.distance(new_site) < self.DISTANCE_TOLERANCE: 3371 raise ValueError("New site is too close to an existing " "site!") 3372 3373 self._sites.insert(i, new_site) 3374 3375 def replace( 3376 self, 3377 i: int, 3378 species: CompositionLike, 3379 coords: ArrayLike = None, 3380 coords_are_cartesian: bool = False, 3381 properties: dict = None, 3382 ): 3383 """ 3384 Replace a single site. Takes either a species or a dict of species and 3385 occupations. 3386 3387 Args: 3388 i (int): Index of the site in the _sites list. 3389 species (species-like): Species of replacement site 3390 coords (3x1 array): Coordinates of replacement site. If None, 3391 the current coordinates are assumed. 3392 coords_are_cartesian (bool): Whether coordinates are cartesian. 3393 Defaults to False. 3394 properties (dict): Properties associated with the site. 3395 """ 3396 if coords is None: 3397 frac_coords = self[i].frac_coords 3398 elif coords_are_cartesian: 3399 frac_coords = self._lattice.get_fractional_coords(coords) 3400 else: 3401 frac_coords = coords # type: ignore 3402 3403 new_site = PeriodicSite(species, frac_coords, self._lattice, properties=properties) 3404 self._sites[i] = new_site 3405 3406 def substitute(self, index: int, func_group: Union["IMolecule", "Molecule", str], bond_order: int = 1): 3407 """ 3408 Substitute atom at index with a functional group. 3409 3410 Args: 3411 index (int): Index of atom to substitute. 3412 func_group: Substituent molecule. There are two options: 3413 3414 1. Providing an actual Molecule as the input. The first atom 3415 must be a DummySpecies X, indicating the position of 3416 nearest neighbor. The second atom must be the next 3417 nearest atom. For example, for a methyl group 3418 substitution, func_grp should be X-CH3, where X is the 3419 first site and C is the second site. What the code will 3420 do is to remove the index site, and connect the nearest 3421 neighbor to the C atom in CH3. The X-C bond indicates the 3422 directionality to connect the atoms. 3423 2. A string name. The molecule will be obtained from the 3424 relevant template in func_groups.json. 3425 bond_order (int): A specified bond order to calculate the bond 3426 length between the attached functional group and the nearest 3427 neighbor site. Defaults to 1. 3428 """ 3429 3430 # Find the nearest neighbor that is not a terminal atom. 3431 all_non_terminal_nn = [] 3432 for nn, dist, _, _ in self.get_neighbors(self[index], 3): 3433 # Check that the nn has neighbors within a sensible distance but 3434 # is not the site being substituted. 3435 for inn, dist2, _, _ in self.get_neighbors(nn, 3): 3436 if inn != self[index] and dist2 < 1.2 * get_bond_length(nn.specie, inn.specie): 3437 all_non_terminal_nn.append((nn, dist)) 3438 break 3439 3440 if len(all_non_terminal_nn) == 0: 3441 raise RuntimeError("Can't find a non-terminal neighbor to attach" " functional group to.") 3442 3443 non_terminal_nn = min(all_non_terminal_nn, key=lambda d: d[1])[0] 3444 3445 # Set the origin point to be the coordinates of the nearest 3446 # non-terminal neighbor. 3447 origin = non_terminal_nn.coords 3448 3449 # Pass value of functional group--either from user-defined or from 3450 # functional.json 3451 if not isinstance(func_group, Molecule): 3452 # Check to see whether the functional group is in database. 3453 if func_group not in FunctionalGroups: 3454 raise RuntimeError("Can't find functional group in list. " "Provide explicit coordinate instead") 3455 fgroup = FunctionalGroups[func_group] 3456 else: 3457 fgroup = func_group 3458 3459 # If a bond length can be found, modify func_grp so that the X-group 3460 # bond length is equal to the bond length. 3461 try: 3462 bl = get_bond_length(non_terminal_nn.specie, fgroup[1].specie, bond_order=bond_order) 3463 # Catches for case of incompatibility between Element(s) and Species(s) 3464 except TypeError: 3465 bl = None 3466 3467 if bl is not None: 3468 fgroup = fgroup.copy() 3469 vec = fgroup[0].coords - fgroup[1].coords 3470 vec /= np.linalg.norm(vec) 3471 fgroup[0] = "X", fgroup[1].coords + float(bl) * vec 3472 3473 # Align X to the origin. 3474 x = fgroup[0] 3475 fgroup.translate_sites(list(range(len(fgroup))), origin - x.coords) 3476 3477 # Find angle between the attaching bond and the bond to be replaced. 3478 v1 = fgroup[1].coords - origin 3479 v2 = self[index].coords - origin 3480 angle = get_angle(v1, v2) 3481 3482 if 1 < abs(angle % 180) < 179: 3483 # For angles which are not 0 or 180, we perform a rotation about 3484 # the origin along an axis perpendicular to both bonds to align 3485 # bonds. 3486 axis = np.cross(v1, v2) 3487 op = SymmOp.from_origin_axis_angle(origin, axis, angle) 3488 fgroup.apply_operation(op) 3489 elif abs(abs(angle) - 180) < 1: 3490 # We have a 180 degree angle. Simply do an inversion about the 3491 # origin 3492 for i, fg in enumerate(fgroup): 3493 fgroup[i] = (fg.species, origin - (fg.coords - origin)) 3494 3495 # Remove the atom to be replaced, and add the rest of the functional 3496 # group. 3497 del self[index] 3498 for site in fgroup[1:]: 3499 s_new = PeriodicSite(site.species, site.coords, self.lattice, coords_are_cartesian=True) 3500 self._sites.append(s_new) 3501 3502 def remove_species(self, species: Sequence[SpeciesLike]): 3503 """ 3504 Remove all occurrences of several species from a structure. 3505 3506 Args: 3507 species: Sequence of species to remove, e.g., ["Li", "Na"]. 3508 """ 3509 new_sites = [] 3510 species = [get_el_sp(s) for s in species] 3511 3512 for site in self._sites: 3513 new_sp_occu = {sp: amt for sp, amt in site.species.items() if sp not in species} 3514 if len(new_sp_occu) > 0: 3515 new_sites.append( 3516 PeriodicSite( 3517 new_sp_occu, 3518 site.frac_coords, 3519 self._lattice, 3520 properties=site.properties, 3521 ) 3522 ) 3523 self._sites = new_sites 3524 3525 def remove_sites(self, indices: Sequence[int]): 3526 """ 3527 Delete sites with at indices. 3528 3529 Args: 3530 indices: Sequence of indices of sites to delete. 3531 """ 3532 self._sites = [s for i, s in enumerate(self._sites) if i not in indices] 3533 3534 def apply_operation(self, symmop: SymmOp, fractional: bool = False): 3535 """ 3536 Apply a symmetry operation to the structure and return the new 3537 structure. The lattice is operated by the rotation matrix only. 3538 Coords are operated in full and then transformed to the new lattice. 3539 3540 Args: 3541 symmop (SymmOp): Symmetry operation to apply. 3542 fractional (bool): Whether the symmetry operation is applied in 3543 fractional space. Defaults to False, i.e., symmetry operation 3544 is applied in cartesian coordinates. 3545 """ 3546 if not fractional: 3547 self._lattice = Lattice([symmop.apply_rotation_only(row) for row in self._lattice.matrix]) 3548 3549 def operate_site(site): 3550 new_cart = symmop.operate(site.coords) 3551 new_frac = self._lattice.get_fractional_coords(new_cart) 3552 return PeriodicSite( 3553 site.species, 3554 new_frac, 3555 self._lattice, 3556 properties=site.properties, 3557 skip_checks=True, 3558 ) 3559 3560 else: 3561 new_latt = np.dot(symmop.rotation_matrix, self._lattice.matrix) 3562 self._lattice = Lattice(new_latt) 3563 3564 def operate_site(site): 3565 return PeriodicSite( 3566 site.species, 3567 symmop.operate(site.frac_coords), 3568 self._lattice, 3569 properties=site.properties, 3570 skip_checks=True, 3571 ) 3572 3573 self._sites = [operate_site(s) for s in self._sites] 3574 3575 def apply_strain(self, strain: ArrayLike): 3576 """ 3577 Apply a strain to the lattice. 3578 3579 Args: 3580 strain (float or list): Amount of strain to apply. Can be a float, 3581 or a sequence of 3 numbers. E.g., 0.01 means all lattice 3582 vectors are increased by 1%. This is equivalent to calling 3583 modify_lattice with a lattice with lattice parameters that 3584 are 1% larger. 3585 """ 3586 s = (1 + np.array(strain)) * np.eye(3) 3587 self.lattice = Lattice(np.dot(self._lattice.matrix.T, s).T) 3588 3589 def sort(self, key: Callable = None, reverse: bool = False): 3590 """ 3591 Sort a structure in place. The parameters have the same meaning as in 3592 list.sort. By default, sites are sorted by the electronegativity of 3593 the species. The difference between this method and 3594 get_sorted_structure (which also works in IStructure) is that the 3595 latter returns a new Structure, while this just sorts the Structure 3596 in place. 3597 3598 Args: 3599 key: Specifies a function of one argument that is used to extract 3600 a comparison key from each list element: key=str.lower. The 3601 default value is None (compare the elements directly). 3602 reverse (bool): If set to True, then the list elements are sorted 3603 as if each comparison were reversed. 3604 """ 3605 self._sites.sort(key=key, reverse=reverse) 3606 3607 def translate_sites( 3608 self, indices: Union[int, Sequence[int]], vector: ArrayLike, frac_coords: bool = True, to_unit_cell: bool = True 3609 ): 3610 """ 3611 Translate specific sites by some vector, keeping the sites within the 3612 unit cell. 3613 3614 Args: 3615 indices: Integer or List of site indices on which to perform the 3616 translation. 3617 vector: Translation vector for sites. 3618 frac_coords (bool): Whether the vector corresponds to fractional or 3619 cartesian coordinates. 3620 to_unit_cell (bool): Whether new sites are transformed to unit 3621 cell 3622 """ 3623 if not isinstance(indices, collections.abc.Iterable): 3624 indices = [indices] 3625 3626 for i in indices: 3627 site = self._sites[i] 3628 if frac_coords: 3629 fcoords = site.frac_coords + vector 3630 else: 3631 fcoords = self._lattice.get_fractional_coords(site.coords + vector) 3632 if to_unit_cell: 3633 fcoords = np.mod(fcoords, 1) 3634 self._sites[i].frac_coords = fcoords 3635 3636 def rotate_sites( 3637 self, 3638 indices: List[int] = None, 3639 theta: float = 0.0, 3640 axis: ArrayLike = None, 3641 anchor: ArrayLike = None, 3642 to_unit_cell: bool = True, 3643 ): 3644 """ 3645 Rotate specific sites by some angle around vector at anchor. 3646 3647 Args: 3648 indices (list): List of site indices on which to perform the 3649 translation. 3650 theta (float): Angle in radians 3651 axis (3x1 array): Rotation axis vector. 3652 anchor (3x1 array): Point of rotation. 3653 to_unit_cell (bool): Whether new sites are transformed to unit 3654 cell 3655 """ 3656 3657 from numpy import cross, eye 3658 from numpy.linalg import norm 3659 from scipy.linalg import expm 3660 3661 if indices is None: 3662 indices = list(range(len(self))) 3663 3664 if axis is None: 3665 axis = [0, 0, 1] 3666 3667 if anchor is None: 3668 anchor = [0, 0, 0] 3669 3670 anchor = np.array(anchor) 3671 axis = np.array(axis) 3672 3673 theta %= 2 * np.pi 3674 3675 rm = expm(cross(eye(3), axis / norm(axis)) * theta) 3676 for i in indices: 3677 site = self._sites[i] 3678 coords = ((np.dot(rm, np.array(site.coords - anchor).T)).T + anchor).ravel() 3679 new_site = PeriodicSite( 3680 site.species, 3681 coords, 3682 self._lattice, 3683 to_unit_cell=to_unit_cell, 3684 coords_are_cartesian=True, 3685 properties=site.properties, 3686 skip_checks=True, 3687 ) 3688 self._sites[i] = new_site 3689 3690 def perturb(self, distance: float, min_distance: float = None): 3691 """ 3692 Performs a random perturbation of the sites in a structure to break 3693 symmetries. 3694 3695 Args: 3696 distance (float): Distance in angstroms by which to perturb each 3697 site. 3698 min_distance (None, int, or float): if None, all displacements will 3699 be equal amplitude. If int or float, perturb each site a 3700 distance drawn from the uniform distribution between 3701 'min_distance' and 'distance'. 3702 3703 """ 3704 3705 def get_rand_vec(): 3706 # deals with zero vectors. 3707 vector = np.random.randn(3) 3708 vnorm = np.linalg.norm(vector) 3709 dist = distance 3710 if isinstance(min_distance, (float, int)): 3711 dist = np.random.uniform(min_distance, dist) 3712 return vector / vnorm * dist if vnorm != 0 else get_rand_vec() 3713 3714 for i in range(len(self._sites)): 3715 self.translate_sites([i], get_rand_vec(), frac_coords=False) 3716 3717 def make_supercell(self, scaling_matrix: ArrayLike, to_unit_cell: bool = True): 3718 """ 3719 Create a supercell. 3720 3721 Args: 3722 scaling_matrix: A scaling matrix for transforming the lattice 3723 vectors. Has to be all integers. Several options are possible: 3724 3725 a. A full 3x3 scaling matrix defining the linear combination 3726 the old lattice vectors. E.g., [[2,1,0],[0,3,0],[0,0, 3727 1]] generates a new structure with lattice vectors a' = 3728 2a + b, b' = 3b, c' = c where a, b, and c are the lattice 3729 vectors of the original structure. 3730 b. An sequence of three scaling factors. E.g., [2, 1, 1] 3731 specifies that the supercell should have dimensions 2a x b x 3732 c. 3733 c. A number, which simply scales all lattice vectors by the 3734 same factor. 3735 to_unit_cell: Whether or not to fall back sites into the unit cell 3736 """ 3737 s = self * scaling_matrix 3738 if to_unit_cell: 3739 for site in s: 3740 site.to_unit_cell(in_place=True) 3741 self._sites = s.sites 3742 self._lattice = s.lattice 3743 3744 def scale_lattice(self, volume: float): 3745 """ 3746 Performs a scaling of the lattice vectors so that length proportions 3747 and angles are preserved. 3748 3749 Args: 3750 volume (float): New volume of the unit cell in A^3. 3751 """ 3752 self.lattice = self._lattice.scale(volume) 3753 3754 def merge_sites(self, tol: float = 0.01, mode: Literal["sum", "delete", "average"] = "sum"): 3755 """ 3756 Merges sites (adding occupancies) within tol of each other. 3757 Removes site properties. 3758 3759 Args: 3760 tol (float): Tolerance for distance to merge sites. 3761 mode ('sum' | 'delete' | 'average'): "delete" means duplicate sites are 3762 deleted. "sum" means the occupancies are summed for the sites. 3763 "average" means that the site is deleted but the properties are averaged 3764 Only first letter is considered. 3765 3766 """ 3767 from scipy.cluster.hierarchy import fcluster, linkage 3768 from scipy.spatial.distance import squareform 3769 3770 d = self.distance_matrix 3771 np.fill_diagonal(d, 0) 3772 clusters = fcluster(linkage(squareform((d + d.T) / 2)), tol, "distance") 3773 sites = [] 3774 for c in np.unique(clusters): 3775 inds = np.where(clusters == c)[0] 3776 species = self[inds[0]].species 3777 coords = self[inds[0]].frac_coords 3778 props = self[inds[0]].properties 3779 for n, i in enumerate(inds[1:]): 3780 sp = self[i].species 3781 if mode.lower()[0] == "s": 3782 species += sp 3783 offset = self[i].frac_coords - coords 3784 coords = coords + ((offset - np.round(offset)) / (n + 2)).astype(coords.dtype) 3785 for key in props.keys(): 3786 if props[key] is not None and self[i].properties[key] != props[key]: 3787 if mode.lower()[0] == "a" and isinstance(props[key], float): 3788 # update a running total 3789 props[key] = props[key] * (n + 1) / (n + 2) + self[i].properties[key] / (n + 2) 3790 else: 3791 props[key] = None 3792 warnings.warn( 3793 "Sites with different site property %s are merged. " "So property is set to none" % key 3794 ) 3795 sites.append(PeriodicSite(species, coords, self.lattice, properties=props)) 3796 3797 self._sites = sites 3798 3799 def set_charge(self, new_charge: float = 0.0): 3800 """ 3801 Sets the overall structure charge 3802 3803 Args: 3804 new_charge (float): new charge to set 3805 """ 3806 self._charge = new_charge 3807 3808 3809class Molecule(IMolecule, collections.abc.MutableSequence): 3810 """ 3811 Mutable Molecule. It has all the methods in IMolecule, but in addition, 3812 it allows a user to perform edits on the molecule. 3813 """ 3814 3815 __hash__ = None # type: ignore 3816 3817 def __init__( 3818 self, 3819 species: Sequence[SpeciesLike], 3820 coords: Sequence[ArrayLike], 3821 charge: float = 0.0, 3822 spin_multiplicity: float = None, 3823 validate_proximity: bool = False, 3824 site_properties: dict = None, 3825 ): 3826 """ 3827 Creates a MutableMolecule. 3828 3829 Args: 3830 species: list of atomic species. Possible kinds of input include a 3831 list of dict of elements/species and occupancies, a List of 3832 elements/specie specified as actual Element/Species, Strings 3833 ("Fe", "Fe2+") or atomic numbers (1,56). 3834 coords (3x1 array): list of cartesian coordinates of each species. 3835 charge (float): Charge for the molecule. Defaults to 0. 3836 spin_multiplicity (int): Spin multiplicity for molecule. 3837 Defaults to None, which means that the spin multiplicity is 3838 set to 1 if the molecule has no unpaired electrons and to 2 3839 if there are unpaired electrons. 3840 validate_proximity (bool): Whether to check if there are sites 3841 that are less than 1 Ang apart. Defaults to False. 3842 site_properties (dict): Properties associated with the sites as 3843 a dict of sequences, e.g., {"magmom":[5,5,5,5]}. The 3844 sequences have to be the same length as the atomic species 3845 and fractional_coords. Defaults to None for no properties. 3846 """ 3847 super().__init__( 3848 species, 3849 coords, 3850 charge=charge, 3851 spin_multiplicity=spin_multiplicity, 3852 validate_proximity=validate_proximity, 3853 site_properties=site_properties, 3854 ) 3855 self._sites: List[Site] = list(self._sites) # type: ignore 3856 3857 def __setitem__( # type: ignore 3858 self, i: Union[int, slice, Sequence[int], SpeciesLike], site: Union[SpeciesLike, Site, Sequence] 3859 ): 3860 """ 3861 Modify a site in the molecule. 3862 3863 Args: 3864 i (int, [int], slice, Species-like): Indices to change. You can 3865 specify these as an int, a list of int, or a species-like 3866 string. 3867 site (PeriodicSite/Species/Sequence): Three options exist. You can 3868 provide a Site directly, or for convenience, you can provide 3869 simply a Species-like string/object, or finally a (Species, 3870 coords) sequence, e.g., ("Fe", [0.5, 0.5, 0.5]). 3871 """ 3872 3873 if isinstance(i, int): 3874 indices = [i] 3875 elif isinstance(i, (str, Element, Species)): 3876 self.replace_species({i: site}) # type: ignore 3877 return 3878 elif isinstance(i, slice): 3879 to_mod = self[i] 3880 indices = [ii for ii, s in enumerate(self._sites) if s in to_mod] 3881 else: 3882 indices = list(i) 3883 3884 for ii in indices: 3885 if isinstance(site, Site): 3886 self._sites[ii] = site 3887 else: 3888 if isinstance(site, str) or (not isinstance(site, collections.abc.Sequence)): 3889 self._sites[ii].species = site # type: ignore 3890 else: 3891 self._sites[ii].species = site[0] # type: ignore 3892 if len(site) > 1: 3893 self._sites[ii].coords = site[1] # type: ignore 3894 if len(site) > 2: 3895 self._sites[ii].properties = site[2] # type: ignore 3896 3897 def __delitem__(self, i): 3898 """ 3899 Deletes a site from the Structure. 3900 """ 3901 self._sites.__delitem__(i) 3902 3903 def append( # type: ignore 3904 self, 3905 species: CompositionLike, 3906 coords: ArrayLike, 3907 validate_proximity: bool = False, 3908 properties: dict = None, 3909 ): 3910 """ 3911 Appends a site to the molecule. 3912 3913 Args: 3914 species: Species of inserted site 3915 coords: Coordinates of inserted site 3916 validate_proximity (bool): Whether to check if inserted site is 3917 too close to an existing site. Defaults to False. 3918 properties (dict): A dict of properties for the Site. 3919 3920 Returns: 3921 New molecule with inserted site. 3922 """ 3923 return self.insert( 3924 len(self), 3925 species, 3926 coords, 3927 validate_proximity=validate_proximity, 3928 properties=properties, 3929 ) 3930 3931 def set_charge_and_spin(self, charge: float, spin_multiplicity: Optional[float] = None): 3932 """ 3933 Set the charge and spin multiplicity. 3934 3935 Args: 3936 charge (int): Charge for the molecule. Defaults to 0. 3937 spin_multiplicity (int): Spin multiplicity for molecule. 3938 Defaults to None, which means that the spin multiplicity is 3939 set to 1 if the molecule has no unpaired electrons and to 2 3940 if there are unpaired electrons. 3941 """ 3942 self._charge = charge 3943 nelectrons = 0.0 3944 for site in self._sites: 3945 for sp, amt in site.species.items(): 3946 if not isinstance(sp, DummySpecies): 3947 nelectrons += sp.Z * amt 3948 nelectrons -= charge 3949 self._nelectrons = nelectrons 3950 if spin_multiplicity: 3951 if (nelectrons + spin_multiplicity) % 2 != 1: 3952 raise ValueError( 3953 "Charge of {} and spin multiplicity of {} is" 3954 " not possible for this molecule".format(self._charge, spin_multiplicity) 3955 ) 3956 self._spin_multiplicity = spin_multiplicity 3957 else: 3958 self._spin_multiplicity = 1 if nelectrons % 2 == 0 else 2 3959 3960 def insert( # type: ignore 3961 self, 3962 i: int, 3963 species: CompositionLike, 3964 coords: ArrayLike, 3965 validate_proximity: bool = False, 3966 properties: dict = None, 3967 ): 3968 """ 3969 Insert a site to the molecule. 3970 3971 Args: 3972 i (int): Index to insert site 3973 species: species of inserted site 3974 coords (3x1 array): coordinates of inserted site 3975 validate_proximity (bool): Whether to check if inserted site is 3976 too close to an existing site. Defaults to True. 3977 properties (dict): Dict of properties for the Site. 3978 3979 Returns: 3980 New molecule with inserted site. 3981 """ 3982 new_site = Site(species, coords, properties=properties) 3983 if validate_proximity: 3984 for site in self: 3985 if site.distance(new_site) < self.DISTANCE_TOLERANCE: 3986 raise ValueError("New site is too close to an existing " "site!") 3987 self._sites.insert(i, new_site) 3988 3989 def remove_species(self, species: Sequence[SpeciesLike]): 3990 """ 3991 Remove all occurrences of a species from a molecule. 3992 3993 Args: 3994 species: Species to remove. 3995 """ 3996 new_sites = [] 3997 species = [get_el_sp(sp) for sp in species] 3998 for site in self._sites: 3999 new_sp_occu = {sp: amt for sp, amt in site.species.items() if sp not in species} 4000 if len(new_sp_occu) > 0: 4001 new_sites.append(Site(new_sp_occu, site.coords, properties=site.properties)) 4002 self._sites = new_sites 4003 4004 def remove_sites(self, indices: Sequence[int]): 4005 """ 4006 Delete sites with at indices. 4007 4008 Args: 4009 indices: Sequence of indices of sites to delete. 4010 """ 4011 self._sites = [self._sites[i] for i in range(len(self._sites)) if i not in indices] 4012 4013 def translate_sites(self, indices: Sequence[int] = None, vector: ArrayLike = None): 4014 """ 4015 Translate specific sites by some vector, keeping the sites within the 4016 unit cell. 4017 4018 Args: 4019 indices (list): List of site indices on which to perform the 4020 translation. 4021 vector (3x1 array): Translation vector for sites. 4022 """ 4023 if indices is None: 4024 indices = range(len(self)) 4025 if vector is None: 4026 vector = [0, 0, 0] 4027 for i in indices: 4028 site = self._sites[i] 4029 new_site = Site(site.species, site.coords + vector, properties=site.properties) # type: ignore 4030 self._sites[i] = new_site 4031 4032 def rotate_sites( 4033 self, indices: Sequence[int] = None, theta: float = 0.0, axis: ArrayLike = None, anchor: ArrayLike = None 4034 ): 4035 """ 4036 Rotate specific sites by some angle around vector at anchor. 4037 4038 Args: 4039 indices (list): List of site indices on which to perform the 4040 translation. 4041 theta (float): Angle in radians 4042 axis (3x1 array): Rotation axis vector. 4043 anchor (3x1 array): Point of rotation. 4044 """ 4045 4046 from numpy import cross, eye 4047 from numpy.linalg import norm 4048 from scipy.linalg import expm 4049 4050 if indices is None: 4051 indices = range(len(self)) 4052 4053 if axis is None: 4054 axis = [0, 0, 1] 4055 4056 if anchor is None: 4057 anchor = [0, 0, 0] 4058 4059 anchor = np.array(anchor) 4060 axis = np.array(axis) 4061 4062 theta %= 2 * np.pi 4063 4064 rm = expm(cross(eye(3), axis / norm(axis)) * theta) 4065 4066 for i in indices: 4067 site = self._sites[i] 4068 s = ((np.dot(rm, (site.coords - anchor).T)).T + anchor).ravel() 4069 new_site = Site(site.species, s, properties=site.properties) 4070 self._sites[i] = new_site 4071 4072 def perturb(self, distance: float): 4073 """ 4074 Performs a random perturbation of the sites in a structure to break 4075 symmetries. 4076 4077 Args: 4078 distance (float): Distance in angstroms by which to perturb each 4079 site. 4080 """ 4081 4082 def get_rand_vec(): 4083 # deals with zero vectors. 4084 vector = np.random.randn(3) 4085 vnorm = np.linalg.norm(vector) 4086 return vector / vnorm * distance if vnorm != 0 else get_rand_vec() 4087 4088 for i in range(len(self._sites)): 4089 self.translate_sites([i], get_rand_vec()) 4090 4091 def apply_operation(self, symmop: SymmOp): 4092 """ 4093 Apply a symmetry operation to the molecule. 4094 4095 Args: 4096 symmop (SymmOp): Symmetry operation to apply. 4097 """ 4098 4099 def operate_site(site): 4100 new_cart = symmop.operate(site.coords) 4101 return Site(site.species, new_cart, properties=site.properties) 4102 4103 self._sites = [operate_site(s) for s in self._sites] 4104 4105 def copy(self): 4106 """ 4107 Convenience method to get a copy of the molecule. 4108 4109 Returns: 4110 A copy of the Molecule. 4111 """ 4112 return self.__class__.from_sites(self) 4113 4114 def substitute(self, index: int, func_group: Union["IMolecule", "Molecule", str], bond_order: int = 1): 4115 """ 4116 Substitute atom at index with a functional group. 4117 4118 Args: 4119 index (int): Index of atom to substitute. 4120 func_grp: Substituent molecule. There are two options: 4121 4122 1. Providing an actual molecule as the input. The first atom 4123 must be a DummySpecies X, indicating the position of 4124 nearest neighbor. The second atom must be the next 4125 nearest atom. For example, for a methyl group 4126 substitution, func_grp should be X-CH3, where X is the 4127 first site and C is the second site. What the code will 4128 do is to remove the index site, and connect the nearest 4129 neighbor to the C atom in CH3. The X-C bond indicates the 4130 directionality to connect the atoms. 4131 2. A string name. The molecule will be obtained from the 4132 relevant template in func_groups.json. 4133 bond_order (int): A specified bond order to calculate the bond 4134 length between the attached functional group and the nearest 4135 neighbor site. Defaults to 1. 4136 """ 4137 4138 # Find the nearest neighbor that is not a terminal atom. 4139 all_non_terminal_nn = [] 4140 for nn in self.get_neighbors(self[index], 3): 4141 # Check that the nn has neighbors within a sensible distance but 4142 # is not the site being substituted. 4143 for nn2 in self.get_neighbors(nn, 3): 4144 if nn2 != self[index] and nn2.nn_distance < 1.2 * get_bond_length(nn.specie, nn2.specie): 4145 all_non_terminal_nn.append(nn) 4146 break 4147 4148 if len(all_non_terminal_nn) == 0: 4149 raise RuntimeError("Can't find a non-terminal neighbor to attach" " functional group to.") 4150 4151 non_terminal_nn = min(all_non_terminal_nn, key=lambda nn: nn.nn_distance) 4152 4153 # Set the origin point to be the coordinates of the nearest 4154 # non-terminal neighbor. 4155 origin = non_terminal_nn.coords 4156 4157 # Pass value of functional group--either from user-defined or from 4158 # functional.json 4159 if isinstance(func_group, Molecule): 4160 func_grp = func_group 4161 else: 4162 # Check to see whether the functional group is in database. 4163 if func_group not in FunctionalGroups: 4164 raise RuntimeError("Can't find functional group in list. " "Provide explicit coordinate instead") 4165 func_grp = FunctionalGroups[func_group] 4166 4167 # If a bond length can be found, modify func_grp so that the X-group 4168 # bond length is equal to the bond length. 4169 bl = get_bond_length(non_terminal_nn.specie, func_grp[1].specie, bond_order=bond_order) 4170 if bl is not None: 4171 func_grp = func_grp.copy() 4172 vec = func_grp[0].coords - func_grp[1].coords 4173 vec /= np.linalg.norm(vec) 4174 func_grp[0] = "X", func_grp[1].coords + float(bl) * vec 4175 4176 # Align X to the origin. 4177 x = func_grp[0] 4178 func_grp.translate_sites(list(range(len(func_grp))), origin - x.coords) 4179 4180 # Find angle between the attaching bond and the bond to be replaced. 4181 v1 = func_grp[1].coords - origin 4182 v2 = self[index].coords - origin 4183 angle = get_angle(v1, v2) 4184 4185 if 1 < abs(angle % 180) < 179: 4186 # For angles which are not 0 or 180, we perform a rotation about 4187 # the origin along an axis perpendicular to both bonds to align 4188 # bonds. 4189 axis = np.cross(v1, v2) 4190 op = SymmOp.from_origin_axis_angle(origin, axis, angle) 4191 func_grp.apply_operation(op) 4192 elif abs(abs(angle) - 180) < 1: 4193 # We have a 180 degree angle. Simply do an inversion about the 4194 # origin 4195 for i, fg in enumerate(func_grp): 4196 func_grp[i] = (fg.species, origin - (fg.coords - origin)) 4197 4198 # Remove the atom to be replaced, and add the rest of the functional 4199 # group. 4200 del self[index] 4201 for site in func_grp[1:]: 4202 self._sites.append(site) 4203 4204 4205class StructureError(Exception): 4206 """ 4207 Exception class for Structure. 4208 Raised when the structure has problems, e.g., atoms that are too close. 4209 """ 4210 4211 pass 4212 4213 4214with open(os.path.join(os.path.dirname(__file__), "func_groups.json"), "rt") as f: 4215 FunctionalGroups = {k: Molecule(v["species"], v["coords"]) for k, v in json.load(f).items()} 4216