1# coding: utf-8
2# Copyright (c) Pymatgen Development Team.
3# Distributed under the terms of the MIT License.
4
5"""
6This module provides classes used to define a non-periodic molecule and a
7periodic structure.
8"""
9
10import collections
11import functools
12import itertools
13import json
14import math
15import os
16import random
17import re
18import sys
19import warnings
20from abc import ABCMeta, abstractmethod
21from fnmatch import fnmatch
22from typing import (
23    Any,
24    Callable,
25    Dict,
26    Iterable,
27    Iterator,
28    List,
29    Optional,
30    Sequence,
31    Set,
32    Tuple,
33    Union,
34)
35
36import numpy as np
37from monty.dev import deprecated
38from monty.io import zopen
39from monty.json import MSONable
40from tabulate import tabulate
41
42from pymatgen.core import yaml
43from pymatgen.core.bonds import CovalentBond, get_bond_length
44from pymatgen.core.composition import Composition
45from pymatgen.core.lattice import Lattice, get_points_in_spheres
46from pymatgen.core.operations import SymmOp
47from pymatgen.core.periodic_table import DummySpecies, Element, Species, get_el_sp
48from pymatgen.core.sites import PeriodicSite, Site
49from pymatgen.core.units import Length, Mass
50from pymatgen.util.coord import all_distances, get_angle, lattice_points_in_supercell
51from pymatgen.util.typing import ArrayLike, CompositionLike, SpeciesLike
52
53if sys.version_info >= (3, 8):
54    from typing import Literal
55else:
56    from typing_extensions import Literal
57
58
59class Neighbor(Site):
60    """
61    Simple Site subclass to contain a neighboring atom that skips all the unnecessary checks for speed. Can be
62    used as a fixed-length tuple of size 3 to retain backwards compatibility with past use cases.
63
64        (site, nn_distance, index).
65
66    In future, usage should be to call attributes, e.g., Neighbor.index, Neighbor.distance, etc.
67    """
68
69    def __init__(
70        self,
71        species: Composition,
72        coords: np.ndarray,
73        properties: dict = None,
74        nn_distance: float = 0.0,
75        index: int = 0,
76    ):
77        """
78        :param species: Same as Site
79        :param coords: Same as Site, but must be fractional.
80        :param properties: Same as Site
81        :param nn_distance: Distance to some other Site.
82        :param index: Index within structure.
83        """
84        self.coords = coords
85        self._species = species
86        self.properties = properties or {}
87        self.nn_distance = nn_distance
88        self.index = index
89
90    def __len__(self):
91        """
92        Make neighbor Tuple-like to retain backwards compatibility.
93        """
94        return 3
95
96    def __getitem__(self, i: int):  # type: ignore
97        """
98        Make neighbor Tuple-like to retain backwards compatibility.
99
100        :param i:
101        :return:
102        """
103        return (self, self.nn_distance, self.index)[i]
104
105
106class PeriodicNeighbor(PeriodicSite):
107    """
108    Simple PeriodicSite subclass to contain a neighboring atom that skips all
109    the unnecessary checks for speed. Can be used as a fixed-length tuple of
110    size 4 to retain backwards compatibility with past use cases.
111
112        (site, distance, index, image).
113
114    In future, usage should be to call attributes, e.g., PeriodicNeighbor.index,
115    PeriodicNeighbor.distance, etc.
116    """
117
118    def __init__(
119        self,
120        species: Composition,
121        coords: np.ndarray,
122        lattice: Lattice,
123        properties: dict = None,
124        nn_distance: float = 0.0,
125        index: int = 0,
126        image: tuple = (0, 0, 0),
127    ):
128        """
129        :param species: Same as PeriodicSite
130        :param coords: Same as PeriodicSite, but must be fractional.
131        :param lattice: Same as PeriodicSite
132        :param properties: Same as PeriodicSite
133        :param nn_distance: Distance to some other Site.
134        :param index: Index within structure.
135        :param image: PeriodicImage
136        """
137        self._lattice = lattice
138        self._frac_coords = coords
139        self._species = species
140        self.properties = properties or {}
141        self.nn_distance = nn_distance
142        self.index = index
143        self.image = image
144
145    @property  # type: ignore
146    def coords(self) -> np.ndarray:  # type: ignore
147        """
148        :return: Cartesian coords.
149        """
150        return self._lattice.get_cartesian_coords(self._frac_coords)
151
152    def __len__(self):
153        """
154        Make neighbor Tuple-like to retain backwards compatibility.
155        """
156        return 4
157
158    def __getitem__(self, i: int):  # type: ignore
159        """
160        Make neighbor Tuple-like to retain backwards compatibility.
161
162        :param i:
163        :return:
164        """
165        return (self, self.nn_distance, self.index, self.image)[i]
166
167
168class SiteCollection(collections.abc.Sequence, metaclass=ABCMeta):
169    """
170    Basic SiteCollection. Essentially a sequence of Sites or PeriodicSites.
171    This serves as a base class for Molecule (a collection of Site, i.e., no
172    periodicity) and Structure (a collection of PeriodicSites, i.e.,
173    periodicity). Not meant to be instantiated directly.
174    """
175
176    # Tolerance in Angstrom for determining if sites are too close.
177    DISTANCE_TOLERANCE = 0.5
178
179    @property
180    @abstractmethod
181    def sites(self):
182        """
183        Returns a tuple of sites.
184        """
185
186    @abstractmethod
187    def get_distance(self, i: int, j: int) -> float:
188        """
189        Returns distance between sites at index i and j.
190
191        Args:
192            i: Index of first site
193            j: Index of second site
194
195        Returns:
196            Distance between sites at index i and index j.
197        """
198
199    @property
200    def distance_matrix(self) -> np.ndarray:
201        """
202        Returns the distance matrix between all sites in the structure. For
203        periodic structures, this is overwritten to return the nearest image
204        distance.
205        """
206        return all_distances(self.cart_coords, self.cart_coords)
207
208    @property
209    def species(self) -> List[Composition]:
210        """
211        Only works for ordered structures.
212        Disordered structures will raise an AttributeError.
213
214        Returns:
215            ([Species]) List of species at each site of the structure.
216        """
217        return [site.specie for site in self]
218
219    @property
220    def species_and_occu(self) -> List[Composition]:
221        """
222        List of species and occupancies at each site of the structure.
223        """
224        return [site.species for site in self]
225
226    @property
227    def ntypesp(self) -> int:
228        """Number of types of atoms."""
229        return len(self.types_of_species)
230
231    @property
232    def types_of_species(self) -> Tuple[Union[Element, Species, DummySpecies]]:
233        """
234        List of types of specie.
235        """
236        # Cannot use set since we want a deterministic algorithm.
237        types = []  # type: List[Union[Element, Species, DummySpecies]]
238        for site in self:
239            for sp, v in site.species.items():
240                if v != 0:
241                    types.append(sp)
242        return tuple(sorted(set(types)))  # type: ignore
243
244    @property
245    def types_of_specie(self) -> Tuple[Union[Element, Species, DummySpecies]]:
246        """
247        Specie->Species rename. Maintained for backwards compatibility.
248        """
249        return self.types_of_species
250
251    def group_by_types(self) -> Iterator[Union[Site, PeriodicSite]]:
252        """Iterate over species grouped by type"""
253        for t in self.types_of_species:
254            for site in self:
255                if site.specie == t:
256                    yield site
257
258    def indices_from_symbol(self, symbol: str) -> Tuple[int, ...]:
259        """
260        Returns a tuple with the sequential indices of the sites
261        that contain an element with the given chemical symbol.
262        """
263        return tuple((i for i, specie in enumerate(self.species) if specie.symbol == symbol))
264
265    @property
266    def symbol_set(self) -> Tuple[str]:
267        """
268        Tuple with the set of chemical symbols.
269        Note that len(symbol_set) == len(types_of_specie)
270        """
271        return tuple(sorted(specie.symbol for specie in self.types_of_species))  # type: ignore
272
273    @property  # type: ignore
274    def atomic_numbers(self) -> Tuple[int]:
275        """List of atomic numbers."""
276        return tuple(site.specie.Z for site in self)  # type: ignore
277
278    @property
279    def site_properties(self) -> Dict[str, List]:
280        """
281        Returns the site properties as a dict of sequences. E.g.,
282        {"magmom": (5,-5), "charge": (-4,4)}.
283        """
284        props = {}  # type: Dict[str, List]
285        prop_keys = set()  # type: Set[str]
286        for site in self:
287            prop_keys.update(site.properties.keys())
288
289        for k in prop_keys:
290            props[k] = [site.properties.get(k, None) for site in self]
291        return props
292
293    def __contains__(self, site):
294        return site in self.sites
295
296    def __iter__(self):
297        return self.sites.__iter__()
298
299    def __getitem__(self, ind):
300        return self.sites[ind]
301
302    def __len__(self):
303        return len(self.sites)
304
305    def __hash__(self):
306        # for now, just use the composition hash code.
307        return self.composition.__hash__()
308
309    @property
310    def num_sites(self) -> int:
311        """
312        Number of sites.
313        """
314        return len(self)
315
316    @property
317    def cart_coords(self):
318        """
319        Returns a np.array of the cartesian coordinates of sites in the
320        structure.
321        """
322        return np.array([site.coords for site in self])
323
324    @property
325    def formula(self) -> str:
326        """
327        (str) Returns the formula.
328        """
329        return self.composition.formula
330
331    @property
332    def composition(self) -> Composition:
333        """
334        (Composition) Returns the composition
335        """
336        elmap = collections.defaultdict(float)  # type: Dict[Species, float]
337        for site in self:
338            for species, occu in site.species.items():
339                elmap[species] += occu
340        return Composition(elmap)
341
342    @property
343    def charge(self) -> float:
344        """
345        Returns the net charge of the structure based on oxidation states. If
346        Elements are found, a charge of 0 is assumed.
347        """
348        charge = 0
349        for site in self:
350            for specie, amt in site.species.items():
351                charge += getattr(specie, "oxi_state", 0) * amt
352        return charge
353
354    @property
355    def is_ordered(self) -> bool:
356        """
357        Checks if structure is ordered, meaning no partial occupancies in any
358        of the sites.
359        """
360        return all((site.is_ordered for site in self))
361
362    def get_angle(self, i: int, j: int, k: int) -> float:
363        """
364        Returns angle specified by three sites.
365
366        Args:
367            i: Index of first site.
368            j: Index of second site.
369            k: Index of third site.
370
371        Returns:
372            Angle in degrees.
373        """
374        v1 = self[i].coords - self[j].coords
375        v2 = self[k].coords - self[j].coords
376        return get_angle(v1, v2, units="degrees")
377
378    def get_dihedral(self, i: int, j: int, k: int, l: int) -> float:
379        """
380        Returns dihedral angle specified by four sites.
381
382        Args:
383            i: Index of first site
384            j: Index of second site
385            k: Index of third site
386            l: Index of fourth site
387
388        Returns:
389            Dihedral angle in degrees.
390        """
391        v1 = self[k].coords - self[l].coords
392        v2 = self[j].coords - self[k].coords
393        v3 = self[i].coords - self[j].coords
394        v23 = np.cross(v2, v3)
395        v12 = np.cross(v1, v2)
396        return math.degrees(math.atan2(np.linalg.norm(v2) * np.dot(v1, v23), np.dot(v12, v23)))
397
398    def is_valid(self, tol: float = DISTANCE_TOLERANCE) -> bool:
399        """
400        True if SiteCollection does not contain atoms that are too close
401        together. Note that the distance definition is based on type of
402        SiteCollection. Cartesian distances are used for non-periodic
403        Molecules, while PBC is taken into account for periodic structures.
404
405        Args:
406            tol (float): Distance tolerance. Default is 0.5A.
407
408        Returns:
409            (bool) True if SiteCollection does not contain atoms that are too
410            close together.
411        """
412        if len(self.sites) == 1:
413            return True
414        all_dists = self.distance_matrix[np.triu_indices(len(self), 1)]
415        return bool(np.min(all_dists) > tol)
416
417    @abstractmethod
418    def to(self, fmt: str = None, filename: str = None):
419        """
420        Generates well-known string representations of SiteCollections (e.g.,
421        molecules / structures). Should return a string type or write to a file.
422        """
423
424    @classmethod
425    @abstractmethod
426    def from_str(cls, input_string: str, fmt: Any):
427        """
428        Reads in SiteCollection from a string.
429        """
430
431    @classmethod
432    @abstractmethod
433    def from_file(cls, filename: str):
434        """
435        Reads in SiteCollection from a filename.
436        """
437
438    def add_site_property(self, property_name: str, values: List):
439        """
440        Adds a property to a site.
441
442        Args:
443            property_name (str): The name of the property to add.
444            values (list): A sequence of values. Must be same length as
445                number of sites.
446        """
447        if len(values) != len(self.sites):
448            raise ValueError("Values must be same length as sites.")
449        for site, val in zip(self.sites, values):
450            site.properties[property_name] = val
451
452    def remove_site_property(self, property_name: str):
453        """
454        Removes a property to a site.
455
456        Args:
457            property_name (str): The name of the property to remove.
458        """
459        for site in self.sites:
460            del site.properties[property_name]
461
462    def replace_species(self, species_mapping: Dict[SpeciesLike, SpeciesLike]):
463        """
464        Swap species.
465
466        Args:
467            species_mapping (dict): dict of species to swap. Species can be
468                elements too. E.g., {Element("Li"): Element("Na")} performs
469                a Li for Na substitution. The second species can be a
470                sp_and_occu dict. For example, a site with 0.5 Si that is
471                passed the mapping {Element('Si): {Element('Ge'):0.75,
472                Element('C'):0.25} } will have .375 Ge and .125 C.
473        """
474
475        sp_mapping = {get_el_sp(k): v for k, v in species_mapping.items()}
476        sp_to_replace = set(sp_mapping.keys())
477        sp_in_structure = set(self.composition.keys())
478        if not sp_in_structure.issuperset(sp_to_replace):
479            warnings.warn(
480                "Some species to be substituted are not present in "
481                "structure. Pls check your input. Species to be "
482                "substituted = %s; Species in structure = %s" % (sp_to_replace, sp_in_structure)
483            )
484
485        for site in self.sites:
486            if sp_to_replace.intersection(site.species):
487                c = Composition()
488                for sp, amt in site.species.items():
489                    new_sp = sp_mapping.get(sp, sp)
490                    try:
491                        c += Composition(new_sp) * amt
492                    except Exception:
493                        c += {new_sp: amt}
494                site.species = c
495
496    def add_oxidation_state_by_element(self, oxidation_states: Dict[str, float]):
497        """
498        Add oxidation states.
499
500        Args:
501            oxidation_states (dict): Dict of oxidation states.
502                E.g., {"Li":1, "Fe":2, "P":5, "O":-2}
503        """
504        try:
505            for site in self.sites:
506                new_sp = {}
507                for el, occu in site.species.items():
508                    sym = el.symbol
509                    new_sp[Species(sym, oxidation_states[sym])] = occu
510                site.species = Composition(new_sp)
511        except KeyError:
512            raise ValueError("Oxidation state of all elements must be " "specified in the dictionary.")
513
514    def add_oxidation_state_by_site(self, oxidation_states: List[float]):
515        """
516        Add oxidation states to a structure by site.
517
518        Args:
519            oxidation_states (list): List of oxidation states.
520                E.g., [1, 1, 1, 1, 2, 2, 2, 2, 5, 5, 5, 5, -2, -2, -2, -2]
521        """
522        if len(oxidation_states) != len(self.sites):
523            raise ValueError("Oxidation states of all sites must be " "specified.")
524        for site, ox in zip(self.sites, oxidation_states):
525            new_sp = {}
526            for el, occu in site.species.items():
527                sym = el.symbol
528                new_sp[Species(sym, ox)] = occu
529            site.species = Composition(new_sp)
530
531    def remove_oxidation_states(self):
532        """
533        Removes oxidation states from a structure.
534        """
535        for site in self.sites:
536            new_sp = collections.defaultdict(float)
537            for el, occu in site.species.items():
538                sym = el.symbol
539                new_sp[Element(sym)] += occu
540            site.species = Composition(new_sp)
541
542    def add_oxidation_state_by_guess(self, **kwargs):
543        """
544        Decorates the structure with oxidation state, guessing
545        using Composition.oxi_state_guesses()
546
547        Args:
548            **kwargs: parameters to pass into oxi_state_guesses()
549        """
550        oxid_guess = self.composition.oxi_state_guesses(**kwargs)
551        oxid_guess = oxid_guess or [{e.symbol: 0 for e in self.composition}]
552        self.add_oxidation_state_by_element(oxid_guess[0])
553
554    def add_spin_by_element(self, spins: Dict[str, float]):
555        """
556        Add spin states to a structure.
557
558        Args:
559            spins (dict): Dict of spins associated with elements or species,
560                e.g. {"Ni":+5} or {"Ni2+":5}
561        """
562        for site in self.sites:
563            new_sp = {}
564            for sp, occu in site.species.items():
565                sym = sp.symbol
566                oxi_state = getattr(sp, "oxi_state", None)
567                new_sp[
568                    Species(
569                        sym,
570                        oxidation_state=oxi_state,
571                        properties={"spin": spins.get(str(sp), spins.get(sym, None))},
572                    )
573                ] = occu
574            site.species = Composition(new_sp)
575
576    def add_spin_by_site(self, spins: List[float]):
577        """
578        Add spin states to a structure by site.
579
580        Args:
581            spins (list): List of spins
582                E.g., [+5, -5, 0, 0]
583        """
584        if len(spins) != len(self.sites):
585            raise ValueError("Spin of all sites must be " "specified in the dictionary.")
586
587        for site, spin in zip(self.sites, spins):
588            new_sp = {}
589            for sp, occu in site.species.items():
590                sym = sp.symbol
591                oxi_state = getattr(sp, "oxi_state", None)
592                new_sp[Species(sym, oxidation_state=oxi_state, properties={"spin": spin})] = occu
593            site.species = Composition(new_sp)
594
595    def remove_spin(self):
596        """
597        Removes spin states from a structure.
598        """
599        for site in self.sites:
600            new_sp = collections.defaultdict(float)
601            for sp, occu in site.species.items():
602                oxi_state = getattr(sp, "oxi_state", None)
603                new_sp[Species(sp.symbol, oxidation_state=oxi_state)] += occu
604            site.species = new_sp
605
606    def extract_cluster(self, target_sites: List[Site], **kwargs):
607        r"""
608        Extracts a cluster of atoms based on bond lengths
609
610        Args:
611            target_sites ([Site]): List of initial sites to nucleate cluster.
612            **kwargs: kwargs passed through to CovalentBond.is_bonded.
613
614        Returns:
615            [Site/PeriodicSite] Cluster of atoms.
616        """
617        cluster = list(target_sites)
618        others = [site for site in self if site not in cluster]
619        size = 0
620        while len(cluster) > size:
621            size = len(cluster)
622            new_others = []
623            for site in others:
624                for site2 in cluster:
625                    if CovalentBond.is_bonded(site, site2, **kwargs):
626                        cluster.append(site)
627                        break
628                else:
629                    new_others.append(site)
630            others = new_others
631        return cluster
632
633
634class IStructure(SiteCollection, MSONable):
635    """
636    Basic immutable Structure object with periodicity. Essentially a sequence
637    of PeriodicSites having a common lattice. IStructure is made to be
638    (somewhat) immutable so that they can function as keys in a dict. To make
639    modifications, use the standard Structure object instead. Structure
640    extends Sequence and Hashable, which means that in many cases,
641    it can be used like any Python sequence. Iterating through a
642    structure is equivalent to going through the sites in sequence.
643    """
644
645    def __init__(
646        self,
647        lattice: Union[ArrayLike, Lattice],
648        species: Sequence[CompositionLike],
649        coords: Sequence[ArrayLike],
650        charge: float = None,
651        validate_proximity: bool = False,
652        to_unit_cell: bool = False,
653        coords_are_cartesian: bool = False,
654        site_properties: dict = None,
655    ):
656        """
657        Create a periodic structure.
658
659        Args:
660            lattice (Lattice/3x3 array): The lattice, either as a
661                :class:`pymatgen.core.lattice.Lattice` or
662                simply as any 2D array. Each row should correspond to a lattice
663                vector. E.g., [[10,0,0], [20,10,0], [0,0,30]] specifies a
664                lattice with lattice vectors [10,0,0], [20,10,0] and [0,0,30].
665            species ([Species]): Sequence of species on each site. Can take in
666                flexible input, including:
667
668                i.  A sequence of element / species specified either as string
669                    symbols, e.g. ["Li", "Fe2+", "P", ...] or atomic numbers,
670                    e.g., (3, 56, ...) or actual Element or Species objects.
671
672                ii. List of dict of elements/species and occupancies, e.g.,
673                    [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of
674                    disordered structures.
675            coords (Nx3 array): list of fractional/cartesian coordinates of
676                each species.
677            charge (int): overall charge of the structure. Defaults to behavior
678                in SiteCollection where total charge is the sum of the oxidation
679                states.
680            validate_proximity (bool): Whether to check if there are sites
681                that are less than 0.01 Ang apart. Defaults to False.
682            to_unit_cell (bool): Whether to map all sites into the unit cell,
683                i.e., fractional coords between 0 and 1. Defaults to False.
684            coords_are_cartesian (bool): Set to True if you are providing
685                coordinates in cartesian coordinates. Defaults to False.
686            site_properties (dict): Properties associated with the sites as a
687                dict of sequences, e.g., {"magmom":[5,5,5,5]}. The sequences
688                have to be the same length as the atomic species and
689                fractional_coords. Defaults to None for no properties.
690        """
691        if len(species) != len(coords):
692            raise StructureError(
693                "The list of atomic species must be of the" " same length as the list of fractional" " coordinates."
694            )
695
696        if isinstance(lattice, Lattice):
697            self._lattice = lattice
698        else:
699            self._lattice = Lattice(lattice)
700
701        sites = []
702        for i, sp in enumerate(species):
703            prop = None
704            if site_properties:
705                prop = {k: v[i] for k, v in site_properties.items()}
706
707            sites.append(
708                PeriodicSite(
709                    sp,
710                    coords[i],
711                    self._lattice,
712                    to_unit_cell,
713                    coords_are_cartesian=coords_are_cartesian,
714                    properties=prop,
715                )
716            )
717        self._sites: Tuple[PeriodicSite, ...] = tuple(sites)
718        if validate_proximity and not self.is_valid():
719            raise StructureError(("Structure contains sites that are ", "less than 0.01 Angstrom apart!"))
720        self._charge = charge
721
722    @classmethod
723    def from_sites(
724        cls,
725        sites: List[PeriodicSite],
726        charge: float = None,
727        validate_proximity: bool = False,
728        to_unit_cell: bool = False,
729    ):
730        """
731        Convenience constructor to make a Structure from a list of sites.
732
733        Args:
734            sites: Sequence of PeriodicSites. Sites must have the same
735                lattice.
736            charge: Charge of structure.
737            validate_proximity (bool): Whether to check if there are sites
738                that are less than 0.01 Ang apart. Defaults to False.
739            to_unit_cell (bool): Whether to translate sites into the unit
740                cell.
741
742        Returns:
743            (Structure) Note that missing properties are set as None.
744        """
745        if len(sites) < 1:
746            raise ValueError("You need at least one site to construct a %s" % cls)
747        prop_keys = []  # type: List[str]
748        props = {}
749        lattice = sites[0].lattice
750        for i, site in enumerate(sites):
751            if site.lattice != lattice:
752                raise ValueError("Sites must belong to the same lattice")
753            for k, v in site.properties.items():
754                if k not in prop_keys:
755                    prop_keys.append(k)
756                    props[k] = [None] * len(sites)
757                props[k][i] = v
758        for k, v in props.items():
759            if any((vv is None for vv in v)):
760                warnings.warn("Not all sites have property %s. Missing values " "are set to None." % k)
761        return cls(
762            lattice,
763            [site.species for site in sites],
764            [site.frac_coords for site in sites],
765            charge=charge,
766            site_properties=props,
767            validate_proximity=validate_proximity,
768            to_unit_cell=to_unit_cell,
769        )
770
771    @classmethod
772    def from_spacegroup(
773        cls,
774        sg: str,
775        lattice: Union[List, np.ndarray, Lattice],
776        species: Sequence[Union[str, Element, Species, DummySpecies, Composition]],
777        coords: Sequence[Sequence[float]],
778        site_properties: Dict[str, Sequence] = None,
779        coords_are_cartesian: bool = False,
780        tol: float = 1e-5,
781    ) -> Union["IStructure", "Structure"]:
782        """
783        Generate a structure using a spacegroup. Note that only symmetrically
784        distinct species and coords should be provided. All equivalent sites
785        are generated from the spacegroup operations.
786
787        Args:
788            sg (str/int): The spacegroup. If a string, it will be interpreted
789                as one of the notations supported by
790                pymatgen.symmetry.groups.Spacegroup. E.g., "R-3c" or "Fm-3m".
791                If an int, it will be interpreted as an international number.
792            lattice (Lattice/3x3 array): The lattice, either as a
793                :class:`pymatgen.core.lattice.Lattice` or
794                simply as any 2D array. Each row should correspond to a lattice
795                vector. E.g., [[10,0,0], [20,10,0], [0,0,30]] specifies a
796                lattice with lattice vectors [10,0,0], [20,10,0] and [0,0,30].
797                Note that no attempt is made to check that the lattice is
798                compatible with the spacegroup specified. This may be
799                introduced in a future version.
800            species ([Species]): Sequence of species on each site. Can take in
801                flexible input, including:
802
803                i.  A sequence of element / species specified either as string
804                    symbols, e.g. ["Li", "Fe2+", "P", ...] or atomic numbers,
805                    e.g., (3, 56, ...) or actual Element or Species objects.
806
807                ii. List of dict of elements/species and occupancies, e.g.,
808                    [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of
809                    disordered structures.
810            coords (Nx3 array): list of fractional/cartesian coordinates of
811                each species.
812            coords_are_cartesian (bool): Set to True if you are providing
813                coordinates in cartesian coordinates. Defaults to False.
814            site_properties (dict): Properties associated with the sites as a
815                dict of sequences, e.g., {"magmom":[5,5,5,5]}. The sequences
816                have to be the same length as the atomic species and
817                fractional_coords. Defaults to None for no properties.
818            tol (float): A fractional tolerance to deal with numerical
819               precision issues in determining if orbits are the same.
820        """
821        from pymatgen.symmetry.groups import SpaceGroup
822
823        try:
824            i = int(sg)
825            sgp = SpaceGroup.from_int_number(i)
826        except ValueError:
827            sgp = SpaceGroup(sg)
828
829        if isinstance(lattice, Lattice):
830            latt = lattice
831        else:
832            latt = Lattice(lattice)
833
834        if not sgp.is_compatible(latt):
835            raise ValueError(
836                "Supplied lattice with parameters %s is incompatible with "
837                "supplied spacegroup %s!" % (latt.parameters, sgp.symbol)
838            )
839
840        if len(species) != len(coords):
841            raise ValueError(
842                "Supplied species and coords lengths (%d vs %d) are " "different!" % (len(species), len(coords))
843            )
844
845        frac_coords = (
846            np.array(coords, dtype=np.float_) if not coords_are_cartesian else latt.get_fractional_coords(coords)
847        )
848
849        props = {} if site_properties is None else site_properties
850
851        all_sp = []  # type: List[Union[str, Element, Species, DummySpecies, Composition]]
852        all_coords = []  # type: List[List[float]]
853        all_site_properties = collections.defaultdict(list)  # type: Dict[str, List]
854        for i, (sp, c) in enumerate(zip(species, frac_coords)):
855            cc = sgp.get_orbit(c, tol=tol)
856            all_sp.extend([sp] * len(cc))
857            all_coords.extend(cc)
858            for k, v in props.items():
859                all_site_properties[k].extend([v[i]] * len(cc))
860
861        return cls(latt, all_sp, all_coords, site_properties=all_site_properties)
862
863    @classmethod
864    def from_magnetic_spacegroup(
865        cls,
866        msg: Union[str, "MagneticSpaceGroup"],  # type: ignore  # noqa: F821
867        lattice: Union[List, np.ndarray, Lattice],
868        species: Sequence[Union[str, Element, Species, DummySpecies, Composition]],
869        coords: Sequence[Sequence[float]],
870        site_properties: Dict[str, Sequence],
871        coords_are_cartesian: bool = False,
872        tol: float = 1e-5,
873    ) -> Union["IStructure", "Structure"]:
874        """
875        Generate a structure using a magnetic spacegroup. Note that only
876        symmetrically distinct species, coords and magmoms should be provided.]
877        All equivalent sites are generated from the spacegroup operations.
878
879        Args:
880            msg (str/list/:class:`pymatgen.symmetry.maggroups.MagneticSpaceGroup`):
881                The magnetic spacegroup.
882                If a string, it will be interpreted as one of the notations
883                supported by MagneticSymmetryGroup, e.g., "R-3'c" or "Fm'-3'm".
884                If a list of two ints, it will be interpreted as the number of
885                the spacegroup in its Belov, Neronova and Smirnova (BNS) setting.
886            lattice (Lattice/3x3 array): The lattice, either as a
887                :class:`pymatgen.core.lattice.Lattice` or
888                simply as any 2D array. Each row should correspond to a lattice
889                vector. E.g., [[10,0,0], [20,10,0], [0,0,30]] specifies a
890                lattice with lattice vectors [10,0,0], [20,10,0] and [0,0,30].
891                Note that no attempt is made to check that the lattice is
892                compatible with the spacegroup specified. This may be
893                introduced in a future version.
894            species ([Species]): Sequence of species on each site. Can take in
895                flexible input, including:
896                i.  A sequence of element / species specified either as string
897                symbols, e.g. ["Li", "Fe2+", "P", ...] or atomic numbers,
898                e.g., (3, 56, ...) or actual Element or Species objects.
899
900                ii. List of dict of elements/species and occupancies, e.g.,
901                    [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of
902                    disordered structures.
903            coords (Nx3 array): list of fractional/cartesian coordinates of
904                each species.
905            site_properties (dict): Properties associated with the sites as a
906                dict of sequences, e.g., {"magmom":[5,5,5,5]}. The sequences
907                have to be the same length as the atomic species and
908                fractional_coords. Unlike Structure.from_spacegroup(),
909                this argument is mandatory, since magnetic moment information
910                has to be included. Note that the *direction* of the supplied
911                magnetic moment relative to the crystal is important, even if
912                the resulting structure is used for collinear calculations.
913            coords_are_cartesian (bool): Set to True if you are providing
914                coordinates in cartesian coordinates. Defaults to False.
915            tol (float): A fractional tolerance to deal with numerical
916                precision issues in determining if orbits are the same.
917        """
918        from pymatgen.electronic_structure.core import Magmom
919        from pymatgen.symmetry.maggroups import MagneticSpaceGroup
920
921        if "magmom" not in site_properties:
922            raise ValueError("Magnetic moments have to be defined.")
923
924        magmoms = [Magmom(m) for m in site_properties["magmom"]]
925
926        if not isinstance(msg, MagneticSpaceGroup):
927            msg = MagneticSpaceGroup(msg)  # type: ignore
928
929        if isinstance(lattice, Lattice):
930            latt = lattice
931        else:
932            latt = Lattice(lattice)
933
934        if not msg.is_compatible(latt):
935            raise ValueError(
936                "Supplied lattice with parameters %s is incompatible with "
937                "supplied spacegroup %s!" % (latt.parameters, msg.sg_symbol)
938            )
939
940        if len(species) != len(coords):
941            raise ValueError(
942                "Supplied species and coords lengths (%d vs %d) are " "different!" % (len(species), len(coords))
943            )
944
945        if len(species) != len(magmoms):
946            raise ValueError(
947                "Supplied species and magmom lengths (%d vs %d) are " "different!" % (len(species), len(magmoms))
948            )
949
950        frac_coords = coords if not coords_are_cartesian else latt.get_fractional_coords(coords)
951
952        all_sp = []  # type: List[Union[str, Element, Species, DummySpecies, Composition]]
953        all_coords = []  # type: List[List[float]]
954        all_magmoms = []  # type: List[float]
955        all_site_properties = collections.defaultdict(list)  # type: Dict[str, List]
956        for i, (sp, c, m) in enumerate(zip(species, frac_coords, magmoms)):  # type: ignore
957            cc, mm = msg.get_orbit(c, m, tol=tol)
958            all_sp.extend([sp] * len(cc))
959            all_coords.extend(cc)
960            all_magmoms.extend(mm)
961            for k, v in site_properties.items():
962                if k != "magmom":
963                    all_site_properties[k].extend([v[i]] * len(cc))
964
965        all_site_properties["magmom"] = all_magmoms
966
967        return cls(latt, all_sp, all_coords, site_properties=all_site_properties)
968
969    @property
970    def charge(self) -> float:
971        """
972        Overall charge of the structure
973        """
974        if self._charge is None:
975            return super().charge
976        return self._charge
977
978    @property
979    def distance_matrix(self) -> np.ndarray:
980        """
981        Returns the distance matrix between all sites in the structure. For
982        periodic structures, this should return the nearest image distance.
983        """
984        return self.lattice.get_all_distances(self.frac_coords, self.frac_coords)
985
986    @property
987    def sites(self) -> Tuple[PeriodicSite, ...]:
988        """
989        Returns an iterator for the sites in the Structure.
990        """
991        return self._sites
992
993    @property
994    def lattice(self) -> Lattice:
995        """
996        Lattice of the structure.
997        """
998        return self._lattice
999
1000    @property
1001    def density(self) -> float:
1002        """
1003        Returns the density in units of g/cc
1004        """
1005        m = Mass(self.composition.weight, "amu")
1006        return m.to("g") / (self.volume * Length(1, "ang").to("cm") ** 3)
1007
1008    def get_space_group_info(self, symprec=1e-2, angle_tolerance=5.0) -> Tuple[str, int]:
1009        """
1010        Convenience method to quickly get the spacegroup of a structure.
1011
1012        Args:
1013            symprec (float): Same definition as in SpacegroupAnalyzer.
1014                Defaults to 1e-2.
1015            angle_tolerance (float): Same definition as in SpacegroupAnalyzer.
1016                Defaults to 5 degrees.
1017
1018        Returns:
1019            spacegroup_symbol, international_number
1020        """
1021        # Import within method needed to avoid cyclic dependency.
1022        from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
1023
1024        a = SpacegroupAnalyzer(self, symprec=symprec, angle_tolerance=angle_tolerance)
1025        return a.get_space_group_symbol(), a.get_space_group_number()
1026
1027    def matches(self, other, anonymous=False, **kwargs):
1028        """
1029        Check whether this structure is similar to another structure.
1030        Basically a convenience method to call structure matching.
1031
1032        Args:
1033            other (IStructure/Structure): Another structure.
1034            **kwargs: Same **kwargs as in
1035                :class:`pymatgen.analysis.structure_matcher.StructureMatcher`.
1036
1037        Returns:
1038            (bool) True is the structures are similar under some affine
1039            transformation.
1040        """
1041        from pymatgen.analysis.structure_matcher import StructureMatcher
1042
1043        m = StructureMatcher(**kwargs)
1044        if not anonymous:
1045            return m.fit(self, other)
1046        return m.fit_anonymous(self, other)
1047
1048    def __eq__(self, other):
1049        if other is self:
1050            return True
1051        if other is None:
1052            return False
1053        if len(self) != len(other):
1054            return False
1055        if self.lattice != other.lattice:
1056            return False
1057        for site in self:
1058            if site not in other:
1059                return False
1060        return True
1061
1062    def __ne__(self, other):
1063        return not self.__eq__(other)
1064
1065    def __hash__(self):
1066        # For now, just use the composition hash code.
1067        return self.composition.__hash__()
1068
1069    def __mul__(self, scaling_matrix):
1070        """
1071        Makes a supercell. Allowing to have sites outside the unit cell
1072
1073        Args:
1074            scaling_matrix: A scaling matrix for transforming the lattice
1075                vectors. Has to be all integers. Several options are possible:
1076
1077                a. A full 3x3 scaling matrix defining the linear combination
1078                   the old lattice vectors. E.g., [[2,1,0],[0,3,0],[0,0,
1079                   1]] generates a new structure with lattice vectors a' =
1080                   2a + b, b' = 3b, c' = c where a, b, and c are the lattice
1081                   vectors of the original structure.
1082                b. An sequence of three scaling factors. E.g., [2, 1, 1]
1083                   specifies that the supercell should have dimensions 2a x b x
1084                   c.
1085                c. A number, which simply scales all lattice vectors by the
1086                   same factor.
1087
1088        Returns:
1089            Supercell structure. Note that a Structure is always returned,
1090            even if the input structure is a subclass of Structure. This is
1091            to avoid different arguments signatures from causing problems. If
1092            you prefer a subclass to return its own type, you need to override
1093            this method in the subclass.
1094        """
1095        scale_matrix = np.array(scaling_matrix, np.int16)
1096        if scale_matrix.shape != (3, 3):
1097            scale_matrix = np.array(scale_matrix * np.eye(3), np.int16)
1098        new_lattice = Lattice(np.dot(scale_matrix, self._lattice.matrix))
1099
1100        f_lat = lattice_points_in_supercell(scale_matrix)
1101        c_lat = new_lattice.get_cartesian_coords(f_lat)
1102
1103        new_sites = []
1104        for site in self:
1105            for v in c_lat:
1106                s = PeriodicSite(
1107                    site.species,
1108                    site.coords + v,
1109                    new_lattice,
1110                    properties=site.properties,
1111                    coords_are_cartesian=True,
1112                    to_unit_cell=False,
1113                    skip_checks=True,
1114                )
1115                new_sites.append(s)
1116
1117        new_charge = self._charge * np.linalg.det(scale_matrix) if self._charge else None
1118        return Structure.from_sites(new_sites, charge=new_charge)
1119
1120    def __rmul__(self, scaling_matrix):
1121        """
1122        Similar to __mul__ to preserve commutativeness.
1123        """
1124        return self.__mul__(scaling_matrix)
1125
1126    @property
1127    def frac_coords(self):
1128        """
1129        Fractional coordinates as a Nx3 numpy array.
1130        """
1131        return np.array([site.frac_coords for site in self._sites])
1132
1133    @property
1134    def volume(self):
1135        """
1136        Returns the volume of the structure.
1137        """
1138        return self._lattice.volume
1139
1140    def get_distance(self, i: int, j: int, jimage=None) -> float:
1141        """
1142        Get distance between site i and j assuming periodic boundary
1143        conditions. If the index jimage of two sites atom j is not specified it
1144        selects the jimage nearest to the i atom and returns the distance and
1145        jimage indices in terms of lattice vector translations if the index
1146        jimage of atom j is specified it returns the distance between the i
1147        atom and the specified jimage atom.
1148
1149        Args:
1150            i (int): Index of first site
1151            j (int): Index of second site
1152            jimage: Number of lattice translations in each lattice direction.
1153                Default is None for nearest image.
1154
1155        Returns:
1156            distance
1157        """
1158        return self[i].distance(self[j], jimage)
1159
1160    def get_sites_in_sphere(
1161        self,
1162        pt: ArrayLike,
1163        r: float,
1164        include_index: bool = False,
1165        include_image: bool = False,
1166    ) -> List[PeriodicNeighbor]:
1167        """
1168        Find all sites within a sphere from the point, including a site (if any)
1169        sitting on the point itself. This includes sites in other periodic
1170        images.
1171
1172        Algorithm:
1173
1174        1. place sphere of radius r in crystal and determine minimum supercell
1175           (parallelpiped) which would contain a sphere of radius r. for this
1176           we need the projection of a_1 on a unit vector perpendicular
1177           to a_2 & a_3 (i.e. the unit vector in the direction b_1) to
1178           determine how many a_1"s it will take to contain the sphere.
1179
1180           Nxmax = r * length_of_b_1 / (2 Pi)
1181
1182        2. keep points falling within r.
1183
1184        Args:
1185            pt (3x1 array): cartesian coordinates of center of sphere.
1186            r (float): Radius of sphere.
1187            include_index (bool): Whether the non-supercell site index
1188                is included in the returned data
1189            include_image (bool): Whether to include the supercell image
1190                is included in the returned data
1191
1192        Returns:
1193            [:class:`pymatgen.core.structure.PeriodicNeighbor`]
1194        """
1195        site_fcoords = np.mod(self.frac_coords, 1)
1196        neighbors = []  # type: List[PeriodicNeighbor]
1197        for fcoord, dist, i, img in self._lattice.get_points_in_sphere(site_fcoords, pt, r):
1198            nnsite = PeriodicNeighbor(
1199                self[i].species,
1200                fcoord,
1201                self._lattice,
1202                properties=self[i].properties,
1203                nn_distance=dist,
1204                image=img,  # type: ignore
1205                index=i,
1206            )
1207            neighbors.append(nnsite)
1208        return neighbors
1209
1210    def get_neighbors(
1211        self,
1212        site: PeriodicSite,
1213        r: float,
1214        include_index: bool = False,
1215        include_image: bool = False,
1216    ) -> List[PeriodicNeighbor]:
1217        """
1218        Get all neighbors to a site within a sphere of radius r.  Excludes the
1219        site itself.
1220
1221        Args:
1222            site (Site): Which is the center of the sphere.
1223            r (float): Radius of sphere.
1224            include_index (bool): Deprecated. Now, the non-supercell site index
1225                is always included in the returned data.
1226            include_image (bool): Deprecated. Now the supercell image
1227                is always included in the returned data.
1228
1229        Returns:
1230            [:class:`pymatgen.core.structure.PeriodicNeighbor`]
1231        """
1232        return self.get_all_neighbors(r, include_index=include_index, include_image=include_image, sites=[site])[0]
1233
1234    @deprecated(get_neighbors, "This is retained purely for checking purposes.")
1235    def get_neighbors_old(self, site, r, include_index=False, include_image=False):
1236        """
1237        Get all neighbors to a site within a sphere of radius r.  Excludes the
1238        site itself.
1239
1240        Args:
1241            site (Site): Which is the center of the sphere.
1242            r (float): Radius of sphere.
1243            include_index (bool): Whether the non-supercell site index
1244                is included in the returned data
1245            include_image (bool): Whether to include the supercell image
1246                is included in the returned data
1247
1248        Returns:
1249            [:class:`pymatgen.core.structure.PeriodicNeighbor`]
1250        """
1251        nn = self.get_sites_in_sphere(site.coords, r, include_index=include_index, include_image=include_image)
1252        return [d for d in nn if site != d[0]]
1253
1254    def _get_neighbor_list_py(
1255        self,
1256        r: float,
1257        sites: List[PeriodicSite] = None,
1258        numerical_tol: float = 1e-8,
1259        exclude_self: bool = True,
1260    ) -> Tuple[np.ndarray, ...]:
1261        """
1262        A python version of getting neighbor_list. The returned values are a tuple of
1263        numpy arrays (center_indices, points_indices, offset_vectors, distances).
1264        Atom `center_indices[i]` has neighbor atom `points_indices[i]` that is
1265        translated by `offset_vectors[i]` lattice vectors, and the distance is
1266        `distances[i]`.
1267
1268        Args:
1269            r (float): Radius of sphere
1270            sites (list of Sites or None): sites for getting all neighbors,
1271                default is None, which means neighbors will be obtained for all
1272                sites. This is useful in the situation where you are interested
1273                only in one subspecies type, and makes it a lot faster.
1274            numerical_tol (float): This is a numerical tolerance for distances.
1275                Sites which are < numerical_tol are determined to be conincident
1276                with the site. Sites which are r + numerical_tol away is deemed
1277                to be within r from the site. The default of 1e-8 should be
1278                ok in most instances.
1279            exclude_self (bool): whether to exclude atom neighboring with itself within
1280                numerical tolerance distance, default to True
1281        Returns: (center_indices, points_indices, offset_vectors, distances)
1282        """
1283        neighbors = self.get_all_neighbors_py(
1284            r=r, include_index=True, include_image=True, sites=sites, numerical_tol=1e-8
1285        )
1286        center_indices = []
1287        points_indices = []
1288        offsets = []
1289        distances = []
1290        for i, nns in enumerate(neighbors):
1291            if len(nns) > 0:
1292                for n in nns:
1293                    if exclude_self and (i == n.index) and (n.nn_distance <= numerical_tol):
1294                        continue
1295                    center_indices.append(i)
1296                    points_indices.append(n.index)
1297                    offsets.append(n.image)
1298                    distances.append(n.nn_distance)
1299        return tuple(
1300            (
1301                np.array(center_indices),
1302                np.array(points_indices),
1303                np.array(offsets),
1304                np.array(distances),
1305            )
1306        )
1307
1308    def get_neighbor_list(
1309        self,
1310        r: float,
1311        sites: Sequence[PeriodicSite] = None,
1312        numerical_tol: float = 1e-8,
1313        exclude_self: bool = True,
1314    ) -> Tuple[np.ndarray, ...]:
1315        """
1316        Get neighbor lists using numpy array representations without constructing
1317        Neighbor objects. If the cython extension is installed, this method will
1318        be orders of magnitude faster than `get_all_neighbors_old` and 2-3x faster
1319        than `get_all_neighbors`.
1320        The returned values are a tuple of numpy arrays
1321        (center_indices, points_indices, offset_vectors, distances).
1322        Atom `center_indices[i]` has neighbor atom `points_indices[i]` that is
1323        translated by `offset_vectors[i]` lattice vectors, and the distance is
1324        `distances[i]`.
1325
1326        Args:
1327            r (float): Radius of sphere
1328            sites (list of Sites or None): sites for getting all neighbors,
1329                default is None, which means neighbors will be obtained for all
1330                sites. This is useful in the situation where you are interested
1331                only in one subspecies type, and makes it a lot faster.
1332            numerical_tol (float): This is a numerical tolerance for distances.
1333                Sites which are < numerical_tol are determined to be conincident
1334                with the site. Sites which are r + numerical_tol away is deemed
1335                to be within r from the site. The default of 1e-8 should be
1336                ok in most instances.
1337            exclude_self (bool): whether to exclude atom neighboring with itself within
1338                numerical tolerance distance, default to True
1339        Returns: (center_indices, points_indices, offset_vectors, distances)
1340
1341        """
1342        try:
1343            from pymatgen.optimization.neighbors import (
1344                find_points_in_spheres,  # type: ignore
1345            )
1346        except ImportError:
1347            return self._get_neighbor_list_py(r, sites, exclude_self=exclude_self)  # type: ignore
1348        else:
1349            if sites is None:
1350                sites = self.sites
1351            site_coords = np.array([site.coords for site in sites], dtype=float)
1352            cart_coords = np.ascontiguousarray(np.array(self.cart_coords), dtype=float)
1353            lattice_matrix = np.ascontiguousarray(np.array(self.lattice.matrix), dtype=float)
1354            r = float(r)
1355            center_indices, points_indices, images, distances = find_points_in_spheres(
1356                cart_coords,
1357                site_coords,
1358                r=r,
1359                pbc=np.array([1, 1, 1], dtype=int),
1360                lattice=lattice_matrix,
1361                tol=numerical_tol,
1362            )
1363            cond = np.array([True] * len(center_indices))
1364            if exclude_self:
1365                self_pair = (center_indices == points_indices) & (distances <= numerical_tol)
1366                cond = ~self_pair
1367            return tuple(
1368                (
1369                    center_indices[cond],
1370                    points_indices[cond],
1371                    images[cond],
1372                    distances[cond],
1373                )
1374            )
1375
1376    def get_all_neighbors(
1377        self,
1378        r: float,
1379        include_index: bool = False,
1380        include_image: bool = False,
1381        sites: Sequence[PeriodicSite] = None,
1382        numerical_tol: float = 1e-8,
1383    ) -> List[List[PeriodicNeighbor]]:
1384        """
1385        Get neighbors for each atom in the unit cell, out to a distance r
1386        Returns a list of list of neighbors for each site in structure.
1387        Use this method if you are planning on looping over all sites in the
1388        crystal. If you only want neighbors for a particular site, use the
1389        method get_neighbors as it may not have to build such a large supercell
1390        However if you are looping over all sites in the crystal, this method
1391        is more efficient since it only performs one pass over a large enough
1392        supercell to contain all possible atoms out to a distance r.
1393        The return type is a [(site, dist) ...] since most of the time,
1394        subsequent processing requires the distance.
1395
1396        A note about periodic images: Before computing the neighbors, this
1397        operation translates all atoms to within the unit cell (having
1398        fractional coordinates within [0,1)). This means that the "image" of a
1399        site does not correspond to how much it has been translates from its
1400        current position, but which image of the unit cell it resides.
1401
1402        Args:
1403            r (float): Radius of sphere.
1404            include_index (bool): Deprecated. Now, the non-supercell site index
1405                is always included in the returned data.
1406            include_image (bool): Deprecated. Now the supercell image
1407                is always included in the returned data.
1408            sites (list of Sites or None): sites for getting all neighbors,
1409                default is None, which means neighbors will be obtained for all
1410                sites. This is useful in the situation where you are interested
1411                only in one subspecies type, and makes it a lot faster.
1412            numerical_tol (float): This is a numerical tolerance for distances.
1413                Sites which are < numerical_tol are determined to be conincident
1414                with the site. Sites which are r + numerical_tol away is deemed
1415                to be within r from the site. The default of 1e-8 should be
1416                ok in most instances.
1417
1418        Returns:
1419            [[:class:`pymatgen.core.structure.PeriodicNeighbor`], ..]
1420        """
1421        if sites is None:
1422            sites = self.sites
1423        center_indices, points_indices, images, distances = self.get_neighbor_list(
1424            r=r, sites=sites, numerical_tol=numerical_tol
1425        )
1426        if len(points_indices) < 1:
1427            return [[]] * len(sites)
1428        f_coords = self.frac_coords[points_indices] + images
1429        neighbor_dict: Dict[int, List] = collections.defaultdict(list)
1430        lattice = self.lattice
1431        atol = Site.position_atol
1432        all_sites = self.sites
1433        for cindex, pindex, image, f_coord, d in zip(center_indices, points_indices, images, f_coords, distances):
1434            psite = all_sites[pindex]
1435            csite = sites[cindex]
1436            if (
1437                d > numerical_tol
1438                or
1439                # This simply compares the psite and csite. The reason why manual comparison is done is
1440                # for speed. This does not check the lattice since they are always equal. Also, the or construct
1441                # returns True immediately once one of the conditions are satisfied.
1442                psite.species != csite.species
1443                or (not np.allclose(psite.coords, csite.coords, atol=atol))
1444                or (not psite.properties == csite.properties)
1445            ):
1446                neighbor_dict[cindex].append(
1447                    PeriodicNeighbor(
1448                        species=psite.species,
1449                        coords=f_coord,
1450                        lattice=lattice,
1451                        properties=psite.properties,
1452                        nn_distance=d,
1453                        index=pindex,
1454                        image=tuple(image),
1455                    )
1456                )
1457
1458        neighbors: List[List[PeriodicNeighbor]] = []
1459
1460        for i in range(len(sites)):
1461            neighbors.append(neighbor_dict[i])
1462        return neighbors
1463
1464    def get_all_neighbors_py(
1465        self,
1466        r: float,
1467        include_index: bool = False,
1468        include_image: bool = False,
1469        sites: Sequence[PeriodicSite] = None,
1470        numerical_tol: float = 1e-8,
1471    ) -> List[List[PeriodicNeighbor]]:
1472        """
1473        Get neighbors for each atom in the unit cell, out to a distance r
1474        Returns a list of list of neighbors for each site in structure.
1475        Use this method if you are planning on looping over all sites in the
1476        crystal. If you only want neighbors for a particular site, use the
1477        method get_neighbors as it may not have to build such a large supercell
1478        However if you are looping over all sites in the crystal, this method
1479        is more efficient since it only performs one pass over a large enough
1480        supercell to contain all possible atoms out to a distance r.
1481        The return type is a [(site, dist) ...] since most of the time,
1482        subsequent processing requires the distance.
1483
1484        A note about periodic images: Before computing the neighbors, this
1485        operation translates all atoms to within the unit cell (having
1486        fractional coordinates within [0,1)). This means that the "image" of a
1487        site does not correspond to how much it has been translates from its
1488        current position, but which image of the unit cell it resides.
1489
1490        Args:
1491            r (float): Radius of sphere.
1492            include_index (bool): Deprecated. Now, the non-supercell site index
1493                is always included in the returned data.
1494            include_image (bool): Deprecated. Now the supercell image
1495                is always included in the returned data.
1496            sites (list of Sites or None): sites for getting all neighbors,
1497                default is None, which means neighbors will be obtained for all
1498                sites. This is useful in the situation where you are interested
1499                only in one subspecies type, and makes it a lot faster.
1500            numerical_tol (float): This is a numerical tolerance for distances.
1501                Sites which are < numerical_tol are determined to be conincident
1502                with the site. Sites which are r + numerical_tol away is deemed
1503                to be within r from the site. The default of 1e-8 should be
1504                ok in most instances.
1505
1506        Returns:
1507            [[:class:`pymatgen.core.structure.PeriodicNeighbor`],...]
1508        """
1509
1510        if sites is None:
1511            sites = self.sites
1512        site_coords = np.array([site.coords for site in sites])
1513        point_neighbors = get_points_in_spheres(
1514            self.cart_coords,
1515            site_coords,
1516            r=r,
1517            pbc=True,
1518            numerical_tol=numerical_tol,
1519            lattice=self.lattice,
1520        )
1521        neighbors: List[List[PeriodicNeighbor]] = []
1522        for point_neighbor, site in zip(point_neighbors, sites):
1523            nns: List[PeriodicNeighbor] = []
1524            if len(point_neighbor) < 1:
1525                neighbors.append([])
1526                continue
1527            for n in point_neighbor:
1528                coord, d, index, image = n
1529                if (d > numerical_tol) or (self[index] != site):
1530                    neighbor = PeriodicNeighbor(
1531                        species=self[index].species,
1532                        coords=coord,
1533                        lattice=self.lattice,
1534                        properties=self[index].properties,
1535                        nn_distance=d,
1536                        index=index,
1537                        image=tuple(image),
1538                    )
1539                    nns.append(neighbor)
1540            neighbors.append(nns)
1541        return neighbors
1542
1543    @deprecated(get_all_neighbors, "This is retained purely for checking purposes.")
1544    def get_all_neighbors_old(self, r, include_index=False, include_image=False, include_site=True):
1545        """
1546        Get neighbors for each atom in the unit cell, out to a distance r
1547        Returns a list of list of neighbors for each site in structure.
1548        Use this method if you are planning on looping over all sites in the
1549        crystal. If you only want neighbors for a particular site, use the
1550        method get_neighbors as it may not have to build such a large supercell
1551        However if you are looping over all sites in the crystal, this method
1552        is more efficient since it only performs one pass over a large enough
1553        supercell to contain all possible atoms out to a distance r.
1554        The return type is a [(site, dist) ...] since most of the time,
1555        subsequent processing requires the distance.
1556
1557        A note about periodic images: Before computing the neighbors, this
1558        operation translates all atoms to within the unit cell (having
1559        fractional coordinates within [0,1)). This means that the "image" of a
1560        site does not correspond to how much it has been translates from its
1561        current position, but which image of the unit cell it resides.
1562
1563        Args:
1564            r (float): Radius of sphere.
1565            include_index (bool): Whether to include the non-supercell site
1566                in the returned data
1567            include_image (bool): Whether to include the supercell image
1568                in the returned data
1569            include_site (bool): Whether to include the site in the returned
1570                data. Defaults to True.
1571
1572        Returns:
1573            [:class:`pymatgen.core.structure.PeriodicNeighbor`]
1574        """
1575        # Use same algorithm as get_sites_in_sphere to determine supercell but
1576        # loop over all atoms in crystal
1577        recp_len = np.array(self.lattice.reciprocal_lattice.abc)
1578        maxr = np.ceil((r + 0.15) * recp_len / (2 * math.pi))
1579        nmin = np.floor(np.min(self.frac_coords, axis=0)) - maxr
1580        nmax = np.ceil(np.max(self.frac_coords, axis=0)) + maxr
1581
1582        all_ranges = [np.arange(x, y) for x, y in zip(nmin, nmax)]
1583        latt = self._lattice
1584        matrix = latt.matrix
1585        neighbors = [[] for _ in range(len(self._sites))]
1586        all_fcoords = np.mod(self.frac_coords, 1)
1587        coords_in_cell = np.dot(all_fcoords, matrix)
1588        site_coords = self.cart_coords
1589
1590        indices = np.arange(len(self))
1591
1592        for image in itertools.product(*all_ranges):
1593            coords = np.dot(image, matrix) + coords_in_cell
1594            all_dists = all_distances(coords, site_coords)
1595            all_within_r = np.bitwise_and(all_dists <= r, all_dists > 1e-8)
1596
1597            for (j, d, within_r) in zip(indices, all_dists, all_within_r):
1598                if include_site:
1599                    nnsite = PeriodicSite(
1600                        self[j].species,
1601                        coords[j],
1602                        latt,
1603                        properties=self[j].properties,
1604                        coords_are_cartesian=True,
1605                        skip_checks=True,
1606                    )
1607
1608                for i in indices[within_r]:
1609                    item = []
1610                    if include_site:
1611                        item.append(nnsite)
1612                    item.append(d[i])
1613                    if include_index:
1614                        item.append(j)
1615                    # Add the image, if requested
1616                    if include_image:
1617                        item.append(image)
1618                    neighbors[i].append(item)
1619        return neighbors
1620
1621    def get_neighbors_in_shell(
1622        self, origin: ArrayLike, r: float, dr: float, include_index: bool = False, include_image: bool = False
1623    ) -> List[PeriodicNeighbor]:
1624        """
1625        Returns all sites in a shell centered on origin (coords) between radii
1626        r-dr and r+dr.
1627
1628        Args:
1629            origin (3x1 array): Cartesian coordinates of center of sphere.
1630            r (float): Inner radius of shell.
1631            dr (float): Width of shell.
1632            include_index (bool): Deprecated. Now, the non-supercell site index
1633                is always included in the returned data.
1634            include_image (bool): Deprecated. Now the supercell image
1635                is always included in the returned data.
1636
1637        Returns:
1638            [NearestNeighbor] where Nearest Neighbor is a named tuple containing
1639            (site, distance, index, image).
1640        """
1641        outer = self.get_sites_in_sphere(origin, r + dr, include_index=include_index, include_image=include_image)
1642        inner = r - dr
1643        return [t for t in outer if t.nn_distance > inner]
1644
1645    def get_sorted_structure(
1646        self, key: Optional[Callable] = None, reverse: bool = False
1647    ) -> Union["IStructure", "Structure"]:
1648        """
1649        Get a sorted copy of the structure. The parameters have the same
1650        meaning as in list.sort. By default, sites are sorted by the
1651        electronegativity of the species.
1652
1653        Args:
1654            key: Specifies a function of one argument that is used to extract
1655                a comparison key from each list element: key=str.lower. The
1656                default value is None (compare the elements directly).
1657            reverse (bool): If set to True, then the list elements are sorted
1658                as if each comparison were reversed.
1659        """
1660        sites = sorted(self, key=key, reverse=reverse)
1661        return self.__class__.from_sites(sites, charge=self._charge)
1662
1663    def get_reduced_structure(
1664        self, reduction_algo: Literal["niggli", "LLL"] = "niggli"
1665    ) -> Union["IStructure", "Structure"]:
1666        """
1667        Get a reduced structure.
1668
1669        Args:
1670            reduction_algo ("niggli" | "LLL"): The lattice reduction algorithm to use.
1671                Defaults to "niggli".
1672        """
1673        if reduction_algo == "niggli":
1674            reduced_latt = self._lattice.get_niggli_reduced_lattice()
1675        elif reduction_algo == "LLL":
1676            reduced_latt = self._lattice.get_lll_reduced_lattice()
1677        else:
1678            raise ValueError("Invalid reduction algo : {}".format(reduction_algo))
1679
1680        if reduced_latt != self.lattice:
1681            return self.__class__(  # type: ignore
1682                reduced_latt,
1683                self.species_and_occu,
1684                self.cart_coords,
1685                coords_are_cartesian=True,
1686                to_unit_cell=True,
1687                site_properties=self.site_properties,
1688                charge=self._charge,
1689            )
1690        return self.copy()
1691
1692    def copy(self, site_properties=None, sanitize=False):
1693        """
1694        Convenience method to get a copy of the structure, with options to add
1695        site properties.
1696
1697        Args:
1698            site_properties (dict): Properties to add or override. The
1699                properties are specified in the same way as the constructor,
1700                i.e., as a dict of the form {property: [values]}. The
1701                properties should be in the order of the *original* structure
1702                if you are performing sanitization.
1703            sanitize (bool): If True, this method will return a sanitized
1704                structure. Sanitization performs a few things: (i) The sites are
1705                sorted by electronegativity, (ii) a LLL lattice reduction is
1706                carried out to obtain a relatively orthogonalized cell,
1707                (iii) all fractional coords for sites are mapped into the
1708                unit cell.
1709
1710        Returns:
1711            A copy of the Structure, with optionally new site_properties and
1712            optionally sanitized.
1713        """
1714        props = self.site_properties
1715        if site_properties:
1716            props.update(site_properties)
1717        if not sanitize:
1718            return self.__class__(
1719                self._lattice,
1720                self.species_and_occu,
1721                self.frac_coords,
1722                charge=self._charge,
1723                site_properties=props,
1724            )
1725        reduced_latt = self._lattice.get_lll_reduced_lattice()
1726        new_sites = []
1727        for i, site in enumerate(self):
1728            frac_coords = reduced_latt.get_fractional_coords(site.coords)
1729            site_props = {}
1730            for p, v in props.items():
1731                site_props[p] = v[i]
1732            new_sites.append(
1733                PeriodicSite(
1734                    site.species,
1735                    frac_coords,
1736                    reduced_latt,
1737                    to_unit_cell=True,
1738                    properties=site_props,
1739                    skip_checks=True,
1740                )
1741            )
1742        new_sites = sorted(new_sites)
1743        return self.__class__.from_sites(new_sites, charge=self._charge)
1744
1745    def interpolate(
1746        self,
1747        end_structure: Union["IStructure", "Structure"],
1748        nimages: Union[int, Iterable] = 10,
1749        interpolate_lattices: bool = False,
1750        pbc: bool = True,
1751        autosort_tol: float = 0,
1752    ) -> List[Union["IStructure", "Structure"]]:
1753        """
1754        Interpolate between this structure and end_structure. Useful for
1755        construction of NEB inputs.
1756
1757        Args:
1758            end_structure (Structure): structure to interpolate between this
1759                structure and end.
1760            nimages (int,list): No. of interpolation images or a list of
1761                interpolation images. Defaults to 10 images.
1762            interpolate_lattices (bool): Whether to interpolate the lattices.
1763                Interpolates the lengths and angles (rather than the matrix)
1764                so orientation may be affected.
1765            pbc (bool): Whether to use periodic boundary conditions to find
1766                the shortest path between endpoints.
1767            autosort_tol (float): A distance tolerance in angstrom in
1768                which to automatically sort end_structure to match to the
1769                closest points in this particular structure. This is usually
1770                what you want in a NEB calculation. 0 implies no sorting.
1771                Otherwise, a 0.5 value usually works pretty well.
1772
1773        Returns:
1774            List of interpolated structures. The starting and ending
1775            structures included as the first and last structures respectively.
1776            A total of (nimages + 1) structures are returned.
1777        """
1778        # Check length of structures
1779        if len(self) != len(end_structure):
1780            raise ValueError("Structures have different lengths!")
1781
1782        if not (interpolate_lattices or self.lattice == end_structure.lattice):
1783            raise ValueError("Structures with different lattices!")
1784
1785        if not isinstance(nimages, collections.abc.Iterable):
1786            images = np.arange(nimages + 1) / nimages
1787        else:
1788            images = nimages
1789
1790        # Check that both structures have the same species
1791        for i, site in enumerate(self):
1792            if site.species != end_structure[i].species:
1793                raise ValueError(
1794                    "Different species!\nStructure 1:\n" + str(self) + "\nStructure 2\n" + str(end_structure)
1795                )
1796
1797        start_coords = np.array(self.frac_coords)
1798        end_coords = np.array(end_structure.frac_coords)
1799
1800        if autosort_tol:
1801            dist_matrix = self.lattice.get_all_distances(start_coords, end_coords)
1802            site_mappings = collections.defaultdict(list)  # type: Dict[int, List[int]]
1803            unmapped_start_ind = []
1804            for i, row in enumerate(dist_matrix):
1805                ind = np.where(row < autosort_tol)[0]
1806                if len(ind) == 1:
1807                    site_mappings[i].append(ind[0])
1808                else:
1809                    unmapped_start_ind.append(i)
1810
1811            if len(unmapped_start_ind) > 1:
1812                raise ValueError(
1813                    "Unable to reliably match structures "
1814                    "with auto_sort_tol = %f. unmapped indices "
1815                    "= %s" % (autosort_tol, unmapped_start_ind)
1816                )
1817
1818            sorted_end_coords = np.zeros_like(end_coords)
1819            matched = []
1820            for i, j in site_mappings.items():
1821                if len(j) > 1:
1822                    raise ValueError(
1823                        "Unable to reliably match structures "
1824                        "with auto_sort_tol = %f. More than one "
1825                        "site match!" % autosort_tol
1826                    )
1827                sorted_end_coords[i] = end_coords[j[0]]
1828                matched.append(j[0])
1829
1830            if len(unmapped_start_ind) == 1:
1831                i = unmapped_start_ind[0]
1832                j = list(set(range(len(start_coords))).difference(matched))[0]  # type: ignore
1833                sorted_end_coords[i] = end_coords[j]
1834
1835            end_coords = sorted_end_coords
1836
1837        vec = end_coords - start_coords
1838        if pbc:
1839            vec -= np.round(vec)
1840        sp = self.species_and_occu
1841        structs = []
1842
1843        if interpolate_lattices:
1844            # interpolate lattice matrices using polar decomposition
1845            from scipy.linalg import polar
1846
1847            # u is unitary (rotation), p is stretch
1848            u, p = polar(np.dot(end_structure.lattice.matrix.T, np.linalg.inv(self.lattice.matrix.T)))
1849            lvec = p - np.identity(3)
1850            lstart = self.lattice.matrix.T
1851
1852        for x in images:
1853            if interpolate_lattices:
1854                l_a = np.dot(np.identity(3) + x * lvec, lstart).T
1855                lat = Lattice(l_a)
1856            else:
1857                lat = self.lattice
1858            fcoords = start_coords + x * vec
1859            structs.append(self.__class__(lat, sp, fcoords, site_properties=self.site_properties))  # type: ignore
1860        return structs
1861
1862    def get_miller_index_from_site_indexes(self, site_ids, round_dp=4, verbose=True):
1863        """
1864        Get the Miller index of a plane from a set of sites indexes.
1865
1866        A minimum of 3 sites are required. If more than 3 sites are given
1867        the best plane that minimises the distance to all points will be
1868        calculated.
1869
1870        Args:
1871            site_ids (list of int): A list of site indexes to consider. A
1872                minimum of three site indexes are required. If more than three
1873                sites are provided, the best plane that minimises the distance
1874                to all sites will be calculated.
1875            round_dp (int, optional): The number of decimal places to round the
1876                miller index to.
1877            verbose (bool, optional): Whether to print warnings.
1878
1879        Returns:
1880            (tuple): The Miller index.
1881        """
1882        return self.lattice.get_miller_index_from_coords(
1883            self.frac_coords[site_ids],
1884            coords_are_cartesian=False,
1885            round_dp=round_dp,
1886            verbose=verbose,
1887        )
1888
1889    def get_primitive_structure(
1890        self, tolerance: float = 0.25, use_site_props: bool = False, constrain_latt: Union[List, Dict] = None
1891    ):
1892        """
1893        This finds a smaller unit cell than the input. Sometimes it doesn"t
1894        find the smallest possible one, so this method is recursively called
1895        until it is unable to find a smaller cell.
1896
1897        NOTE: if the tolerance is greater than 1/2 the minimum inter-site
1898        distance in the primitive cell, the algorithm will reject this lattice.
1899
1900        Args:
1901            tolerance (float), Angstroms: Tolerance for each coordinate of a
1902                particular site. For example, [0.1, 0, 0.1] in cartesian
1903                coordinates will be considered to be on the same coordinates
1904                as [0, 0, 0] for a tolerance of 0.25. Defaults to 0.25.
1905            use_site_props (bool): Whether to account for site properties in
1906                differntiating sites.
1907            constrain_latt (list/dict): List of lattice parameters we want to
1908                preserve, e.g. ["alpha", "c"] or dict with the lattice
1909                parameter names as keys and values we want the parameters to
1910                be e.g. {"alpha": 90, "c": 2.5}.
1911
1912        Returns:
1913            The most primitive structure found.
1914        """
1915        if constrain_latt is None:
1916            constrain_latt = []
1917
1918        def site_label(site):
1919            if not use_site_props:
1920                return site.species_string
1921            d = [site.species_string]
1922            for k in sorted(site.properties.keys()):
1923                d.append(k + "=" + str(site.properties[k]))
1924            return ", ".join(d)
1925
1926        # group sites by species string
1927        sites = sorted(self._sites, key=site_label)
1928
1929        grouped_sites = [list(a[1]) for a in itertools.groupby(sites, key=site_label)]
1930        grouped_fcoords = [np.array([s.frac_coords for s in g]) for g in grouped_sites]
1931
1932        # min_vecs are approximate periodicities of the cell. The exact
1933        # periodicities from the supercell matrices are checked against these
1934        # first
1935        min_fcoords = min(grouped_fcoords, key=lambda x: len(x))
1936        min_vecs = min_fcoords - min_fcoords[0]
1937
1938        # fractional tolerance in the supercell
1939        super_ftol = np.divide(tolerance, self.lattice.abc)
1940        super_ftol_2 = super_ftol * 2
1941
1942        def pbc_coord_intersection(fc1, fc2, tol):
1943            """
1944            Returns the fractional coords in fc1 that have coordinates
1945            within tolerance to some coordinate in fc2
1946            """
1947            d = fc1[:, None, :] - fc2[None, :, :]
1948            d -= np.round(d)
1949            np.abs(d, d)
1950            return fc1[np.any(np.all(d < tol, axis=-1), axis=-1)]
1951
1952        # here we reduce the number of min_vecs by enforcing that every
1953        # vector in min_vecs approximately maps each site onto a similar site.
1954        # The subsequent processing is O(fu^3 * min_vecs) = O(n^4) if we do no
1955        # reduction.
1956        # This reduction is O(n^3) so usually is an improvement. Using double
1957        # the tolerance because both vectors are approximate
1958        for g in sorted(grouped_fcoords, key=lambda x: len(x)):
1959            for f in g:
1960                min_vecs = pbc_coord_intersection(min_vecs, g - f, super_ftol_2)
1961
1962        def get_hnf(fu):
1963            """
1964            Returns all possible distinct supercell matrices given a
1965            number of formula units in the supercell. Batches the matrices
1966            by the values in the diagonal (for less numpy overhead).
1967            Computational complexity is O(n^3), and difficult to improve.
1968            Might be able to do something smart with checking combinations of a
1969            and b first, though unlikely to reduce to O(n^2).
1970            """
1971
1972            def factors(n):
1973                for i in range(1, n + 1):
1974                    if n % i == 0:
1975                        yield i
1976
1977            for det in factors(fu):
1978                if det == 1:
1979                    continue
1980                for a in factors(det):
1981                    for e in factors(det // a):
1982                        g = det // a // e
1983                        yield det, np.array(
1984                            [
1985                                [[a, b, c], [0, e, f], [0, 0, g]]
1986                                for b, c, f in itertools.product(range(a), range(a), range(e))
1987                            ]
1988                        )
1989
1990        # we cant let sites match to their neighbors in the supercell
1991        grouped_non_nbrs = []
1992        for gfcoords in grouped_fcoords:
1993            fdist = gfcoords[None, :, :] - gfcoords[:, None, :]
1994            fdist -= np.round(fdist)
1995            np.abs(fdist, fdist)
1996            non_nbrs = np.any(fdist > 2 * super_ftol[None, None, :], axis=-1)
1997            # since we want sites to match to themselves
1998            np.fill_diagonal(non_nbrs, True)
1999            grouped_non_nbrs.append(non_nbrs)
2000
2001        num_fu = functools.reduce(math.gcd, map(len, grouped_sites))
2002        for size, ms in get_hnf(num_fu):
2003            inv_ms = np.linalg.inv(ms)
2004
2005            # find sets of lattice vectors that are are present in min_vecs
2006            dist = inv_ms[:, :, None, :] - min_vecs[None, None, :, :]
2007            dist -= np.round(dist)
2008            np.abs(dist, dist)
2009            is_close = np.all(dist < super_ftol, axis=-1)
2010            any_close = np.any(is_close, axis=-1)
2011            inds = np.all(any_close, axis=-1)
2012
2013            for inv_m, m in zip(inv_ms[inds], ms[inds]):
2014                new_m = np.dot(inv_m, self.lattice.matrix)
2015                ftol = np.divide(tolerance, np.sqrt(np.sum(new_m ** 2, axis=1)))
2016
2017                valid = True
2018                new_coords = []
2019                new_sp = []
2020                new_props = collections.defaultdict(list)
2021                for gsites, gfcoords, non_nbrs in zip(grouped_sites, grouped_fcoords, grouped_non_nbrs):
2022                    all_frac = np.dot(gfcoords, m)
2023
2024                    # calculate grouping of equivalent sites, represented by
2025                    # adjacency matrix
2026                    fdist = all_frac[None, :, :] - all_frac[:, None, :]
2027                    fdist = np.abs(fdist - np.round(fdist))
2028                    close_in_prim = np.all(fdist < ftol[None, None, :], axis=-1)
2029                    groups = np.logical_and(close_in_prim, non_nbrs)
2030
2031                    # check that groups are correct
2032                    if not np.all(np.sum(groups, axis=0) == size):
2033                        valid = False
2034                        break
2035
2036                    # check that groups are all cliques
2037                    for g in groups:
2038                        if not np.all(groups[g][:, g]):
2039                            valid = False
2040                            break
2041                    if not valid:
2042                        break
2043
2044                    # add the new sites, averaging positions
2045                    added = np.zeros(len(gsites))
2046                    new_fcoords = all_frac % 1
2047                    for i, group in enumerate(groups):
2048                        if not added[i]:
2049                            added[group] = True
2050                            inds = np.where(group)[0]
2051                            coords = new_fcoords[inds[0]]
2052                            for n, j in enumerate(inds[1:]):
2053                                offset = new_fcoords[j] - coords
2054                                coords += (offset - np.round(offset)) / (n + 2)
2055                            new_sp.append(gsites[inds[0]].species)
2056                            for k in gsites[inds[0]].properties:
2057                                new_props[k].append(gsites[inds[0]].properties[k])
2058                            new_coords.append(coords)
2059
2060                if valid:
2061                    inv_m = np.linalg.inv(m)
2062                    new_l = Lattice(np.dot(inv_m, self.lattice.matrix))
2063                    s = Structure(
2064                        new_l,
2065                        new_sp,
2066                        new_coords,
2067                        site_properties=new_props,
2068                        coords_are_cartesian=False,
2069                    )
2070
2071                    # Default behavior
2072                    p = s.get_primitive_structure(
2073                        tolerance=tolerance,
2074                        use_site_props=use_site_props,
2075                        constrain_latt=constrain_latt,
2076                    ).get_reduced_structure()
2077                    if not constrain_latt:
2078                        return p
2079
2080                    # Only return primitive structures that
2081                    # satisfy the restriction condition
2082                    p_latt, s_latt = p.lattice, self.lattice
2083                    if type(constrain_latt).__name__ == "list":
2084                        if all(getattr(p_latt, p) == getattr(s_latt, p) for p in constrain_latt):
2085                            return p
2086                    elif type(constrain_latt).__name__ == "dict":
2087                        if all(getattr(p_latt, p) == constrain_latt[p] for p in constrain_latt.keys()):  # type: ignore
2088                            return p
2089
2090        return self.copy()
2091
2092    def __repr__(self):
2093        outs = ["Structure Summary", repr(self.lattice)]
2094        if self._charge:
2095            if self._charge >= 0:
2096                outs.append("Overall Charge: +{}".format(self._charge))
2097            else:
2098                outs.append("Overall Charge: -{}".format(self._charge))
2099        for s in self:
2100            outs.append(repr(s))
2101        return "\n".join(outs)
2102
2103    def __str__(self):
2104        outs = [
2105            "Full Formula ({s})".format(s=self.composition.formula),
2106            "Reduced Formula: {}".format(self.composition.reduced_formula),
2107        ]
2108
2109        def to_s(x):
2110            return "%0.6f" % x
2111
2112        outs.append("abc   : " + " ".join([to_s(i).rjust(10) for i in self.lattice.abc]))
2113        outs.append("angles: " + " ".join([to_s(i).rjust(10) for i in self.lattice.angles]))
2114        if self._charge:
2115            if self._charge >= 0:
2116                outs.append("Overall Charge: +{}".format(self._charge))
2117            else:
2118                outs.append("Overall Charge: -{}".format(self._charge))
2119        outs.append("Sites ({i})".format(i=len(self)))
2120        data = []
2121        props = self.site_properties
2122        keys = sorted(props.keys())
2123        for i, site in enumerate(self):
2124            row = [str(i), site.species_string]
2125            row.extend([to_s(j) for j in site.frac_coords])
2126            for k in keys:
2127                row.append(props[k][i])
2128            data.append(row)
2129        outs.append(
2130            tabulate(
2131                data,
2132                headers=["#", "SP", "a", "b", "c"] + keys,
2133            )
2134        )
2135        return "\n".join(outs)
2136
2137    def get_orderings(self, mode: Literal["enum", "sqs"] = "enum", **kwargs) -> List["Structure"]:
2138        r"""
2139        Returns list of orderings for a disordered structure. If structure
2140        does not contain disorder, the default structure is returned.
2141
2142        Args:
2143            mode ("enum" | "sqs"): Either "enum" or "sqs". If enum,
2144                the enumlib will be used to return all distinct
2145                orderings. If sqs, mcsqs will be used to return
2146                an sqs structure.
2147            kwargs: kwargs passed to either
2148                pymatgen.command_line..enumlib_caller.EnumlibAdaptor
2149                or pymatgen.command_line.mcsqs_caller.run_mcsqs.
2150                For run_mcsqs, a default cluster search of 2 cluster interactions
2151                with 1NN distance and 3 cluster interactions with 2NN distance
2152                is set.
2153
2154        Returns:
2155            List[Structure]
2156        """
2157        if self.is_ordered:
2158            return [self]
2159        if mode.startswith("enum"):
2160            from pymatgen.command_line.enumlib_caller import EnumlibAdaptor
2161
2162            adaptor = EnumlibAdaptor(self, **kwargs)
2163            adaptor.run()
2164            return adaptor.structures
2165        if mode == "sqs":
2166            from pymatgen.command_line.mcsqs_caller import run_mcsqs
2167
2168            if "clusters" not in kwargs:
2169                disordered_sites = [site for site in self if not site.is_ordered]
2170                subset_structure = Structure.from_sites(disordered_sites)
2171                dist_matrix = subset_structure.distance_matrix
2172                dists = sorted(set(dist_matrix.ravel()))
2173                unique_dists = []
2174                for i in range(1, len(dists)):
2175                    if dists[i] - dists[i - 1] > 0.1:
2176                        unique_dists.append(dists[i])
2177                clusters = {(i + 2): d + 0.01 for i, d in enumerate(unique_dists) if i < 2}
2178                kwargs["clusters"] = clusters
2179            return [run_mcsqs(self, **kwargs).bestsqs]
2180        raise ValueError()
2181
2182    def as_dict(self, verbosity=1, fmt=None, **kwargs):
2183        """
2184        Dict representation of Structure.
2185
2186        Args:
2187            verbosity (int): Verbosity level. Default of 1 includes both
2188                direct and cartesian coordinates for all sites, lattice
2189                parameters, etc. Useful for reading and for insertion into a
2190                database. Set to 0 for an extremely lightweight version
2191                that only includes sufficient information to reconstruct the
2192                object.
2193            fmt (str): Specifies a format for the dict. Defaults to None,
2194                which is the default format used in pymatgen. Other options
2195                include "abivars".
2196            **kwargs: Allow passing of other kwargs needed for certain
2197            formats, e.g., "abivars".
2198
2199        Returns:
2200            JSON serializable dict representation.
2201        """
2202        if fmt == "abivars":
2203            """Returns a dictionary with the ABINIT variables."""
2204            from pymatgen.io.abinit.abiobjects import structure_to_abivars
2205
2206            return structure_to_abivars(self, **kwargs)
2207
2208        latt_dict = self._lattice.as_dict(verbosity=verbosity)
2209        del latt_dict["@module"]
2210        del latt_dict["@class"]
2211
2212        d = {
2213            "@module": self.__class__.__module__,
2214            "@class": self.__class__.__name__,
2215            "charge": self._charge,
2216            "lattice": latt_dict,
2217            "sites": [],
2218        }
2219        for site in self:
2220            site_dict = site.as_dict(verbosity=verbosity)
2221            del site_dict["lattice"]
2222            del site_dict["@module"]
2223            del site_dict["@class"]
2224            d["sites"].append(site_dict)
2225        return d
2226
2227    def as_dataframe(self):
2228        """
2229        Returns a Pandas dataframe of the sites. Structure level attributes are stored in DataFrame.attrs. Example:
2230
2231        Species    a    b             c    x             y             z  magmom
2232        0    (Si)  0.0  0.0  0.000000e+00  0.0  0.000000e+00  0.000000e+00       5
2233        1    (Si)  0.0  0.0  1.000000e-07  0.0 -2.217138e-07  3.135509e-07      -5
2234        """
2235        data = []
2236        site_properties = self.site_properties
2237        prop_keys = list(site_properties.keys())
2238        for site in self:
2239            row = [site.species] + list(site.frac_coords) + list(site.coords)
2240            for k in prop_keys:
2241                row.append(site.properties.get(k))
2242            data.append(row)
2243        import pandas as pd
2244
2245        df = pd.DataFrame(data, columns=["Species", "a", "b", "c", "x", "y", "z"] + prop_keys)
2246        df.attrs["Reduced Formula"] = self.composition.reduced_formula
2247        df.attrs["Lattice"] = self.lattice
2248        return df
2249
2250    @classmethod
2251    def from_dict(cls, d, fmt=None):
2252        """
2253        Reconstitute a Structure object from a dict representation of Structure
2254        created using as_dict().
2255
2256        Args:
2257            d (dict): Dict representation of structure.
2258
2259        Returns:
2260            Structure object
2261        """
2262        if fmt == "abivars":
2263            from pymatgen.io.abinit.abiobjects import structure_from_abivars
2264
2265            return structure_from_abivars(cls=cls, **d)
2266
2267        lattice = Lattice.from_dict(d["lattice"])
2268        sites = [PeriodicSite.from_dict(sd, lattice) for sd in d["sites"]]
2269        charge = d.get("charge", None)
2270        return cls.from_sites(sites, charge=charge)
2271
2272    def to(self, fmt: str = None, filename=None, **kwargs) -> Optional[str]:
2273        r"""
2274        Outputs the structure to a file or string.
2275
2276        Args:
2277            fmt (str): Format to output to. Defaults to JSON unless filename
2278                is provided. If fmt is specifies, it overrides whatever the
2279                filename is. Options include "cif", "poscar", "cssr", "json".
2280                Non-case sensitive.
2281            filename (str): If provided, output will be written to a file. If
2282                fmt is not specified, the format is determined from the
2283                filename. Defaults is None, i.e. string output.
2284            **kwargs: Kwargs passthru to relevant methods. E.g., This allows
2285                the passing of parameters like symprec to the
2286                CifWriter.__init__ method for generation of symmetric cifs.
2287
2288        Returns:
2289            (str) if filename is None. None otherwise.
2290        """
2291        filename = filename or ""
2292        fmt = "" if fmt is None else fmt.lower()
2293        fname = os.path.basename(filename)
2294
2295        if fmt == "cif" or fnmatch(fname.lower(), "*.cif*"):
2296            from pymatgen.io.cif import CifWriter
2297
2298            writer = CifWriter(self, **kwargs)
2299        elif fmt == "mcif" or fnmatch(fname.lower(), "*.mcif*"):
2300            from pymatgen.io.cif import CifWriter
2301
2302            writer = CifWriter(self, write_magmoms=True, **kwargs)
2303        elif fmt == "poscar" or fnmatch(fname, "*POSCAR*"):
2304            from pymatgen.io.vasp import Poscar
2305
2306            writer = Poscar(self, **kwargs)
2307        elif fmt == "cssr" or fnmatch(fname.lower(), "*.cssr*"):
2308            from pymatgen.io.cssr import Cssr
2309
2310            writer = Cssr(self)  # type: ignore
2311        elif fmt == "json" or fnmatch(fname.lower(), "*.json"):
2312            s = json.dumps(self.as_dict())
2313            if filename:
2314                with zopen(filename, "wt") as f:
2315                    f.write("%s" % s)
2316            return s
2317        elif fmt == "xsf" or fnmatch(fname.lower(), "*.xsf*"):
2318            from pymatgen.io.xcrysden import XSF
2319
2320            s = XSF(self).to_string()
2321            if filename:
2322                with zopen(fname, "wt", encoding="utf8") as f:
2323                    f.write(s)
2324            return s
2325        elif (
2326            fmt == "mcsqs" or fnmatch(fname, "*rndstr.in*") or fnmatch(fname, "*lat.in*") or fnmatch(fname, "*bestsqs*")
2327        ):
2328            from pymatgen.io.atat import Mcsqs
2329
2330            s = Mcsqs(self).to_string()
2331            if filename:
2332                with zopen(fname, "wt", encoding="ascii") as f:
2333                    f.write(s)
2334            return s
2335        elif fmt == "prismatic" or fnmatch(fname, "*prismatic*"):
2336            from pymatgen.io.prismatic import Prismatic
2337
2338            s = Prismatic(self).to_string()
2339            return s
2340        elif fmt == "yaml" or fnmatch(fname, "*.yaml*") or fnmatch(fname, "*.yml*"):
2341            if filename:
2342                with zopen(filename, "wt") as f:
2343                    yaml.safe_dump(self.as_dict(), f)
2344                return None
2345            return yaml.safe_dump(self.as_dict())
2346        elif fmt == "fleur-inpgen" or fnmatch(fname, "*.in*"):
2347            from pymatgen.io.fleur import FleurInput
2348
2349            writer = FleurInput(self, **kwargs)
2350        else:
2351            raise ValueError("Invalid format: `%s`" % str(fmt))
2352
2353        if filename:
2354            writer.write_file(filename)
2355            return None
2356        return writer.__str__()
2357
2358    @classmethod
2359    def from_str(
2360        cls,
2361        input_string: str,
2362        fmt: Literal["cif", "poscar", "cssr", "json", "yaml", "xsf", "mcsqs"],
2363        primitive=False,
2364        sort=False,
2365        merge_tol=0.0,
2366    ):
2367        """
2368        Reads a structure from a string.
2369
2370        Args:
2371            input_string (str): String to parse.
2372            fmt (str): A file format specification. One of "cif", "poscar", "cssr",
2373                "json", "yaml", "xsf", "mcsqs".
2374            primitive (bool): Whether to find a primitive cell. Defaults to
2375                False.
2376            sort (bool): Whether to sort the sites in accordance to the default
2377                ordering criteria, i.e., electronegativity.
2378            merge_tol (float): If this is some positive number, sites that
2379                are within merge_tol from each other will be merged. Usually
2380                0.01 should be enough to deal with common numerical issues.
2381
2382        Returns:
2383            IStructure / Structure
2384        """
2385
2386        fmt_low = fmt.lower()
2387        if fmt_low == "cif":
2388            from pymatgen.io.cif import CifParser
2389
2390            parser = CifParser.from_string(input_string)
2391            s = parser.get_structures(primitive=primitive)[0]
2392        elif fmt_low == "poscar":
2393            from pymatgen.io.vasp import Poscar
2394
2395            s = Poscar.from_string(input_string, False, read_velocities=False).structure
2396        elif fmt_low == "cssr":
2397            from pymatgen.io.cssr import Cssr
2398
2399            cssr = Cssr.from_string(input_string)
2400            s = cssr.structure
2401        elif fmt_low == "json":
2402            d = json.loads(input_string)
2403            s = Structure.from_dict(d)
2404        elif fmt_low == "yaml":
2405            d = yaml.safe_load(input_string)
2406            s = Structure.from_dict(d)
2407        elif fmt_low == "xsf":
2408            from pymatgen.io.xcrysden import XSF
2409
2410            s = XSF.from_string(input_string).structure
2411        elif fmt_low == "mcsqs":
2412            from pymatgen.io.atat import Mcsqs
2413
2414            s = Mcsqs.structure_from_string(input_string)
2415        elif fmt == "fleur-inpgen":
2416            from pymatgen.io.fleur import FleurInput
2417
2418            s = FleurInput.from_string(input_string, inpgen_input=True).structure
2419        elif fmt == "fleur":
2420            from pymatgen.io.fleur import FleurInput
2421
2422            s = FleurInput.from_string(input_string, inpgen_input=False).structure
2423        else:
2424            raise ValueError(f"Unrecognized format `{fmt}`!")
2425
2426        if sort:
2427            s = s.get_sorted_structure()
2428        if merge_tol:
2429            s.merge_sites(merge_tol)
2430        return cls.from_sites(s)
2431
2432    @classmethod
2433    def from_file(cls, filename, primitive=False, sort=False, merge_tol=0.0):
2434        """
2435        Reads a structure from a file. For example, anything ending in
2436        a "cif" is assumed to be a Crystallographic Information Format file.
2437        Supported formats include CIF, POSCAR/CONTCAR, CHGCAR, LOCPOT,
2438        vasprun.xml, CSSR, Netcdf and pymatgen's JSON serialized structures.
2439
2440        Args:
2441            filename (str): The filename to read from.
2442            primitive (bool): Whether to convert to a primitive cell
2443                Only available for cifs. Defaults to False.
2444            sort (bool): Whether to sort sites. Default to False.
2445            merge_tol (float): If this is some positive number, sites that
2446                are within merge_tol from each other will be merged. Usually
2447                0.01 should be enough to deal with common numerical issues.
2448
2449        Returns:
2450            Structure.
2451        """
2452        filename = str(filename)
2453        if filename.endswith(".nc"):
2454            # Read Structure from a netcdf file.
2455            from pymatgen.io.abinit.netcdf import structure_from_ncdata
2456
2457            s = structure_from_ncdata(filename, cls=cls)
2458            if sort:
2459                s = s.get_sorted_structure()
2460            return s
2461
2462        from pymatgen.io.exciting import ExcitingInput
2463        from pymatgen.io.lmto import LMTOCtrl
2464        from pymatgen.io.vasp import Chgcar, Vasprun
2465
2466        fname = os.path.basename(filename)
2467        with zopen(filename, "rt") as f:
2468            contents = f.read()
2469        if fnmatch(fname.lower(), "*.cif*") or fnmatch(fname.lower(), "*.mcif*"):
2470            return cls.from_str(contents, fmt="cif", primitive=primitive, sort=sort, merge_tol=merge_tol)
2471        if fnmatch(fname, "*POSCAR*") or fnmatch(fname, "*CONTCAR*") or fnmatch(fname, "*.vasp"):
2472            s = cls.from_str(
2473                contents,
2474                fmt="poscar",
2475                primitive=primitive,
2476                sort=sort,
2477                merge_tol=merge_tol,
2478            )
2479
2480        elif fnmatch(fname, "CHGCAR*") or fnmatch(fname, "LOCPOT*"):
2481            s = Chgcar.from_file(filename).structure
2482        elif fnmatch(fname, "vasprun*.xml*"):
2483            s = Vasprun(filename).final_structure
2484        elif fnmatch(fname.lower(), "*.cssr*"):
2485            return cls.from_str(
2486                contents,
2487                fmt="cssr",
2488                primitive=primitive,
2489                sort=sort,
2490                merge_tol=merge_tol,
2491            )
2492        elif fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
2493            return cls.from_str(
2494                contents,
2495                fmt="json",
2496                primitive=primitive,
2497                sort=sort,
2498                merge_tol=merge_tol,
2499            )
2500        elif fnmatch(fname, "*.yaml*"):
2501            return cls.from_str(
2502                contents,
2503                fmt="yaml",
2504                primitive=primitive,
2505                sort=sort,
2506                merge_tol=merge_tol,
2507            )
2508        elif fnmatch(fname, "*.xsf"):
2509            return cls.from_str(contents, fmt="xsf", primitive=primitive, sort=sort, merge_tol=merge_tol)
2510        elif fnmatch(fname, "input*.xml"):
2511            return ExcitingInput.from_file(fname).structure
2512        elif fnmatch(fname, "*rndstr.in*") or fnmatch(fname, "*lat.in*") or fnmatch(fname, "*bestsqs*"):
2513            return cls.from_str(
2514                contents,
2515                fmt="mcsqs",
2516                primitive=primitive,
2517                sort=sort,
2518                merge_tol=merge_tol,
2519            )
2520        elif fnmatch(fname, "CTRL*"):
2521            return LMTOCtrl.from_file(filename=filename).structure
2522        elif fnmatch(fname, "inp*.xml") or fnmatch(fname, "*.in*") or fnmatch(fname, "inp_*"):
2523            from pymatgen.io.fleur import FleurInput
2524
2525            s = FleurInput.from_file(filename).structure
2526        else:
2527            raise ValueError("Unrecognized file extension!")
2528        if sort:
2529            s = s.get_sorted_structure()
2530        if merge_tol:
2531            s.merge_sites(merge_tol)
2532
2533        s.__class__ = cls
2534        return s
2535
2536
2537class IMolecule(SiteCollection, MSONable):
2538    """
2539    Basic immutable Molecule object without periodicity. Essentially a
2540    sequence of sites. IMolecule is made to be immutable so that they can
2541    function as keys in a dict. For a mutable molecule,
2542    use the :class:Molecule.
2543
2544    Molecule extends Sequence and Hashable, which means that in many cases,
2545    it can be used like any Python sequence. Iterating through a molecule is
2546    equivalent to going through the sites in sequence.
2547    """
2548
2549    def __init__(
2550        self,
2551        species: Sequence[CompositionLike],
2552        coords: Sequence[ArrayLike],
2553        charge: float = 0.0,
2554        spin_multiplicity: float = None,
2555        validate_proximity: bool = False,
2556        site_properties: dict = None,
2557    ):
2558        """
2559        Creates a Molecule.
2560
2561        Args:
2562            species: list of atomic species. Possible kinds of input include a
2563                list of dict of elements/species and occupancies, a List of
2564                elements/specie specified as actual Element/Species, Strings
2565                ("Fe", "Fe2+") or atomic numbers (1,56).
2566            coords (3x1 array): list of cartesian coordinates of each species.
2567            charge (float): Charge for the molecule. Defaults to 0.
2568            spin_multiplicity (int): Spin multiplicity for molecule.
2569                Defaults to None, which means that the spin multiplicity is
2570                set to 1 if the molecule has no unpaired electrons and to 2
2571                if there are unpaired electrons.
2572            validate_proximity (bool): Whether to check if there are sites
2573                that are less than 1 Ang apart. Defaults to False.
2574            site_properties (dict): Properties associated with the sites as
2575                a dict of sequences, e.g., {"magmom":[5,5,5,5]}. The
2576                sequences have to be the same length as the atomic species
2577                and fractional_coords. Defaults to None for no properties.
2578        """
2579        if len(species) != len(coords):
2580            raise StructureError(
2581                (
2582                    "The list of atomic species must be of the",
2583                    " same length as the list of fractional ",
2584                    "coordinates.",
2585                )
2586            )
2587
2588        sites = []
2589        for i, _ in enumerate(species):
2590            prop = None
2591            if site_properties:
2592                prop = {k: v[i] for k, v in site_properties.items()}
2593            sites.append(Site(species[i], coords[i], properties=prop))
2594
2595        self._sites = tuple(sites)
2596        if validate_proximity and not self.is_valid():
2597            raise StructureError(("Molecule contains sites that are ", "less than 0.01 Angstrom apart!"))
2598
2599        self._charge = charge
2600        nelectrons = 0.0
2601        for site in sites:
2602            for sp, amt in site.species.items():
2603                if not isinstance(sp, DummySpecies):
2604                    nelectrons += sp.Z * amt  # type: ignore
2605        nelectrons -= charge
2606        self._nelectrons = nelectrons
2607        if spin_multiplicity:
2608            if (nelectrons + spin_multiplicity) % 2 != 1:
2609                raise ValueError(
2610                    "Charge of %d and spin multiplicity of %d is"
2611                    " not possible for this molecule" % (self._charge, spin_multiplicity)
2612                )
2613            self._spin_multiplicity = spin_multiplicity
2614        else:
2615            self._spin_multiplicity = 1 if nelectrons % 2 == 0 else 2
2616
2617    @property
2618    def charge(self) -> float:
2619        """
2620        Charge of molecule
2621        """
2622        return self._charge
2623
2624    @property
2625    def spin_multiplicity(self) -> float:
2626        """
2627        Spin multiplicity of molecule.
2628        """
2629        return self._spin_multiplicity
2630
2631    @property
2632    def nelectrons(self) -> float:
2633        """
2634        Number of electrons in the molecule.
2635        """
2636        return self._nelectrons
2637
2638    @property
2639    def center_of_mass(self) -> float:
2640        """
2641        Center of mass of molecule.
2642        """
2643        center = np.zeros(3)
2644        total_weight = 0
2645        for site in self:
2646            wt = site.species.weight
2647            center += site.coords * wt
2648            total_weight += wt
2649        return center / total_weight  # type: ignore
2650
2651    @property
2652    def sites(self) -> Tuple[Site, ...]:
2653        """
2654        Returns a tuple of sites in the Molecule.
2655        """
2656        return self._sites
2657
2658    @classmethod
2659    def from_sites(
2660        cls, sites: Sequence[Site], charge: float = 0, spin_multiplicity: float = None, validate_proximity: bool = False
2661    ) -> Union["IMolecule", "Molecule"]:
2662        """
2663        Convenience constructor to make a Molecule from a list of sites.
2664
2665        Args:
2666            sites ([Site]): Sequence of Sites.
2667            charge (int): Charge of molecule. Defaults to 0.
2668            spin_multiplicity (int): Spin multicipity. Defaults to None,
2669                in which it is determined automatically.
2670            validate_proximity (bool): Whether to check that atoms are too
2671                close.
2672        """
2673        props = collections.defaultdict(list)
2674        for site in sites:
2675            for k, v in site.properties.items():
2676                props[k].append(v)
2677        return cls(
2678            [site.species for site in sites],
2679            [site.coords for site in sites],
2680            charge=charge,
2681            spin_multiplicity=spin_multiplicity,
2682            validate_proximity=validate_proximity,
2683            site_properties=props,
2684        )
2685
2686    def break_bond(self, ind1: int, ind2: int, tol: float = 0.2) -> Tuple[Union["IMolecule", "Molecule"], ...]:
2687        """
2688        Returns two molecules based on breaking the bond between atoms at index
2689        ind1 and ind2.
2690
2691        Args:
2692            ind1 (int): Index of first site.
2693            ind2 (int): Index of second site.
2694            tol (float): Relative tolerance to test. Basically, the code
2695                checks if the distance between the sites is less than (1 +
2696                tol) * typical bond distances. Defaults to 0.2, i.e.,
2697                20% longer.
2698
2699        Returns:
2700            Two Molecule objects representing the two clusters formed from
2701            breaking the bond.
2702        """
2703        clusters = [[self._sites[ind1]], [self._sites[ind2]]]
2704
2705        sites = [site for i, site in enumerate(self._sites) if i not in (ind1, ind2)]
2706
2707        def belongs_to_cluster(site, cluster):
2708            for test_site in cluster:
2709                if CovalentBond.is_bonded(site, test_site, tol=tol):
2710                    return True
2711            return False
2712
2713        while len(sites) > 0:
2714            unmatched = []
2715            for site in sites:
2716                for cluster in clusters:
2717                    if belongs_to_cluster(site, cluster):
2718                        cluster.append(site)
2719                        break
2720                else:
2721                    unmatched.append(site)
2722
2723            if len(unmatched) == len(sites):
2724                raise ValueError("Not all sites are matched!")
2725            sites = unmatched
2726
2727        return tuple(self.__class__.from_sites(cluster) for cluster in clusters)
2728
2729    def get_covalent_bonds(self, tol: float = 0.2) -> List[CovalentBond]:
2730        """
2731        Determines the covalent bonds in a molecule.
2732
2733        Args:
2734            tol (float): The tol to determine bonds in a structure. See
2735                CovalentBond.is_bonded.
2736
2737        Returns:
2738            List of bonds
2739        """
2740        bonds = []
2741        for site1, site2 in itertools.combinations(self._sites, 2):
2742            if CovalentBond.is_bonded(site1, site2, tol):
2743                bonds.append(CovalentBond(site1, site2))
2744        return bonds
2745
2746    def __eq__(self, other):
2747        if other is None:
2748            return False
2749        if len(self) != len(other):
2750            return False
2751        if self.charge != other.charge:
2752            return False
2753        if self.spin_multiplicity != other.spin_multiplicity:
2754            return False
2755        for site in self:
2756            if site not in other:
2757                return False
2758        return True
2759
2760    def __ne__(self, other):
2761        return not self.__eq__(other)
2762
2763    def __hash__(self):
2764        # For now, just use the composition hash code.
2765        return self.composition.__hash__()
2766
2767    def __repr__(self):
2768        outs = ["Molecule Summary"]
2769        for s in self:
2770            outs.append(s.__repr__())
2771        return "\n".join(outs)
2772
2773    def __str__(self):
2774        outs = [
2775            "Full Formula (%s)" % self.composition.formula,
2776            "Reduced Formula: " + self.composition.reduced_formula,
2777            "Charge = %s, Spin Mult = %s" % (self._charge, self._spin_multiplicity),
2778            "Sites (%d)" % len(self),
2779        ]
2780        for i, site in enumerate(self):
2781            outs.append(
2782                " ".join(
2783                    [
2784                        str(i),
2785                        site.species_string,
2786                        " ".join([("%0.6f" % j).rjust(12) for j in site.coords]),
2787                    ]
2788                )
2789            )
2790        return "\n".join(outs)
2791
2792    def as_dict(self):
2793        """
2794        Json-serializable dict representation of Molecule
2795        """
2796        d = {
2797            "@module": self.__class__.__module__,
2798            "@class": self.__class__.__name__,
2799            "charge": self._charge,
2800            "spin_multiplicity": self._spin_multiplicity,
2801            "sites": [],
2802        }
2803        for site in self:
2804            site_dict = site.as_dict()
2805            del site_dict["@module"]
2806            del site_dict["@class"]
2807            d["sites"].append(site_dict)
2808        return d
2809
2810    @classmethod
2811    def from_dict(cls, d) -> dict:
2812        """
2813        Reconstitute a Molecule object from a dict representation created using
2814        as_dict().
2815
2816        Args:
2817            d (dict): dict representation of Molecule.
2818
2819        Returns:
2820            Molecule object
2821        """
2822        sites = [Site.from_dict(sd) for sd in d["sites"]]
2823        charge = d.get("charge", 0)
2824        spin_multiplicity = d.get("spin_multiplicity")
2825        return cls.from_sites(sites, charge=charge, spin_multiplicity=spin_multiplicity)
2826
2827    def get_distance(self, i: int, j: int) -> float:
2828        """
2829        Get distance between site i and j.
2830
2831        Args:
2832            i (int): Index of first site
2833            j (int): Index of second site
2834
2835        Returns:
2836            Distance between the two sites.
2837        """
2838        return self[i].distance(self[j])
2839
2840    def get_sites_in_sphere(self, pt: ArrayLike, r: float) -> List[Neighbor]:
2841        """
2842        Find all sites within a sphere from a point.
2843
2844        Args:
2845            pt (3x1 array): Cartesian coordinates of center of sphere
2846            r (float): Radius of sphere.
2847
2848        Returns:
2849            [:class:`pymatgen.core.structure.Neighbor`]
2850        """
2851        neighbors = []
2852        for i, site in enumerate(self._sites):
2853            dist = site.distance_from_point(pt)
2854            if dist <= r:
2855                neighbors.append(Neighbor(site.species, site.coords, site.properties, dist, i))
2856        return neighbors
2857
2858    def get_neighbors(self, site: Site, r: float) -> List[Neighbor]:
2859        """
2860        Get all neighbors to a site within a sphere of radius r.  Excludes the
2861        site itself.
2862
2863        Args:
2864            site (Site): Site at the center of the sphere.
2865            r (float): Radius of sphere.
2866
2867        Returns:
2868            [:class:`pymatgen.core.structure.Neighbor`]
2869        """
2870        nns = self.get_sites_in_sphere(site.coords, r)
2871        return [nn for nn in nns if nn != site]
2872
2873    def get_neighbors_in_shell(self, origin: ArrayLike, r: float, dr: float) -> List[Neighbor]:
2874        """
2875        Returns all sites in a shell centered on origin (coords) between radii
2876        r-dr and r+dr.
2877
2878        Args:
2879            origin (3x1 array): Cartesian coordinates of center of sphere.
2880            r (float): Inner radius of shell.
2881            dr (float): Width of shell.
2882
2883        Returns:
2884            [:class:`pymatgen.core.structure.Neighbor`]
2885        """
2886        outer = self.get_sites_in_sphere(origin, r + dr)
2887        inner = r - dr
2888        return [nn for nn in outer if nn.nn_distance > inner]
2889
2890    def get_boxed_structure(
2891        self,
2892        a: float,
2893        b: float,
2894        c: float,
2895        images: ArrayLike = (1, 1, 1),
2896        random_rotation: bool = False,
2897        min_dist: float = 1.0,
2898        cls=None,
2899        offset: ArrayLike = None,
2900        no_cross: bool = False,
2901        reorder: bool = True,
2902    ) -> Union["IStructure", "Structure"]:
2903        """
2904        Creates a Structure from a Molecule by putting the Molecule in the
2905        center of a orthorhombic box. Useful for creating Structure for
2906        calculating molecules using periodic codes.
2907
2908        Args:
2909            a (float): a-lattice parameter.
2910            b (float): b-lattice parameter.
2911            c (float): c-lattice parameter.
2912            images: No. of boxed images in each direction. Defaults to
2913                (1, 1, 1), meaning single molecule with 1 lattice parameter
2914                in each direction.
2915            random_rotation (bool): Whether to apply a random rotation to
2916                each molecule. This jumbles all the molecules so that they
2917                are not exact images of each other.
2918            min_dist (float): The minimum distance that atoms should be from
2919                each other. This is only used if random_rotation is True.
2920                The randomized rotations are searched such that no two atoms
2921                are less than min_dist from each other.
2922            cls: The Structure class to instantiate (defaults to pymatgen
2923                structure)
2924            offset: Translation to offset molecule from center of mass coords
2925            no_cross: Whether to forbid molecule coords from extending beyond
2926                boundary of box.
2927            reorder: Whether to reorder the sites to be in electronegativity
2928                order.
2929
2930        Returns:
2931            Structure containing molecule in a box.
2932        """
2933        if offset is None:
2934            offset = np.array([0, 0, 0])
2935
2936        coords = np.array(self.cart_coords)
2937        x_range = max(coords[:, 0]) - min(coords[:, 0])
2938        y_range = max(coords[:, 1]) - min(coords[:, 1])
2939        z_range = max(coords[:, 2]) - min(coords[:, 2])
2940
2941        if a <= x_range or b <= y_range or c <= z_range:
2942            raise ValueError("Box is not big enough to contain Molecule.")
2943        lattice = Lattice.from_parameters(a * images[0], b * images[1], c * images[2], 90, 90, 90)  # type: ignore
2944        nimages = images[0] * images[1] * images[2]  # type: ignore
2945        all_coords: List[ArrayLike] = []
2946
2947        centered_coords = self.cart_coords - self.center_of_mass + offset
2948
2949        for i, j, k in itertools.product(
2950            list(range(images[0])), list(range(images[1])), list(range(images[2]))  # type: ignore
2951        ):
2952            box_center = [(i + 0.5) * a, (j + 0.5) * b, (k + 0.5) * c]  # type: ignore
2953            if random_rotation:
2954                while True:
2955                    op = SymmOp.from_origin_axis_angle(
2956                        (0, 0, 0),
2957                        axis=np.random.rand(3),
2958                        angle=random.uniform(-180, 180),
2959                    )
2960                    m = op.rotation_matrix
2961                    new_coords = np.dot(m, centered_coords.T).T + box_center
2962                    if no_cross:
2963                        x_max, x_min = max(new_coords[:, 0]), min(new_coords[:, 0])
2964                        y_max, y_min = max(new_coords[:, 1]), min(new_coords[:, 1])
2965                        z_max, z_min = max(new_coords[:, 2]), min(new_coords[:, 2])
2966                        if x_max > a or x_min < 0 or y_max > b or y_min < 0 or z_max > c or z_min < 0:
2967                            raise ValueError("Molecule crosses boundary of box.")
2968                    if len(all_coords) == 0:
2969                        break
2970                    distances = lattice.get_all_distances(
2971                        lattice.get_fractional_coords(new_coords),
2972                        lattice.get_fractional_coords(all_coords),
2973                    )
2974                    if np.amin(distances) > min_dist:
2975                        break
2976            else:
2977                new_coords = centered_coords + box_center
2978                if no_cross:
2979                    x_max, x_min = max(new_coords[:, 0]), min(new_coords[:, 0])
2980                    y_max, y_min = max(new_coords[:, 1]), min(new_coords[:, 1])
2981                    z_max, z_min = max(new_coords[:, 2]), min(new_coords[:, 2])
2982                    if x_max > a or x_min < 0 or y_max > b or y_min < 0 or z_max > c or z_min < 0:
2983                        raise ValueError("Molecule crosses boundary of box.")
2984            all_coords.extend(new_coords)
2985        sprops = {k: v * nimages for k, v in self.site_properties.items()}  # type: ignore
2986
2987        if cls is None:
2988            cls = Structure
2989
2990        if reorder:
2991            return cls(
2992                lattice,
2993                self.species * nimages,  # type: ignore
2994                all_coords,
2995                coords_are_cartesian=True,
2996                site_properties=sprops,
2997            ).get_sorted_structure()
2998
2999        return cls(
3000            lattice,
3001            self.species * nimages,  # type: ignore
3002            coords,
3003            coords_are_cartesian=True,
3004            site_properties=sprops,
3005        )
3006
3007    def get_centered_molecule(self) -> Union["IMolecule", "Molecule"]:
3008        """
3009        Returns a Molecule centered at the center of mass.
3010
3011        Returns:
3012            Molecule centered with center of mass at origin.
3013        """
3014        center = self.center_of_mass
3015        new_coords = np.array(self.cart_coords) - center
3016        return self.__class__(  # type: ignore
3017            self.species_and_occu,
3018            new_coords,  # type: ignore
3019            charge=self._charge,
3020            spin_multiplicity=self._spin_multiplicity,
3021            site_properties=self.site_properties,
3022        )
3023
3024    def to(self, fmt=None, filename=None):
3025        """
3026        Outputs the molecule to a file or string.
3027
3028        Args:
3029            fmt (str): Format to output to. Defaults to JSON unless filename
3030                is provided. If fmt is specifies, it overrides whatever the
3031                filename is. Options include "xyz", "gjf", "g03", "json". If
3032                you have OpenBabel installed, any of the formats supported by
3033                OpenBabel. Non-case sensitive.
3034            filename (str): If provided, output will be written to a file. If
3035                fmt is not specified, the format is determined from the
3036                filename. Defaults is None, i.e. string output.
3037
3038        Returns:
3039            (str) if filename is None. None otherwise.
3040        """
3041        from pymatgen.io.babel import BabelMolAdaptor
3042        from pymatgen.io.gaussian import GaussianInput
3043        from pymatgen.io.xyz import XYZ
3044
3045        fmt = "" if fmt is None else fmt.lower()
3046        fname = os.path.basename(filename or "")
3047        if fmt == "xyz" or fnmatch(fname.lower(), "*.xyz*"):
3048            writer = XYZ(self)
3049        elif any(fmt == r or fnmatch(fname.lower(), "*.{}*".format(r)) for r in ["gjf", "g03", "g09", "com", "inp"]):
3050            writer = GaussianInput(self)
3051        elif fmt == "json" or fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
3052            if filename:
3053                with zopen(filename, "wt", encoding="utf8") as f:
3054                    return json.dump(self.as_dict(), f)
3055            else:
3056                return json.dumps(self.as_dict())
3057        elif fmt == "yaml" or fnmatch(fname, "*.yaml*"):
3058            if filename:
3059                with zopen(fname, "wt", encoding="utf8") as f:
3060                    return yaml.safe_dump(self.as_dict(), f)
3061            else:
3062                return yaml.safe_dump(self.as_dict())
3063
3064        else:
3065            m = re.search(r"\.(pdb|mol|mdl|sdf|sd|ml2|sy2|mol2|cml|mrv)", fname.lower())
3066            if (not fmt) and m:
3067                fmt = m.group(1)
3068            writer = BabelMolAdaptor(self)
3069            return writer.write_file(filename, file_format=fmt)
3070
3071        if filename:
3072            writer.write_file(filename)
3073        return str(writer)
3074
3075    @classmethod
3076    def from_str(cls, input_string: str, fmt: str):
3077        """
3078        Reads the molecule from a string.
3079
3080        Args:
3081            input_string (str): String to parse.
3082            fmt (str): Format to output to. Defaults to JSON unless filename
3083                is provided. If fmt is specifies, it overrides whatever the
3084                filename is. Options include "xyz", "gjf", "g03", "json". If
3085                you have OpenBabel installed, any of the formats supported by
3086                OpenBabel. Non-case sensitive.
3087
3088        Returns:
3089            IMolecule or Molecule.
3090        """
3091        from pymatgen.io.gaussian import GaussianInput
3092        from pymatgen.io.xyz import XYZ
3093
3094        if fmt.lower() == "xyz":
3095            m = XYZ.from_string(input_string).molecule
3096        elif fmt in ["gjf", "g03", "g09", "com", "inp"]:
3097            m = GaussianInput.from_string(input_string).molecule
3098        elif fmt == "json":
3099            d = json.loads(input_string)
3100            return cls.from_dict(d)
3101        elif fmt == "yaml":
3102            d = yaml.safe_load(input_string)
3103            return cls.from_dict(d)
3104        else:
3105            from pymatgen.io.babel import BabelMolAdaptor
3106
3107            m = BabelMolAdaptor.from_string(input_string, file_format=fmt).pymatgen_mol
3108        return cls.from_sites(m)
3109
3110    @classmethod
3111    def from_file(cls, filename):
3112        """
3113        Reads a molecule from a file. Supported formats include xyz,
3114        gaussian input (gjf|g03|g09|com|inp), Gaussian output (.out|and
3115        pymatgen's JSON serialized molecules. Using openbabel,
3116        many more extensions are supported but requires openbabel to be
3117        installed.
3118
3119        Args:
3120            filename (str): The filename to read from.
3121
3122        Returns:
3123            Molecule
3124        """
3125        filename = str(filename)
3126        from pymatgen.io.gaussian import GaussianOutput
3127
3128        with zopen(filename) as f:
3129            contents = f.read()
3130        fname = filename.lower()
3131        if fnmatch(fname, "*.xyz*"):
3132            return cls.from_str(contents, fmt="xyz")
3133        if any(fnmatch(fname.lower(), "*.{}*".format(r)) for r in ["gjf", "g03", "g09", "com", "inp"]):
3134            return cls.from_str(contents, fmt="g09")
3135        if any(fnmatch(fname.lower(), "*.{}*".format(r)) for r in ["out", "lis", "log"]):
3136            return GaussianOutput(filename).final_structure
3137        if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
3138            return cls.from_str(contents, fmt="json")
3139        if fnmatch(fname, "*.yaml*"):
3140            return cls.from_str(contents, fmt="yaml")
3141        from pymatgen.io.babel import BabelMolAdaptor
3142
3143        m = re.search(r"\.(pdb|mol|mdl|sdf|sd|ml2|sy2|mol2|cml|mrv)", filename.lower())
3144        if m:
3145            new = BabelMolAdaptor.from_file(filename, m.group(1)).pymatgen_mol
3146            new.__class__ = cls
3147            return new
3148        raise ValueError("Cannot determine file type.")
3149
3150
3151class Structure(IStructure, collections.abc.MutableSequence):
3152    """
3153    Mutable version of structure.
3154    """
3155
3156    __hash__ = None  # type: ignore
3157
3158    def __init__(
3159        self,
3160        lattice: Union[ArrayLike, Lattice],
3161        species: Sequence[CompositionLike],
3162        coords: Sequence[ArrayLike],
3163        charge: float = None,
3164        validate_proximity: bool = False,
3165        to_unit_cell: bool = False,
3166        coords_are_cartesian: bool = False,
3167        site_properties: dict = None,
3168    ):
3169        """
3170        Create a periodic structure.
3171
3172        Args:
3173            lattice: The lattice, either as a pymatgen.core.lattice.Lattice or
3174                simply as any 2D array. Each row should correspond to a lattice
3175                vector. E.g., [[10,0,0], [20,10,0], [0,0,30]] specifies a
3176                lattice with lattice vectors [10,0,0], [20,10,0] and [0,0,30].
3177            species: List of species on each site. Can take in flexible input,
3178                including:
3179
3180                i.  A sequence of element / species specified either as string
3181                    symbols, e.g. ["Li", "Fe2+", "P", ...] or atomic numbers,
3182                    e.g., (3, 56, ...) or actual Element or Species objects.
3183
3184                ii. List of dict of elements/species and occupancies, e.g.,
3185                    [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of
3186                    disordered structures.
3187            coords (Nx3 array): list of fractional/cartesian coordinates of
3188                each species.
3189            charge (int): overall charge of the structure. Defaults to behavior
3190                in SiteCollection where total charge is the sum of the oxidation
3191                states.
3192            validate_proximity (bool): Whether to check if there are sites
3193                that are less than 0.01 Ang apart. Defaults to False.
3194            to_unit_cell (bool): Whether to map all sites into the unit cell,
3195                i.e., fractional coords between 0 and 1. Defaults to False.
3196            coords_are_cartesian (bool): Set to True if you are providing
3197                coordinates in cartesian coordinates. Defaults to False.
3198            site_properties (dict): Properties associated with the sites as a
3199                dict of sequences, e.g., {"magmom":[5,5,5,5]}. The sequences
3200                have to be the same length as the atomic species and
3201                fractional_coords. Defaults to None for no properties.
3202        """
3203        super().__init__(
3204            lattice,
3205            species,
3206            coords,
3207            charge=charge,
3208            validate_proximity=validate_proximity,
3209            to_unit_cell=to_unit_cell,
3210            coords_are_cartesian=coords_are_cartesian,
3211            site_properties=site_properties,
3212        )
3213
3214        self._sites: List[PeriodicSite] = list(self._sites)  # type: ignore
3215
3216    def __setitem__(  # type: ignore
3217        self, i: Union[int, slice, Sequence[int], SpeciesLike], site: Union[SpeciesLike, PeriodicSite, Sequence]
3218    ):
3219        """
3220        Modify a site in the structure.
3221
3222        Args:
3223            i (int, [int], slice, Species-like): Indices to change. You can
3224                specify these as an int, a list of int, or a species-like
3225                string.
3226            site (PeriodicSite/Species/Sequence): Three options exist. You
3227                can provide a PeriodicSite directly (lattice will be
3228                checked). Or more conveniently, you can provide a
3229                specie-like object or a tuple of up to length 3.
3230
3231            Examples:
3232                s[0] = "Fe"
3233                s[0] = Element("Fe")
3234                both replaces the species only.
3235                s[0] = "Fe", [0.5, 0.5, 0.5]
3236                Replaces site and *fractional* coordinates. Any properties
3237                are inherited from current site.
3238                s[0] = "Fe", [0.5, 0.5, 0.5], {"spin": 2}
3239                Replaces site and *fractional* coordinates and properties.
3240
3241                s[(0, 2, 3)] = "Fe"
3242                Replaces sites 0, 2 and 3 with Fe.
3243
3244                s[0::2] = "Fe"
3245                Replaces all even index sites with Fe.
3246
3247                s["Mn"] = "Fe"
3248                Replaces all Mn in the structure with Fe. This is
3249                a short form for the more complex replace_species.
3250
3251                s["Mn"] = "Fe0.5Co0.5"
3252                Replaces all Mn in the structure with Fe: 0.5, Co: 0.5, i.e.,
3253                creates a disordered structure!
3254        """
3255
3256        if isinstance(i, int):
3257            indices = [i]
3258        elif isinstance(i, (str, Element, Species)):
3259            self.replace_species({i: site})  # type: ignore
3260            return
3261        elif isinstance(i, slice):
3262            to_mod = self[i]
3263            indices = [ii for ii, s in enumerate(self._sites) if s in to_mod]
3264        else:
3265            indices = list(i)
3266
3267        for ii in indices:
3268            if isinstance(site, PeriodicSite):
3269                if site.lattice != self._lattice:
3270                    raise ValueError("PeriodicSite added must have same lattice " "as Structure!")
3271                if len(indices) != 1:
3272                    raise ValueError("Site assignments makes sense only for " "single int indices!")
3273                self._sites[ii] = site  # type: ignore
3274            else:
3275                if isinstance(site, str) or (not isinstance(site, collections.abc.Sequence)):
3276                    self._sites[ii].species = site  # type: ignore
3277                else:
3278                    self._sites[ii].species = site[0]  # type: ignore
3279                    if len(site) > 1:
3280                        self._sites[ii].frac_coords = site[1]  # type: ignore
3281                    if len(site) > 2:
3282                        self._sites[ii].properties = site[2]  # type: ignore
3283
3284    def __delitem__(self, i):
3285        """
3286        Deletes a site from the Structure.
3287        """
3288        self._sites.__delitem__(i)
3289
3290    @property
3291    def lattice(self) -> Lattice:
3292        """
3293        :return: Lattice assciated with structure.
3294        """
3295        return self._lattice
3296
3297    @lattice.setter
3298    def lattice(self, lattice: Union[ArrayLike, Lattice]):
3299        if not isinstance(lattice, Lattice):
3300            lattice = Lattice(lattice)
3301        self._lattice = lattice
3302        for site in self._sites:
3303            site.lattice = lattice
3304
3305    def append(  # type: ignore
3306        self,
3307        species: CompositionLike,
3308        coords: ArrayLike,
3309        coords_are_cartesian: bool = False,
3310        validate_proximity: bool = False,
3311        properties: dict = None,
3312    ):
3313        """
3314        Append a site to the structure.
3315
3316        Args:
3317            species: Species of inserted site
3318            coords (3x1 array): Coordinates of inserted site
3319            coords_are_cartesian (bool): Whether coordinates are cartesian.
3320                Defaults to False.
3321            validate_proximity (bool): Whether to check if inserted site is
3322                too close to an existing site. Defaults to False.
3323            properties (dict): Properties of the site.
3324
3325        Returns:
3326            New structure with inserted site.
3327        """
3328        return self.insert(
3329            len(self),
3330            species,
3331            coords,
3332            coords_are_cartesian=coords_are_cartesian,
3333            validate_proximity=validate_proximity,
3334            properties=properties,
3335        )
3336
3337    def insert(  # type: ignore
3338        self,
3339        i: int,
3340        species: CompositionLike,
3341        coords: ArrayLike,
3342        coords_are_cartesian: bool = False,
3343        validate_proximity: bool = False,
3344        properties: dict = None,
3345    ):
3346        """
3347        Insert a site to the structure.
3348
3349        Args:
3350            i (int): Index to insert site
3351            species (species-like): Species of inserted site
3352            coords (3x1 array): Coordinates of inserted site
3353            coords_are_cartesian (bool): Whether coordinates are cartesian.
3354                Defaults to False.
3355            validate_proximity (bool): Whether to check if inserted site is
3356                too close to an existing site. Defaults to False.
3357            properties (dict): Properties associated with the site.
3358
3359        Returns:
3360            New structure with inserted site.
3361        """
3362        if not coords_are_cartesian:
3363            new_site = PeriodicSite(species, coords, self._lattice, properties=properties)
3364        else:
3365            frac_coords = self._lattice.get_fractional_coords(coords)
3366            new_site = PeriodicSite(species, frac_coords, self._lattice, properties=properties)
3367
3368        if validate_proximity:
3369            for site in self:
3370                if site.distance(new_site) < self.DISTANCE_TOLERANCE:
3371                    raise ValueError("New site is too close to an existing " "site!")
3372
3373        self._sites.insert(i, new_site)
3374
3375    def replace(
3376        self,
3377        i: int,
3378        species: CompositionLike,
3379        coords: ArrayLike = None,
3380        coords_are_cartesian: bool = False,
3381        properties: dict = None,
3382    ):
3383        """
3384        Replace a single site. Takes either a species or a dict of species and
3385        occupations.
3386
3387        Args:
3388            i (int): Index of the site in the _sites list.
3389            species (species-like): Species of replacement site
3390            coords (3x1 array): Coordinates of replacement site. If None,
3391                the current coordinates are assumed.
3392            coords_are_cartesian (bool): Whether coordinates are cartesian.
3393                Defaults to False.
3394            properties (dict): Properties associated with the site.
3395        """
3396        if coords is None:
3397            frac_coords = self[i].frac_coords
3398        elif coords_are_cartesian:
3399            frac_coords = self._lattice.get_fractional_coords(coords)
3400        else:
3401            frac_coords = coords  # type: ignore
3402
3403        new_site = PeriodicSite(species, frac_coords, self._lattice, properties=properties)
3404        self._sites[i] = new_site
3405
3406    def substitute(self, index: int, func_group: Union["IMolecule", "Molecule", str], bond_order: int = 1):
3407        """
3408        Substitute atom at index with a functional group.
3409
3410        Args:
3411            index (int): Index of atom to substitute.
3412            func_group: Substituent molecule. There are two options:
3413
3414                1. Providing an actual Molecule as the input. The first atom
3415                   must be a DummySpecies X, indicating the position of
3416                   nearest neighbor. The second atom must be the next
3417                   nearest atom. For example, for a methyl group
3418                   substitution, func_grp should be X-CH3, where X is the
3419                   first site and C is the second site. What the code will
3420                   do is to remove the index site, and connect the nearest
3421                   neighbor to the C atom in CH3. The X-C bond indicates the
3422                   directionality to connect the atoms.
3423                2. A string name. The molecule will be obtained from the
3424                   relevant template in func_groups.json.
3425            bond_order (int): A specified bond order to calculate the bond
3426                length between the attached functional group and the nearest
3427                neighbor site. Defaults to 1.
3428        """
3429
3430        # Find the nearest neighbor that is not a terminal atom.
3431        all_non_terminal_nn = []
3432        for nn, dist, _, _ in self.get_neighbors(self[index], 3):
3433            # Check that the nn has neighbors within a sensible distance but
3434            # is not the site being substituted.
3435            for inn, dist2, _, _ in self.get_neighbors(nn, 3):
3436                if inn != self[index] and dist2 < 1.2 * get_bond_length(nn.specie, inn.specie):
3437                    all_non_terminal_nn.append((nn, dist))
3438                    break
3439
3440        if len(all_non_terminal_nn) == 0:
3441            raise RuntimeError("Can't find a non-terminal neighbor to attach" " functional group to.")
3442
3443        non_terminal_nn = min(all_non_terminal_nn, key=lambda d: d[1])[0]
3444
3445        # Set the origin point to be the coordinates of the nearest
3446        # non-terminal neighbor.
3447        origin = non_terminal_nn.coords
3448
3449        # Pass value of functional group--either from user-defined or from
3450        # functional.json
3451        if not isinstance(func_group, Molecule):
3452            # Check to see whether the functional group is in database.
3453            if func_group not in FunctionalGroups:
3454                raise RuntimeError("Can't find functional group in list. " "Provide explicit coordinate instead")
3455            fgroup = FunctionalGroups[func_group]
3456        else:
3457            fgroup = func_group
3458
3459        # If a bond length can be found, modify func_grp so that the X-group
3460        # bond length is equal to the bond length.
3461        try:
3462            bl = get_bond_length(non_terminal_nn.specie, fgroup[1].specie, bond_order=bond_order)
3463        # Catches for case of incompatibility between Element(s) and Species(s)
3464        except TypeError:
3465            bl = None
3466
3467        if bl is not None:
3468            fgroup = fgroup.copy()
3469            vec = fgroup[0].coords - fgroup[1].coords
3470            vec /= np.linalg.norm(vec)
3471            fgroup[0] = "X", fgroup[1].coords + float(bl) * vec
3472
3473        # Align X to the origin.
3474        x = fgroup[0]
3475        fgroup.translate_sites(list(range(len(fgroup))), origin - x.coords)
3476
3477        # Find angle between the attaching bond and the bond to be replaced.
3478        v1 = fgroup[1].coords - origin
3479        v2 = self[index].coords - origin
3480        angle = get_angle(v1, v2)
3481
3482        if 1 < abs(angle % 180) < 179:
3483            # For angles which are not 0 or 180, we perform a rotation about
3484            # the origin along an axis perpendicular to both bonds to align
3485            # bonds.
3486            axis = np.cross(v1, v2)
3487            op = SymmOp.from_origin_axis_angle(origin, axis, angle)
3488            fgroup.apply_operation(op)
3489        elif abs(abs(angle) - 180) < 1:
3490            # We have a 180 degree angle. Simply do an inversion about the
3491            # origin
3492            for i, fg in enumerate(fgroup):
3493                fgroup[i] = (fg.species, origin - (fg.coords - origin))
3494
3495        # Remove the atom to be replaced, and add the rest of the functional
3496        # group.
3497        del self[index]
3498        for site in fgroup[1:]:
3499            s_new = PeriodicSite(site.species, site.coords, self.lattice, coords_are_cartesian=True)
3500            self._sites.append(s_new)
3501
3502    def remove_species(self, species: Sequence[SpeciesLike]):
3503        """
3504        Remove all occurrences of several species from a structure.
3505
3506        Args:
3507            species: Sequence of species to remove, e.g., ["Li", "Na"].
3508        """
3509        new_sites = []
3510        species = [get_el_sp(s) for s in species]
3511
3512        for site in self._sites:
3513            new_sp_occu = {sp: amt for sp, amt in site.species.items() if sp not in species}
3514            if len(new_sp_occu) > 0:
3515                new_sites.append(
3516                    PeriodicSite(
3517                        new_sp_occu,
3518                        site.frac_coords,
3519                        self._lattice,
3520                        properties=site.properties,
3521                    )
3522                )
3523        self._sites = new_sites
3524
3525    def remove_sites(self, indices: Sequence[int]):
3526        """
3527        Delete sites with at indices.
3528
3529        Args:
3530            indices: Sequence of indices of sites to delete.
3531        """
3532        self._sites = [s for i, s in enumerate(self._sites) if i not in indices]
3533
3534    def apply_operation(self, symmop: SymmOp, fractional: bool = False):
3535        """
3536        Apply a symmetry operation to the structure and return the new
3537        structure. The lattice is operated by the rotation matrix only.
3538        Coords are operated in full and then transformed to the new lattice.
3539
3540        Args:
3541            symmop (SymmOp): Symmetry operation to apply.
3542            fractional (bool): Whether the symmetry operation is applied in
3543                fractional space. Defaults to False, i.e., symmetry operation
3544                is applied in cartesian coordinates.
3545        """
3546        if not fractional:
3547            self._lattice = Lattice([symmop.apply_rotation_only(row) for row in self._lattice.matrix])
3548
3549            def operate_site(site):
3550                new_cart = symmop.operate(site.coords)
3551                new_frac = self._lattice.get_fractional_coords(new_cart)
3552                return PeriodicSite(
3553                    site.species,
3554                    new_frac,
3555                    self._lattice,
3556                    properties=site.properties,
3557                    skip_checks=True,
3558                )
3559
3560        else:
3561            new_latt = np.dot(symmop.rotation_matrix, self._lattice.matrix)
3562            self._lattice = Lattice(new_latt)
3563
3564            def operate_site(site):
3565                return PeriodicSite(
3566                    site.species,
3567                    symmop.operate(site.frac_coords),
3568                    self._lattice,
3569                    properties=site.properties,
3570                    skip_checks=True,
3571                )
3572
3573        self._sites = [operate_site(s) for s in self._sites]
3574
3575    def apply_strain(self, strain: ArrayLike):
3576        """
3577        Apply a strain to the lattice.
3578
3579        Args:
3580            strain (float or list): Amount of strain to apply. Can be a float,
3581                or a sequence of 3 numbers. E.g., 0.01 means all lattice
3582                vectors are increased by 1%. This is equivalent to calling
3583                modify_lattice with a lattice with lattice parameters that
3584                are 1% larger.
3585        """
3586        s = (1 + np.array(strain)) * np.eye(3)
3587        self.lattice = Lattice(np.dot(self._lattice.matrix.T, s).T)
3588
3589    def sort(self, key: Callable = None, reverse: bool = False):
3590        """
3591        Sort a structure in place. The parameters have the same meaning as in
3592        list.sort. By default, sites are sorted by the electronegativity of
3593        the species. The difference between this method and
3594        get_sorted_structure (which also works in IStructure) is that the
3595        latter returns a new Structure, while this just sorts the Structure
3596        in place.
3597
3598        Args:
3599            key: Specifies a function of one argument that is used to extract
3600                a comparison key from each list element: key=str.lower. The
3601                default value is None (compare the elements directly).
3602            reverse (bool): If set to True, then the list elements are sorted
3603                as if each comparison were reversed.
3604        """
3605        self._sites.sort(key=key, reverse=reverse)
3606
3607    def translate_sites(
3608        self, indices: Union[int, Sequence[int]], vector: ArrayLike, frac_coords: bool = True, to_unit_cell: bool = True
3609    ):
3610        """
3611        Translate specific sites by some vector, keeping the sites within the
3612        unit cell.
3613
3614        Args:
3615            indices: Integer or List of site indices on which to perform the
3616                translation.
3617            vector: Translation vector for sites.
3618            frac_coords (bool): Whether the vector corresponds to fractional or
3619                cartesian coordinates.
3620            to_unit_cell (bool): Whether new sites are transformed to unit
3621                cell
3622        """
3623        if not isinstance(indices, collections.abc.Iterable):
3624            indices = [indices]
3625
3626        for i in indices:
3627            site = self._sites[i]
3628            if frac_coords:
3629                fcoords = site.frac_coords + vector
3630            else:
3631                fcoords = self._lattice.get_fractional_coords(site.coords + vector)
3632            if to_unit_cell:
3633                fcoords = np.mod(fcoords, 1)
3634            self._sites[i].frac_coords = fcoords
3635
3636    def rotate_sites(
3637        self,
3638        indices: List[int] = None,
3639        theta: float = 0.0,
3640        axis: ArrayLike = None,
3641        anchor: ArrayLike = None,
3642        to_unit_cell: bool = True,
3643    ):
3644        """
3645        Rotate specific sites by some angle around vector at anchor.
3646
3647        Args:
3648            indices (list): List of site indices on which to perform the
3649                translation.
3650            theta (float): Angle in radians
3651            axis (3x1 array): Rotation axis vector.
3652            anchor (3x1 array): Point of rotation.
3653            to_unit_cell (bool): Whether new sites are transformed to unit
3654                cell
3655        """
3656
3657        from numpy import cross, eye
3658        from numpy.linalg import norm
3659        from scipy.linalg import expm
3660
3661        if indices is None:
3662            indices = list(range(len(self)))
3663
3664        if axis is None:
3665            axis = [0, 0, 1]
3666
3667        if anchor is None:
3668            anchor = [0, 0, 0]
3669
3670        anchor = np.array(anchor)
3671        axis = np.array(axis)
3672
3673        theta %= 2 * np.pi
3674
3675        rm = expm(cross(eye(3), axis / norm(axis)) * theta)
3676        for i in indices:
3677            site = self._sites[i]
3678            coords = ((np.dot(rm, np.array(site.coords - anchor).T)).T + anchor).ravel()
3679            new_site = PeriodicSite(
3680                site.species,
3681                coords,
3682                self._lattice,
3683                to_unit_cell=to_unit_cell,
3684                coords_are_cartesian=True,
3685                properties=site.properties,
3686                skip_checks=True,
3687            )
3688            self._sites[i] = new_site
3689
3690    def perturb(self, distance: float, min_distance: float = None):
3691        """
3692        Performs a random perturbation of the sites in a structure to break
3693        symmetries.
3694
3695        Args:
3696            distance (float): Distance in angstroms by which to perturb each
3697                site.
3698            min_distance (None, int, or float): if None, all displacements will
3699                be equal amplitude. If int or float, perturb each site a
3700                distance drawn from the uniform distribution between
3701                'min_distance' and 'distance'.
3702
3703        """
3704
3705        def get_rand_vec():
3706            # deals with zero vectors.
3707            vector = np.random.randn(3)
3708            vnorm = np.linalg.norm(vector)
3709            dist = distance
3710            if isinstance(min_distance, (float, int)):
3711                dist = np.random.uniform(min_distance, dist)
3712            return vector / vnorm * dist if vnorm != 0 else get_rand_vec()
3713
3714        for i in range(len(self._sites)):
3715            self.translate_sites([i], get_rand_vec(), frac_coords=False)
3716
3717    def make_supercell(self, scaling_matrix: ArrayLike, to_unit_cell: bool = True):
3718        """
3719        Create a supercell.
3720
3721        Args:
3722            scaling_matrix: A scaling matrix for transforming the lattice
3723                vectors. Has to be all integers. Several options are possible:
3724
3725                a. A full 3x3 scaling matrix defining the linear combination
3726                   the old lattice vectors. E.g., [[2,1,0],[0,3,0],[0,0,
3727                   1]] generates a new structure with lattice vectors a' =
3728                   2a + b, b' = 3b, c' = c where a, b, and c are the lattice
3729                   vectors of the original structure.
3730                b. An sequence of three scaling factors. E.g., [2, 1, 1]
3731                   specifies that the supercell should have dimensions 2a x b x
3732                   c.
3733                c. A number, which simply scales all lattice vectors by the
3734                   same factor.
3735            to_unit_cell: Whether or not to fall back sites into the unit cell
3736        """
3737        s = self * scaling_matrix
3738        if to_unit_cell:
3739            for site in s:
3740                site.to_unit_cell(in_place=True)
3741        self._sites = s.sites
3742        self._lattice = s.lattice
3743
3744    def scale_lattice(self, volume: float):
3745        """
3746        Performs a scaling of the lattice vectors so that length proportions
3747        and angles are preserved.
3748
3749        Args:
3750            volume (float): New volume of the unit cell in A^3.
3751        """
3752        self.lattice = self._lattice.scale(volume)
3753
3754    def merge_sites(self, tol: float = 0.01, mode: Literal["sum", "delete", "average"] = "sum"):
3755        """
3756        Merges sites (adding occupancies) within tol of each other.
3757        Removes site properties.
3758
3759        Args:
3760            tol (float): Tolerance for distance to merge sites.
3761            mode ('sum' | 'delete' | 'average'): "delete" means duplicate sites are
3762                deleted. "sum" means the occupancies are summed for the sites.
3763                "average" means that the site is deleted but the properties are averaged
3764                Only first letter is considered.
3765
3766        """
3767        from scipy.cluster.hierarchy import fcluster, linkage
3768        from scipy.spatial.distance import squareform
3769
3770        d = self.distance_matrix
3771        np.fill_diagonal(d, 0)
3772        clusters = fcluster(linkage(squareform((d + d.T) / 2)), tol, "distance")
3773        sites = []
3774        for c in np.unique(clusters):
3775            inds = np.where(clusters == c)[0]
3776            species = self[inds[0]].species
3777            coords = self[inds[0]].frac_coords
3778            props = self[inds[0]].properties
3779            for n, i in enumerate(inds[1:]):
3780                sp = self[i].species
3781                if mode.lower()[0] == "s":
3782                    species += sp
3783                offset = self[i].frac_coords - coords
3784                coords = coords + ((offset - np.round(offset)) / (n + 2)).astype(coords.dtype)
3785                for key in props.keys():
3786                    if props[key] is not None and self[i].properties[key] != props[key]:
3787                        if mode.lower()[0] == "a" and isinstance(props[key], float):
3788                            # update a running total
3789                            props[key] = props[key] * (n + 1) / (n + 2) + self[i].properties[key] / (n + 2)
3790                        else:
3791                            props[key] = None
3792                            warnings.warn(
3793                                "Sites with different site property %s are merged. " "So property is set to none" % key
3794                            )
3795            sites.append(PeriodicSite(species, coords, self.lattice, properties=props))
3796
3797        self._sites = sites
3798
3799    def set_charge(self, new_charge: float = 0.0):
3800        """
3801        Sets the overall structure charge
3802
3803        Args:
3804            new_charge (float): new charge to set
3805        """
3806        self._charge = new_charge
3807
3808
3809class Molecule(IMolecule, collections.abc.MutableSequence):
3810    """
3811    Mutable Molecule. It has all the methods in IMolecule, but in addition,
3812    it allows a user to perform edits on the molecule.
3813    """
3814
3815    __hash__ = None  # type: ignore
3816
3817    def __init__(
3818        self,
3819        species: Sequence[SpeciesLike],
3820        coords: Sequence[ArrayLike],
3821        charge: float = 0.0,
3822        spin_multiplicity: float = None,
3823        validate_proximity: bool = False,
3824        site_properties: dict = None,
3825    ):
3826        """
3827        Creates a MutableMolecule.
3828
3829        Args:
3830            species: list of atomic species. Possible kinds of input include a
3831                list of dict of elements/species and occupancies, a List of
3832                elements/specie specified as actual Element/Species, Strings
3833                ("Fe", "Fe2+") or atomic numbers (1,56).
3834            coords (3x1 array): list of cartesian coordinates of each species.
3835            charge (float): Charge for the molecule. Defaults to 0.
3836            spin_multiplicity (int): Spin multiplicity for molecule.
3837                Defaults to None, which means that the spin multiplicity is
3838                set to 1 if the molecule has no unpaired electrons and to 2
3839                if there are unpaired electrons.
3840            validate_proximity (bool): Whether to check if there are sites
3841                that are less than 1 Ang apart. Defaults to False.
3842            site_properties (dict): Properties associated with the sites as
3843                a dict of sequences, e.g., {"magmom":[5,5,5,5]}. The
3844                sequences have to be the same length as the atomic species
3845                and fractional_coords. Defaults to None for no properties.
3846        """
3847        super().__init__(
3848            species,
3849            coords,
3850            charge=charge,
3851            spin_multiplicity=spin_multiplicity,
3852            validate_proximity=validate_proximity,
3853            site_properties=site_properties,
3854        )
3855        self._sites: List[Site] = list(self._sites)  # type: ignore
3856
3857    def __setitem__(  # type: ignore
3858        self, i: Union[int, slice, Sequence[int], SpeciesLike], site: Union[SpeciesLike, Site, Sequence]
3859    ):
3860        """
3861        Modify a site in the molecule.
3862
3863        Args:
3864            i (int, [int], slice, Species-like): Indices to change. You can
3865                specify these as an int, a list of int, or a species-like
3866                string.
3867            site (PeriodicSite/Species/Sequence): Three options exist. You can
3868                provide a Site directly, or for convenience, you can provide
3869                simply a Species-like string/object, or finally a (Species,
3870                coords) sequence, e.g., ("Fe", [0.5, 0.5, 0.5]).
3871        """
3872
3873        if isinstance(i, int):
3874            indices = [i]
3875        elif isinstance(i, (str, Element, Species)):
3876            self.replace_species({i: site})  # type: ignore
3877            return
3878        elif isinstance(i, slice):
3879            to_mod = self[i]
3880            indices = [ii for ii, s in enumerate(self._sites) if s in to_mod]
3881        else:
3882            indices = list(i)
3883
3884        for ii in indices:
3885            if isinstance(site, Site):
3886                self._sites[ii] = site
3887            else:
3888                if isinstance(site, str) or (not isinstance(site, collections.abc.Sequence)):
3889                    self._sites[ii].species = site  # type: ignore
3890                else:
3891                    self._sites[ii].species = site[0]  # type: ignore
3892                    if len(site) > 1:
3893                        self._sites[ii].coords = site[1]  # type: ignore
3894                    if len(site) > 2:
3895                        self._sites[ii].properties = site[2]  # type: ignore
3896
3897    def __delitem__(self, i):
3898        """
3899        Deletes a site from the Structure.
3900        """
3901        self._sites.__delitem__(i)
3902
3903    def append(  # type: ignore
3904        self,
3905        species: CompositionLike,
3906        coords: ArrayLike,
3907        validate_proximity: bool = False,
3908        properties: dict = None,
3909    ):
3910        """
3911        Appends a site to the molecule.
3912
3913        Args:
3914            species: Species of inserted site
3915            coords: Coordinates of inserted site
3916            validate_proximity (bool): Whether to check if inserted site is
3917                too close to an existing site. Defaults to False.
3918            properties (dict): A dict of properties for the Site.
3919
3920        Returns:
3921            New molecule with inserted site.
3922        """
3923        return self.insert(
3924            len(self),
3925            species,
3926            coords,
3927            validate_proximity=validate_proximity,
3928            properties=properties,
3929        )
3930
3931    def set_charge_and_spin(self, charge: float, spin_multiplicity: Optional[float] = None):
3932        """
3933        Set the charge and spin multiplicity.
3934
3935        Args:
3936            charge (int): Charge for the molecule. Defaults to 0.
3937            spin_multiplicity (int): Spin multiplicity for molecule.
3938                Defaults to None, which means that the spin multiplicity is
3939                set to 1 if the molecule has no unpaired electrons and to 2
3940                if there are unpaired electrons.
3941        """
3942        self._charge = charge
3943        nelectrons = 0.0
3944        for site in self._sites:
3945            for sp, amt in site.species.items():
3946                if not isinstance(sp, DummySpecies):
3947                    nelectrons += sp.Z * amt
3948        nelectrons -= charge
3949        self._nelectrons = nelectrons
3950        if spin_multiplicity:
3951            if (nelectrons + spin_multiplicity) % 2 != 1:
3952                raise ValueError(
3953                    "Charge of {} and spin multiplicity of {} is"
3954                    " not possible for this molecule".format(self._charge, spin_multiplicity)
3955                )
3956            self._spin_multiplicity = spin_multiplicity
3957        else:
3958            self._spin_multiplicity = 1 if nelectrons % 2 == 0 else 2
3959
3960    def insert(  # type: ignore
3961        self,
3962        i: int,
3963        species: CompositionLike,
3964        coords: ArrayLike,
3965        validate_proximity: bool = False,
3966        properties: dict = None,
3967    ):
3968        """
3969        Insert a site to the molecule.
3970
3971        Args:
3972            i (int): Index to insert site
3973            species: species of inserted site
3974            coords (3x1 array): coordinates of inserted site
3975            validate_proximity (bool): Whether to check if inserted site is
3976                too close to an existing site. Defaults to True.
3977            properties (dict): Dict of properties for the Site.
3978
3979        Returns:
3980            New molecule with inserted site.
3981        """
3982        new_site = Site(species, coords, properties=properties)
3983        if validate_proximity:
3984            for site in self:
3985                if site.distance(new_site) < self.DISTANCE_TOLERANCE:
3986                    raise ValueError("New site is too close to an existing " "site!")
3987        self._sites.insert(i, new_site)
3988
3989    def remove_species(self, species: Sequence[SpeciesLike]):
3990        """
3991        Remove all occurrences of a species from a molecule.
3992
3993        Args:
3994            species: Species to remove.
3995        """
3996        new_sites = []
3997        species = [get_el_sp(sp) for sp in species]
3998        for site in self._sites:
3999            new_sp_occu = {sp: amt for sp, amt in site.species.items() if sp not in species}
4000            if len(new_sp_occu) > 0:
4001                new_sites.append(Site(new_sp_occu, site.coords, properties=site.properties))
4002        self._sites = new_sites
4003
4004    def remove_sites(self, indices: Sequence[int]):
4005        """
4006        Delete sites with at indices.
4007
4008        Args:
4009            indices: Sequence of indices of sites to delete.
4010        """
4011        self._sites = [self._sites[i] for i in range(len(self._sites)) if i not in indices]
4012
4013    def translate_sites(self, indices: Sequence[int] = None, vector: ArrayLike = None):
4014        """
4015        Translate specific sites by some vector, keeping the sites within the
4016        unit cell.
4017
4018        Args:
4019            indices (list): List of site indices on which to perform the
4020                translation.
4021            vector (3x1 array): Translation vector for sites.
4022        """
4023        if indices is None:
4024            indices = range(len(self))
4025        if vector is None:
4026            vector = [0, 0, 0]
4027        for i in indices:
4028            site = self._sites[i]
4029            new_site = Site(site.species, site.coords + vector, properties=site.properties)  # type: ignore
4030            self._sites[i] = new_site
4031
4032    def rotate_sites(
4033        self, indices: Sequence[int] = None, theta: float = 0.0, axis: ArrayLike = None, anchor: ArrayLike = None
4034    ):
4035        """
4036        Rotate specific sites by some angle around vector at anchor.
4037
4038        Args:
4039            indices (list): List of site indices on which to perform the
4040                translation.
4041            theta (float): Angle in radians
4042            axis (3x1 array): Rotation axis vector.
4043            anchor (3x1 array): Point of rotation.
4044        """
4045
4046        from numpy import cross, eye
4047        from numpy.linalg import norm
4048        from scipy.linalg import expm
4049
4050        if indices is None:
4051            indices = range(len(self))
4052
4053        if axis is None:
4054            axis = [0, 0, 1]
4055
4056        if anchor is None:
4057            anchor = [0, 0, 0]
4058
4059        anchor = np.array(anchor)
4060        axis = np.array(axis)
4061
4062        theta %= 2 * np.pi
4063
4064        rm = expm(cross(eye(3), axis / norm(axis)) * theta)
4065
4066        for i in indices:
4067            site = self._sites[i]
4068            s = ((np.dot(rm, (site.coords - anchor).T)).T + anchor).ravel()
4069            new_site = Site(site.species, s, properties=site.properties)
4070            self._sites[i] = new_site
4071
4072    def perturb(self, distance: float):
4073        """
4074        Performs a random perturbation of the sites in a structure to break
4075        symmetries.
4076
4077        Args:
4078            distance (float): Distance in angstroms by which to perturb each
4079                site.
4080        """
4081
4082        def get_rand_vec():
4083            # deals with zero vectors.
4084            vector = np.random.randn(3)
4085            vnorm = np.linalg.norm(vector)
4086            return vector / vnorm * distance if vnorm != 0 else get_rand_vec()
4087
4088        for i in range(len(self._sites)):
4089            self.translate_sites([i], get_rand_vec())
4090
4091    def apply_operation(self, symmop: SymmOp):
4092        """
4093        Apply a symmetry operation to the molecule.
4094
4095        Args:
4096            symmop (SymmOp): Symmetry operation to apply.
4097        """
4098
4099        def operate_site(site):
4100            new_cart = symmop.operate(site.coords)
4101            return Site(site.species, new_cart, properties=site.properties)
4102
4103        self._sites = [operate_site(s) for s in self._sites]
4104
4105    def copy(self):
4106        """
4107        Convenience method to get a copy of the molecule.
4108
4109        Returns:
4110            A copy of the Molecule.
4111        """
4112        return self.__class__.from_sites(self)
4113
4114    def substitute(self, index: int, func_group: Union["IMolecule", "Molecule", str], bond_order: int = 1):
4115        """
4116        Substitute atom at index with a functional group.
4117
4118        Args:
4119            index (int): Index of atom to substitute.
4120            func_grp: Substituent molecule. There are two options:
4121
4122                1. Providing an actual molecule as the input. The first atom
4123                   must be a DummySpecies X, indicating the position of
4124                   nearest neighbor. The second atom must be the next
4125                   nearest atom. For example, for a methyl group
4126                   substitution, func_grp should be X-CH3, where X is the
4127                   first site and C is the second site. What the code will
4128                   do is to remove the index site, and connect the nearest
4129                   neighbor to the C atom in CH3. The X-C bond indicates the
4130                   directionality to connect the atoms.
4131                2. A string name. The molecule will be obtained from the
4132                   relevant template in func_groups.json.
4133            bond_order (int): A specified bond order to calculate the bond
4134                length between the attached functional group and the nearest
4135                neighbor site. Defaults to 1.
4136        """
4137
4138        # Find the nearest neighbor that is not a terminal atom.
4139        all_non_terminal_nn = []
4140        for nn in self.get_neighbors(self[index], 3):
4141            # Check that the nn has neighbors within a sensible distance but
4142            # is not the site being substituted.
4143            for nn2 in self.get_neighbors(nn, 3):
4144                if nn2 != self[index] and nn2.nn_distance < 1.2 * get_bond_length(nn.specie, nn2.specie):
4145                    all_non_terminal_nn.append(nn)
4146                    break
4147
4148        if len(all_non_terminal_nn) == 0:
4149            raise RuntimeError("Can't find a non-terminal neighbor to attach" " functional group to.")
4150
4151        non_terminal_nn = min(all_non_terminal_nn, key=lambda nn: nn.nn_distance)
4152
4153        # Set the origin point to be the coordinates of the nearest
4154        # non-terminal neighbor.
4155        origin = non_terminal_nn.coords
4156
4157        # Pass value of functional group--either from user-defined or from
4158        # functional.json
4159        if isinstance(func_group, Molecule):
4160            func_grp = func_group
4161        else:
4162            # Check to see whether the functional group is in database.
4163            if func_group not in FunctionalGroups:
4164                raise RuntimeError("Can't find functional group in list. " "Provide explicit coordinate instead")
4165            func_grp = FunctionalGroups[func_group]
4166
4167        # If a bond length can be found, modify func_grp so that the X-group
4168        # bond length is equal to the bond length.
4169        bl = get_bond_length(non_terminal_nn.specie, func_grp[1].specie, bond_order=bond_order)
4170        if bl is not None:
4171            func_grp = func_grp.copy()
4172            vec = func_grp[0].coords - func_grp[1].coords
4173            vec /= np.linalg.norm(vec)
4174            func_grp[0] = "X", func_grp[1].coords + float(bl) * vec
4175
4176        # Align X to the origin.
4177        x = func_grp[0]
4178        func_grp.translate_sites(list(range(len(func_grp))), origin - x.coords)
4179
4180        # Find angle between the attaching bond and the bond to be replaced.
4181        v1 = func_grp[1].coords - origin
4182        v2 = self[index].coords - origin
4183        angle = get_angle(v1, v2)
4184
4185        if 1 < abs(angle % 180) < 179:
4186            # For angles which are not 0 or 180, we perform a rotation about
4187            # the origin along an axis perpendicular to both bonds to align
4188            # bonds.
4189            axis = np.cross(v1, v2)
4190            op = SymmOp.from_origin_axis_angle(origin, axis, angle)
4191            func_grp.apply_operation(op)
4192        elif abs(abs(angle) - 180) < 1:
4193            # We have a 180 degree angle. Simply do an inversion about the
4194            # origin
4195            for i, fg in enumerate(func_grp):
4196                func_grp[i] = (fg.species, origin - (fg.coords - origin))
4197
4198        # Remove the atom to be replaced, and add the rest of the functional
4199        # group.
4200        del self[index]
4201        for site in func_grp[1:]:
4202            self._sites.append(site)
4203
4204
4205class StructureError(Exception):
4206    """
4207    Exception class for Structure.
4208    Raised when the structure has problems, e.g., atoms that are too close.
4209    """
4210
4211    pass
4212
4213
4214with open(os.path.join(os.path.dirname(__file__), "func_groups.json"), "rt") as f:
4215    FunctionalGroups = {k: Molecule(v["species"], v["coords"]) for k, v in json.load(f).items()}
4216