1# coding: utf-8
2# Copyright (c) Pymatgen Development Team.
3# Distributed under the terms of the MIT License.
4
5"""
6Wrapper classes for Cif input and output from Structures.
7"""
8
9import math
10import os
11import re
12import textwrap
13import warnings
14from collections import OrderedDict, deque
15from functools import partial
16from inspect import getfullargspec as getargspec
17from io import StringIO
18from itertools import groupby
19from pathlib import Path
20
21import numpy as np
22from monty.io import zopen
23from monty.string import remove_non_ascii
24
25from pymatgen.core.composition import Composition
26from pymatgen.core.lattice import Lattice
27from pymatgen.core.operations import MagSymmOp, SymmOp
28from pymatgen.core.periodic_table import DummySpecies, Element, Species, get_el_sp
29from pymatgen.core.structure import Structure
30from pymatgen.electronic_structure.core import Magmom
31from pymatgen.symmetry.analyzer import SpacegroupAnalyzer, SpacegroupOperations
32from pymatgen.symmetry.groups import SYMM_DATA, SpaceGroup
33from pymatgen.symmetry.maggroups import MagneticSpaceGroup
34from pymatgen.symmetry.structure import SymmetrizedStructure
35from pymatgen.util.coord import find_in_coord_list_pbc, in_coord_list_pbc
36
37__author__ = "Shyue Ping Ong, Will Richards, Matthew Horton"
38
39sub_spgrp = partial(re.sub, r"[\s_]", "")
40
41space_groups = {sub_spgrp(k): k for k in SYMM_DATA["space_group_encoding"].keys()}  # type: ignore
42
43space_groups.update({sub_spgrp(k): k for k in SYMM_DATA["space_group_encoding"].keys()})  # type: ignore
44
45_COD_DATA = None
46
47
48def _get_cod_data():
49    global _COD_DATA
50    if _COD_DATA is None:
51        import pymatgen
52
53        with open(os.path.join(pymatgen.symmetry.__path__[0], "symm_ops.json")) as f:
54            import json
55
56            _COD_DATA = json.load(f)
57
58    return _COD_DATA
59
60
61class CifBlock:
62    """
63    Object for storing cif data. All data is stored in a single dictionary.
64    Data inside loops are stored in lists in the data dictionary, and
65    information on which keys are grouped together are stored in the loops
66    attribute.
67    """
68
69    maxlen = 70  # not quite 80 so we can deal with semicolons and things
70
71    def __init__(self, data, loops, header):
72        """
73        Args:
74            data: dict or OrderedDict of data to go into the cif. Values should
75                    be convertible to string, or lists of these if the key is
76                    in a loop
77            loops: list of lists of keys, grouped by which loop they should
78                    appear in
79            header: name of the block (appears after the data_ on the first
80                line)
81        """
82        self.loops = loops
83        self.data = data
84        # AJ says: CIF Block names cannot be more than 75 characters or you
85        # get an Exception
86        self.header = header[:74]
87
88    def __eq__(self, other):
89        return self.loops == other.loops and self.data == other.data and self.header == other.header
90
91    def __getitem__(self, key):
92        return self.data[key]
93
94    def __str__(self):
95        """
96        Returns the cif string for the data block
97        """
98        s = ["data_{}".format(self.header)]
99        keys = self.data.keys()
100        written = []
101        for k in keys:
102            if k in written:
103                continue
104            for l in self.loops:
105                # search for a corresponding loop
106                if k in l:
107                    s.append(self._loop_to_string(l))
108                    written.extend(l)
109                    break
110            if k not in written:
111                # k didn't belong to a loop
112                v = self._format_field(self.data[k])
113                if len(k) + len(v) + 3 < self.maxlen:
114                    s.append("{}   {}".format(k, v))
115                else:
116                    s.extend([k, v])
117        return "\n".join(s)
118
119    def _loop_to_string(self, loop):
120        s = "loop_"
121        for l in loop:
122            s += "\n " + l
123        for fields in zip(*[self.data[k] for k in loop]):
124            line = "\n"
125            for val in map(self._format_field, fields):
126                if val[0] == ";":
127                    s += line + "\n" + val
128                    line = "\n"
129                elif len(line) + len(val) + 2 < self.maxlen:
130                    line += "  " + val
131                else:
132                    s += line
133                    line = "\n  " + val
134            s += line
135        return s
136
137    def _format_field(self, v):
138        v = v.__str__().strip()
139        if len(v) > self.maxlen:
140            return ";\n" + textwrap.fill(v, self.maxlen) + "\n;"
141        # add quotes if necessary
142        if v == "":
143            return '""'
144        if (" " in v or v[0] == "_") and not (v[0] == "'" and v[-1] == "'") and not (v[0] == '"' and v[-1] == '"'):
145            if "'" in v:
146                q = '"'
147            else:
148                q = "'"
149            v = q + v + q
150        return v
151
152    @classmethod
153    def _process_string(cls, string):
154        # remove comments
155        string = re.sub(r"(\s|^)#.*$", "", string, flags=re.MULTILINE)
156        # remove empty lines
157        string = re.sub(r"^\s*\n", "", string, flags=re.MULTILINE)
158        # remove non_ascii
159        string = remove_non_ascii(string)
160        # since line breaks in .cif files are mostly meaningless,
161        # break up into a stream of tokens to parse, rejoining multiline
162        # strings (between semicolons)
163        q = deque()
164        multiline = False
165        ml = []
166        # this regex splits on spaces, except when in quotes.
167        # starting quotes must not be preceded by non-whitespace
168        # (these get eaten by the first expression)
169        # ending quotes must not be followed by non-whitespace
170        p = re.compile(r"""([^'"\s][\S]*)|'(.*?)'(?!\S)|"(.*?)"(?!\S)""")
171        for l in string.splitlines():
172            if multiline:
173                if l.startswith(";"):
174                    multiline = False
175                    q.append(("", "", "", " ".join(ml)))
176                    ml = []
177                    l = l[1:].strip()
178                else:
179                    ml.append(l)
180                    continue
181            if l.startswith(";"):
182                multiline = True
183                ml.append(l[1:].strip())
184            else:
185                for s in p.findall(l):
186                    # s is tuple. location of the data in the tuple
187                    # depends on whether it was quoted in the input
188                    q.append(s)
189        return q
190
191    @classmethod
192    def from_string(cls, string):
193        """
194        Reads CifBlock from string.
195
196        :param string: String representation.
197        :return: CifBlock
198        """
199        q = cls._process_string(string)
200        header = q.popleft()[0][5:]
201        data = OrderedDict()
202        loops = []
203        while q:
204            s = q.popleft()
205            # cif keys aren't in quotes, so show up in s[0]
206            if s[0] == "_eof":
207                break
208            if s[0].startswith("_"):
209                try:
210                    data[s[0]] = "".join(q.popleft())
211                except IndexError:
212                    data[s[0]] = ""
213            elif s[0].startswith("loop_"):
214                columns = []
215                items = []
216                while q:
217                    s = q[0]
218                    if s[0].startswith("loop_") or not s[0].startswith("_"):
219                        break
220                    columns.append("".join(q.popleft()))
221                    data[columns[-1]] = []
222                while q:
223                    s = q[0]
224                    if s[0].startswith("loop_") or s[0].startswith("_"):
225                        break
226                    items.append("".join(q.popleft()))
227                n = len(items) // len(columns)
228                assert len(items) % n == 0
229                loops.append(columns)
230                for k, v in zip(columns * n, items):
231                    data[k].append(v.strip())
232            elif "".join(s).strip() != "":
233                warnings.warn("Possible issue in cif file" " at line: {}".format("".join(s).strip()))
234        return cls(data, loops, header)
235
236
237class CifFile:
238    """
239    Reads and parses CifBlocks from a .cif file or string
240    """
241
242    def __init__(self, data, orig_string=None, comment=None):
243        """
244        Args:
245            data (OrderedDict): Of CifBlock objects.å
246            orig_string (str): The original cif string.
247            comment (str): Comment string.
248        """
249        self.data = data
250        self.orig_string = orig_string
251        self.comment = comment or "# generated using pymatgen"
252
253    def __str__(self):
254        s = ["%s" % v for v in self.data.values()]
255        return self.comment + "\n" + "\n".join(s) + "\n"
256
257    @classmethod
258    def from_string(cls, string):
259        """
260        Reads CifFile from a string.
261
262        :param string: String representation.
263        :return: CifFile
264        """
265        d = OrderedDict()
266        for x in re.split(r"^\s*data_", "x\n" + string, flags=re.MULTILINE | re.DOTALL)[1:]:
267
268            # Skip over Cif block that contains powder diffraction data.
269            # Some elements in this block were missing from CIF files in
270            # Springer materials/Pauling file DBs.
271            # This block anyway does not contain any structure information, and
272            # CifParser was also not parsing it.
273            if "powder_pattern" in re.split(r"\n", x, 1)[0]:
274                continue
275            c = CifBlock.from_string("data_" + x)
276            d[c.header] = c
277        return cls(d, string)
278
279    @classmethod
280    def from_file(cls, filename):
281        """
282        Reads CifFile from a filename.
283
284        :param filename: Filename
285        :return: CifFile
286        """
287        with zopen(str(filename), "rt", errors="replace") as f:
288            return cls.from_string(f.read())
289
290
291class CifParser:
292    """
293    Parses a CIF file. Attempts to fix CIFs that are out-of-spec, but will
294    issue warnings if corrections applied. These are also stored in the
295    CifParser's errors attribute.
296    """
297
298    def __init__(self, filename, occupancy_tolerance=1.0, site_tolerance=1e-4):
299        """
300        Args:
301            filename (str): CIF filename, bzipped or gzipped CIF files are fine too.
302            occupancy_tolerance (float): If total occupancy of a site is between 1
303                and occupancy_tolerance, the occupancies will be scaled down to 1.
304            site_tolerance (float): This tolerance is used to determine if two
305                sites are sitting in the same position, in which case they will be
306                combined to a single disordered site. Defaults to 1e-4.
307        """
308        self._occupancy_tolerance = occupancy_tolerance
309        self._site_tolerance = site_tolerance
310        if isinstance(filename, (str, Path)):
311            self._cif = CifFile.from_file(filename)
312        else:
313            self._cif = CifFile.from_string(filename.read())
314
315        # store if CIF contains features from non-core CIF dictionaries
316        # e.g. magCIF
317        self.feature_flags = {}
318        self.warnings = []
319
320        def is_magcif():
321            """
322            Checks to see if file appears to be a magCIF file (heuristic).
323            """
324            # Doesn't seem to be a canonical way to test if file is magCIF or
325            # not, so instead check for magnetic symmetry datanames
326            prefixes = [
327                "_space_group_magn",
328                "_atom_site_moment",
329                "_space_group_symop_magn",
330            ]
331            for d in self._cif.data.values():
332                for k in d.data.keys():
333                    for prefix in prefixes:
334                        if prefix in k:
335                            return True
336            return False
337
338        self.feature_flags["magcif"] = is_magcif()
339
340        def is_magcif_incommensurate():
341            """
342            Checks to see if file contains an incommensurate magnetic
343            structure (heuristic).
344            """
345            # Doesn't seem to be a canonical way to test if magCIF file
346            # describes incommensurate strucure or not, so instead check
347            # for common datanames
348            if not self.feature_flags["magcif"]:
349                return False
350            prefixes = ["_cell_modulation_dimension", "_cell_wave_vector"]
351            for d in self._cif.data.values():
352                for k in d.data.keys():
353                    for prefix in prefixes:
354                        if prefix in k:
355                            return True
356            return False
357
358        self.feature_flags["magcif_incommensurate"] = is_magcif_incommensurate()
359
360        for k in self._cif.data.keys():
361            # pass individual CifBlocks to _sanitize_data
362            self._cif.data[k] = self._sanitize_data(self._cif.data[k])
363
364    @staticmethod
365    def from_string(cif_string, occupancy_tolerance=1.0):
366        """
367        Creates a CifParser from a string.
368
369        Args:
370            cif_string (str): String representation of a CIF.
371            occupancy_tolerance (float): If total occupancy of a site is
372                between 1 and occupancy_tolerance, the occupancies will be
373                scaled down to 1.
374
375        Returns:
376            CifParser
377        """
378        stream = StringIO(cif_string)
379        return CifParser(stream, occupancy_tolerance)
380
381    def _sanitize_data(self, data):
382        """
383        Some CIF files do not conform to spec. This function corrects
384        known issues, particular in regards to Springer materials/
385        Pauling files.
386
387        This function is here so that CifParser can assume its
388        input conforms to spec, simplifying its implementation.
389        :param data: CifBlock
390        :return: data CifBlock
391        """
392
393        """
394        This part of the code deals with handling formats of data as found in
395        CIF files extracted from the Springer Materials/Pauling File
396        databases, and that are different from standard ICSD formats.
397        """
398
399        # check for implicit hydrogens, warn if any present
400        if "_atom_site_attached_hydrogens" in data.data.keys():
401            attached_hydrogens = [str2float(x) for x in data.data["_atom_site_attached_hydrogens"] if str2float(x) != 0]
402            if len(attached_hydrogens) > 0:
403                self.warnings.append(
404                    "Structure has implicit hydrogens defined, "
405                    "parsed structure unlikely to be suitable for use "
406                    "in calculations unless hydrogens added."
407                )
408
409        # Check to see if "_atom_site_type_symbol" exists, as some test CIFs do
410        # not contain this key.
411        if "_atom_site_type_symbol" in data.data.keys():
412
413            # Keep a track of which data row needs to be removed.
414            # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14
415            # 'rhombic dodecahedron, Nb<sub>14</sub>'
416            # Without this code, the above row in a structure would be parsed
417            # as an ordered site with only Nb (since
418            # CifParser would try to parse the first two characters of the
419            # label "Nb,Zr") and occupancy=1.
420            # However, this site is meant to be a disordered site with 0.8 of
421            # Nb and 0.2 of Zr.
422            idxs_to_remove = []
423
424            new_atom_site_label = []
425            new_atom_site_type_symbol = []
426            new_atom_site_occupancy = []
427            new_fract_x = []
428            new_fract_y = []
429            new_fract_z = []
430
431            for idx, el_row in enumerate(data["_atom_site_label"]):
432
433                # CIF files from the Springer Materials/Pauling File have
434                # switched the label and symbol. Thus, in the
435                # above shown example row, '0.8Nb + 0.2Zr' is the symbol.
436                # Below, we split the strings on ' + ' to
437                # check if the length (or number of elements) in the label and
438                # symbol are equal.
439                if len(data["_atom_site_type_symbol"][idx].split(" + ")) > len(
440                    data["_atom_site_label"][idx].split(" + ")
441                ):
442
443                    # Dictionary to hold extracted elements and occupancies
444                    els_occu = {}
445
446                    # parse symbol to get element names and occupancy and store
447                    # in "els_occu"
448                    symbol_str = data["_atom_site_type_symbol"][idx]
449                    symbol_str_lst = symbol_str.split(" + ")
450                    for elocc_idx, sym in enumerate(symbol_str_lst):
451                        # Remove any bracketed items in the string
452                        symbol_str_lst[elocc_idx] = re.sub(r"\([0-9]*\)", "", sym.strip())
453
454                        # Extract element name and its occupancy from the
455                        # string, and store it as a
456                        # key-value pair in "els_occ".
457                        els_occu[
458                            str(re.findall(r"\D+", symbol_str_lst[elocc_idx].strip())[1]).replace("<sup>", "")
459                        ] = float("0" + re.findall(r"\.?\d+", symbol_str_lst[elocc_idx].strip())[1])
460
461                    x = str2float(data["_atom_site_fract_x"][idx])
462                    y = str2float(data["_atom_site_fract_y"][idx])
463                    z = str2float(data["_atom_site_fract_z"][idx])
464
465                    for et, occu in els_occu.items():
466                        # new atom site labels have 'fix' appended
467                        new_atom_site_label.append(et + "_fix" + str(len(new_atom_site_label)))
468                        new_atom_site_type_symbol.append(et)
469                        new_atom_site_occupancy.append(str(occu))
470                        new_fract_x.append(str(x))
471                        new_fract_y.append(str(y))
472                        new_fract_z.append(str(z))
473
474                    idxs_to_remove.append(idx)
475
476            # Remove the original row by iterating over all keys in the CIF
477            # data looking for lists, which indicates
478            # multiple data items, one for each row, and remove items from the
479            # list that corresponds to the removed row,
480            # so that it's not processed by the rest of this function (which
481            # would result in an error).
482            for original_key in data.data:
483                if isinstance(data.data[original_key], list):
484                    for id in sorted(idxs_to_remove, reverse=True):
485                        del data.data[original_key][id]
486
487            if len(idxs_to_remove) > 0:
488                self.warnings.append("Pauling file corrections applied.")
489
490                data.data["_atom_site_label"] += new_atom_site_label
491                data.data["_atom_site_type_symbol"] += new_atom_site_type_symbol
492                data.data["_atom_site_occupancy"] += new_atom_site_occupancy
493                data.data["_atom_site_fract_x"] += new_fract_x
494                data.data["_atom_site_fract_y"] += new_fract_y
495                data.data["_atom_site_fract_z"] += new_fract_z
496
497        """
498        This fixes inconsistencies in naming of several magCIF tags
499        as a result of magCIF being in widespread use prior to
500        specification being finalized (on advice of Branton Campbell).
501        """
502
503        if self.feature_flags["magcif"]:
504
505            # CIF-1 style has all underscores, interim standard
506            # had period before magn instead of before the final
507            # component (e.g. xyz)
508            # we want to standardize on a specific key, to simplify
509            # parsing code
510            correct_keys = [
511                "_space_group_symop_magn_operation.xyz",
512                "_space_group_symop_magn_centering.xyz",
513                "_space_group_magn.name_BNS",
514                "_space_group_magn.number_BNS",
515                "_atom_site_moment_crystalaxis_x",
516                "_atom_site_moment_crystalaxis_y",
517                "_atom_site_moment_crystalaxis_z",
518                "_atom_site_moment_label",
519            ]
520
521            # cannot mutate OrderedDict during enumeration,
522            # so store changes we want to make
523            changes_to_make = {}
524
525            for original_key in data.data:
526                for correct_key in correct_keys:
527                    # convert to all underscore
528                    trial_key = "_".join(correct_key.split("."))
529                    test_key = "_".join(original_key.split("."))
530                    if trial_key == test_key:
531                        changes_to_make[correct_key] = original_key
532
533            # make changes
534            for correct_key, original_key in changes_to_make.items():
535                data.data[correct_key] = data.data[original_key]
536
537            # renamed_keys maps interim_keys to final_keys
538            renamed_keys = {
539                "_magnetic_space_group.transform_to_standard_Pp_abc": "_space_group_magn.transform_BNS_Pp_abc"
540            }
541            changes_to_make = {}
542
543            for interim_key, final_key in renamed_keys.items():
544                if data.data.get(interim_key):
545                    changes_to_make[final_key] = interim_key
546
547            if len(changes_to_make) > 0:
548                self.warnings.append("Keys changed to match new magCIF specification.")
549
550            for final_key, interim_key in changes_to_make.items():
551                data.data[final_key] = data.data[interim_key]
552
553        # check for finite precision frac co-ordinates (e.g. 0.6667 instead of 0.6666666...7)
554        # this can sometimes cause serious issues when applying symmetry operations
555        important_fracs = (1 / 3.0, 2 / 3.0)
556        fracs_to_change = {}
557        for label in ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z"):
558            if label in data.data.keys():
559                for idx, frac in enumerate(data.data[label]):
560                    try:
561                        frac = str2float(frac)
562                    except Exception:
563                        # co-ordinate might not be defined e.g. '?'
564                        continue
565                    for comparison_frac in important_fracs:
566                        if abs(1 - frac / comparison_frac) < 1e-4:
567                            fracs_to_change[(label, idx)] = str(comparison_frac)
568        if fracs_to_change:
569            self.warnings.append(
570                "Some fractional co-ordinates rounded to ideal values to " "avoid issues with finite precision."
571            )
572            for (label, idx), val in fracs_to_change.items():
573                data.data[label][idx] = val
574
575        return data
576
577    def _unique_coords(self, coords_in, magmoms_in=None, lattice=None):
578        """
579        Generate unique coordinates using coord and symmetry positions
580        and also their corresponding magnetic moments, if supplied.
581        """
582        coords = []
583        if magmoms_in:
584            magmoms = []
585            if len(magmoms_in) != len(coords_in):
586                raise ValueError
587            for tmp_coord, tmp_magmom in zip(coords_in, magmoms_in):
588                for op in self.symmetry_operations:
589                    coord = op.operate(tmp_coord)
590                    coord = np.array([i - math.floor(i) for i in coord])
591                    if isinstance(op, MagSymmOp):
592                        # Up to this point, magmoms have been defined relative
593                        # to crystal axis. Now convert to Cartesian and into
594                        # a Magmom object.
595                        magmom = Magmom.from_moment_relative_to_crystal_axes(
596                            op.operate_magmom(tmp_magmom), lattice=lattice
597                        )
598                    else:
599                        magmom = Magmom(tmp_magmom)
600                    if not in_coord_list_pbc(coords, coord, atol=self._site_tolerance):
601                        coords.append(coord)
602                        magmoms.append(magmom)
603            return coords, magmoms
604
605        for tmp_coord in coords_in:
606            for op in self.symmetry_operations:
607                coord = op.operate(tmp_coord)
608                coord = np.array([i - math.floor(i) for i in coord])
609                if not in_coord_list_pbc(coords, coord, atol=self._site_tolerance):
610                    coords.append(coord)
611        return coords, [Magmom(0)] * len(coords)  # return dummy magmoms
612
613    def get_lattice(
614        self,
615        data,
616        length_strings=("a", "b", "c"),
617        angle_strings=("alpha", "beta", "gamma"),
618        lattice_type=None,
619    ):
620        """
621        Generate the lattice from the provided lattice parameters. In
622        the absence of all six lattice parameters, the crystal system
623        and necessary parameters are parsed
624        """
625        try:
626
627            lengths = [str2float(data["_cell_length_" + i]) for i in length_strings]
628            angles = [str2float(data["_cell_angle_" + i]) for i in angle_strings]
629            if not lattice_type:
630                return Lattice.from_parameters(*lengths, *angles)
631
632            return getattr(Lattice, lattice_type)(*(lengths + angles))
633
634        except KeyError:
635            # Missing Key search for cell setting
636            for lattice_lable in [
637                "_symmetry_cell_setting",
638                "_space_group_crystal_system",
639            ]:
640                if data.data.get(lattice_lable):
641                    lattice_type = data.data.get(lattice_lable).lower()
642                    try:
643
644                        required_args = getargspec(getattr(Lattice, lattice_type)).args
645
646                        lengths = (l for l in length_strings if l in required_args)
647                        angles = (a for a in angle_strings if a in required_args)
648                        return self.get_lattice(data, lengths, angles, lattice_type=lattice_type)
649                    except AttributeError as exc:
650                        self.warnings.append(str(exc))
651                        warnings.warn(exc)
652
653                else:
654                    return None
655        return None
656
657    def get_symops(self, data):
658        """
659        In order to generate symmetry equivalent positions, the symmetry
660        operations are parsed. If the symops are not present, the space
661        group symbol is parsed, and symops are generated.
662        """
663        symops = []
664        for symmetry_label in [
665            "_symmetry_equiv_pos_as_xyz",
666            "_symmetry_equiv_pos_as_xyz_",
667            "_space_group_symop_operation_xyz",
668            "_space_group_symop_operation_xyz_",
669        ]:
670            if data.data.get(symmetry_label):
671                xyz = data.data.get(symmetry_label)
672                if isinstance(xyz, str):
673                    msg = "A 1-line symmetry op P1 CIF is detected!"
674                    warnings.warn(msg)
675                    self.warnings.append(msg)
676                    xyz = [xyz]
677                try:
678                    symops = [SymmOp.from_xyz_string(s) for s in xyz]
679                    break
680                except ValueError:
681                    continue
682        if not symops:
683            # Try to parse symbol
684            for symmetry_label in [
685                "_symmetry_space_group_name_H-M",
686                "_symmetry_space_group_name_H_M",
687                "_symmetry_space_group_name_H-M_",
688                "_symmetry_space_group_name_H_M_",
689                "_space_group_name_Hall",
690                "_space_group_name_Hall_",
691                "_space_group_name_H-M_alt",
692                "_space_group_name_H-M_alt_",
693                "_symmetry_space_group_name_hall",
694                "_symmetry_space_group_name_hall_",
695                "_symmetry_space_group_name_h-m",
696                "_symmetry_space_group_name_h-m_",
697            ]:
698                sg = data.data.get(symmetry_label)
699
700                if sg:
701                    sg = sub_spgrp(sg)
702                    try:
703                        spg = space_groups.get(sg)
704                        if spg:
705                            symops = SpaceGroup(spg).symmetry_ops
706                            msg = (
707                                "No _symmetry_equiv_pos_as_xyz type key found. "
708                                "Spacegroup from %s used." % symmetry_label
709                            )
710                            warnings.warn(msg)
711                            self.warnings.append(msg)
712                            break
713                    except ValueError:
714                        # Ignore any errors
715                        pass
716
717                    try:
718                        for d in _get_cod_data():
719                            if sg == re.sub(r"\s+", "", d["hermann_mauguin"]):
720                                xyz = d["symops"]
721                                symops = [SymmOp.from_xyz_string(s) for s in xyz]
722                                msg = (
723                                    "No _symmetry_equiv_pos_as_xyz type key found. "
724                                    "Spacegroup from %s used." % symmetry_label
725                                )
726                                warnings.warn(msg)
727                                self.warnings.append(msg)
728                                break
729                    except Exception:
730                        continue
731
732                    if symops:
733                        break
734        if not symops:
735            # Try to parse International number
736            for symmetry_label in [
737                "_space_group_IT_number",
738                "_space_group_IT_number_",
739                "_symmetry_Int_Tables_number",
740                "_symmetry_Int_Tables_number_",
741            ]:
742                if data.data.get(symmetry_label):
743                    try:
744                        i = int(str2float(data.data.get(symmetry_label)))
745                        symops = SpaceGroup.from_int_number(i).symmetry_ops
746                        break
747                    except ValueError:
748                        continue
749
750        if not symops:
751            msg = "No _symmetry_equiv_pos_as_xyz type key found. " "Defaulting to P1."
752            warnings.warn(msg)
753            self.warnings.append(msg)
754            symops = [SymmOp.from_xyz_string(s) for s in ["x", "y", "z"]]
755
756        return symops
757
758    def get_magsymops(self, data):
759        """
760        Equivalent to get_symops except for magnetic symmetry groups.
761        Separate function since additional operation for time reversal symmetry
762        (which changes magnetic moments on sites) needs to be returned.
763        """
764        magsymmops = []
765
766        # check to see if magCIF file explicitly contains magnetic symmetry operations
767        if data.data.get("_space_group_symop_magn_operation.xyz"):
768
769            xyzt = data.data.get("_space_group_symop_magn_operation.xyz")
770            if isinstance(xyzt, str):
771                xyzt = [xyzt]
772            magsymmops = [MagSymmOp.from_xyzt_string(s) for s in xyzt]
773
774            if data.data.get("_space_group_symop_magn_centering.xyz"):
775
776                xyzt = data.data.get("_space_group_symop_magn_centering.xyz")
777                if isinstance(xyzt, str):
778                    xyzt = [xyzt]
779                centering_symops = [MagSymmOp.from_xyzt_string(s) for s in xyzt]
780
781                all_ops = []
782                for op in magsymmops:
783                    for centering_op in centering_symops:
784                        new_translation = [
785                            i - np.floor(i) for i in op.translation_vector + centering_op.translation_vector
786                        ]
787                        new_time_reversal = op.time_reversal * centering_op.time_reversal
788                        all_ops.append(
789                            MagSymmOp.from_rotation_and_translation_and_time_reversal(
790                                rotation_matrix=op.rotation_matrix,
791                                translation_vec=new_translation,
792                                time_reversal=new_time_reversal,
793                            )
794                        )
795                magsymmops = all_ops
796
797        # else check to see if it specifies a magnetic space group
798        elif data.data.get("_space_group_magn.name_BNS") or data.data.get("_space_group_magn.number_BNS"):
799
800            if data.data.get("_space_group_magn.name_BNS"):
801                # get BNS label for MagneticSpaceGroup()
802                id = data.data.get("_space_group_magn.name_BNS")
803            else:
804                # get BNS number for MagneticSpaceGroup()
805                # by converting string to list of ints
806                id = list(map(int, (data.data.get("_space_group_magn.number_BNS").split("."))))
807
808            if data.data.get("_space_group_magn.transform_BNS_Pp_abc"):
809                if data.data.get("_space_group_magn.transform_BNS_Pp_abc") != "a,b,c;0,0,0":
810
811                    jf = data.data.get("_space_group_magn.transform_BNS_Pp_abc")
812                    msg = MagneticSpaceGroup(id, jf)
813
814            elif data.data.get("_space_group_magn.transform_BNS_Pp"):
815                return NotImplementedError("Incomplete specification to implement.")
816            else:
817                msg = MagneticSpaceGroup(id)
818
819            magsymmops = msg.symmetry_ops
820
821        if not magsymmops:
822            msg = "No magnetic symmetry detected, using primitive symmetry."
823            warnings.warn(msg)
824            self.warnings.append(msg)
825            magsymmops = [MagSymmOp.from_xyzt_string("x, y, z, 1")]
826
827        return magsymmops
828
829    @staticmethod
830    def parse_oxi_states(data):
831        """
832        Parse oxidation states from data dictionary
833        """
834        try:
835            oxi_states = {
836                data["_atom_type_symbol"][i]: str2float(data["_atom_type_oxidation_number"][i])
837                for i in range(len(data["_atom_type_symbol"]))
838            }
839            # attempt to strip oxidation state from _atom_type_symbol
840            # in case the label does not contain an oxidation state
841            for i, symbol in enumerate(data["_atom_type_symbol"]):
842                oxi_states[re.sub(r"\d?[\+,\-]?$", "", symbol)] = str2float(data["_atom_type_oxidation_number"][i])
843
844        except (ValueError, KeyError):
845            oxi_states = None
846        return oxi_states
847
848    @staticmethod
849    def parse_magmoms(data, lattice=None):
850        """
851        Parse atomic magnetic moments from data dictionary
852        """
853        if lattice is None:
854            raise Exception("Magmoms given in terms of crystal axes in magCIF spec.")
855        try:
856            magmoms = {
857                data["_atom_site_moment_label"][i]: np.array(
858                    [
859                        str2float(data["_atom_site_moment_crystalaxis_x"][i]),
860                        str2float(data["_atom_site_moment_crystalaxis_y"][i]),
861                        str2float(data["_atom_site_moment_crystalaxis_z"][i]),
862                    ]
863                )
864                for i in range(len(data["_atom_site_moment_label"]))
865            }
866        except (ValueError, KeyError):
867            return None
868        return magmoms
869
870    def _parse_symbol(self, sym):
871        """
872        Parse a string with a symbol to extract a string representing an element.
873
874        Args:
875            sym (str): A symbol to be parsed.
876
877        Returns:
878            A string with the parsed symbol. None if no parsing was possible.
879        """
880        # Common representations for elements/water in cif files
881        # TODO: fix inconsistent handling of water
882        special = {
883            "Hw": "H",
884            "Ow": "O",
885            "Wat": "O",
886            "wat": "O",
887            "OH": "",
888            "OH2": "",
889            "NO3": "N",
890        }
891
892        parsed_sym = None
893        # try with special symbols, otherwise check the first two letters,
894        # then the first letter alone. If everything fails try extracting the
895        # first letters.
896        m_sp = re.match("|".join(special.keys()), sym)
897        if m_sp:
898            parsed_sym = special[m_sp.group()]
899        elif Element.is_valid_symbol(sym[:2].title()):
900            parsed_sym = sym[:2].title()
901        elif Element.is_valid_symbol(sym[0].upper()):
902            parsed_sym = sym[0].upper()
903        else:
904            m = re.match(r"w?[A-Z][a-z]*", sym)
905            if m:
906                parsed_sym = m.group()
907
908        if parsed_sym is not None and (m_sp or not re.match(r"{}\d*".format(parsed_sym), sym)):
909            msg = "{} parsed as {}".format(sym, parsed_sym)
910            warnings.warn(msg)
911            self.warnings.append(msg)
912
913        return parsed_sym
914
915    def _get_structure(self, data, primitive, symmetrized):
916        """
917        Generate structure from part of the cif.
918        """
919
920        def get_num_implicit_hydrogens(sym):
921            num_h = {"Wat": 2, "wat": 2, "O-H": 1}
922            return num_h.get(sym[:3], 0)
923
924        lattice = self.get_lattice(data)
925
926        # if magCIF, get magnetic symmetry moments and magmoms
927        # else standard CIF, and use empty magmom dict
928        if self.feature_flags["magcif_incommensurate"]:
929            raise NotImplementedError("Incommensurate structures not currently supported.")
930        if self.feature_flags["magcif"]:
931            self.symmetry_operations = self.get_magsymops(data)
932            magmoms = self.parse_magmoms(data, lattice=lattice)
933        else:
934            self.symmetry_operations = self.get_symops(data)
935            magmoms = {}
936
937        oxi_states = self.parse_oxi_states(data)
938
939        coord_to_species = OrderedDict()
940        coord_to_magmoms = OrderedDict()
941
942        def get_matching_coord(coord):
943            keys = list(coord_to_species.keys())
944            coords = np.array(keys)
945            for op in self.symmetry_operations:
946                c = op.operate(coord)
947                inds = find_in_coord_list_pbc(coords, c, atol=self._site_tolerance)
948                # cant use if inds, because python is dumb and np.array([0]) evaluates
949                # to False
950                if len(inds):
951                    return keys[inds[0]]
952            return False
953
954        for i in range(len(data["_atom_site_label"])):
955
956            try:
957                # If site type symbol exists, use it. Otherwise, we use the
958                # label.
959                symbol = self._parse_symbol(data["_atom_site_type_symbol"][i])
960                num_h = get_num_implicit_hydrogens(data["_atom_site_type_symbol"][i])
961            except KeyError:
962                symbol = self._parse_symbol(data["_atom_site_label"][i])
963                num_h = get_num_implicit_hydrogens(data["_atom_site_label"][i])
964            if not symbol:
965                continue
966
967            if oxi_states is not None:
968                o_s = oxi_states.get(symbol, 0)
969                # use _atom_site_type_symbol if possible for oxidation state
970                if "_atom_site_type_symbol" in data.data.keys():
971                    oxi_symbol = data["_atom_site_type_symbol"][i]
972                    o_s = oxi_states.get(oxi_symbol, o_s)
973                try:
974                    el = Species(symbol, o_s)
975                except Exception:
976                    el = DummySpecies(symbol, o_s)
977            else:
978                el = get_el_sp(symbol)
979
980            x = str2float(data["_atom_site_fract_x"][i])
981            y = str2float(data["_atom_site_fract_y"][i])
982            z = str2float(data["_atom_site_fract_z"][i])
983            magmom = magmoms.get(data["_atom_site_label"][i], np.array([0, 0, 0]))
984
985            try:
986                occu = str2float(data["_atom_site_occupancy"][i])
987            except (KeyError, ValueError):
988                occu = 1
989
990            if occu > 0:
991                coord = (x, y, z)
992                match = get_matching_coord(coord)
993                comp_d = {el: occu}
994                if num_h > 0:
995                    comp_d["H"] = num_h
996                    self.warnings.append(
997                        "Structure has implicit hydrogens defined, "
998                        "parsed structure unlikely to be suitable for use "
999                        "in calculations unless hydrogens added."
1000                    )
1001                comp = Composition(comp_d)
1002                if not match:
1003                    coord_to_species[coord] = comp
1004                    coord_to_magmoms[coord] = magmom
1005                else:
1006                    coord_to_species[match] += comp
1007                    # disordered magnetic not currently supported
1008                    coord_to_magmoms[match] = None
1009
1010        sum_occu = [
1011            sum(c.values()) for c in coord_to_species.values() if not set(c.elements) == {Element("O"), Element("H")}
1012        ]
1013        if any(o > 1 for o in sum_occu):
1014            msg = (
1015                "Some occupancies ({}) sum to > 1! If they are within "
1016                "the occupancy_tolerance, they will be rescaled. "
1017                "The current occupancy_tolerance is set to: {}".format(sum_occu, self._occupancy_tolerance)
1018            )
1019            warnings.warn(msg)
1020            self.warnings.append(msg)
1021
1022        allspecies = []
1023        allcoords = []
1024        allmagmoms = []
1025        allhydrogens = []
1026        equivalent_indices = []
1027
1028        # check to see if magCIF file is disordered
1029        if self.feature_flags["magcif"]:
1030            for k, v in coord_to_magmoms.items():
1031                if v is None:
1032                    # Proposed solution to this is to instead store magnetic
1033                    # moments as Species 'spin' property, instead of site
1034                    # property, but this introduces ambiguities for end user
1035                    # (such as unintended use of `spin` and Species will have
1036                    # fictious oxidation state).
1037                    raise NotImplementedError("Disordered magnetic structures not currently supported.")
1038
1039        if coord_to_species.items():
1040            for idx, (comp, group) in enumerate(
1041                groupby(
1042                    sorted(list(coord_to_species.items()), key=lambda x: x[1]),
1043                    key=lambda x: x[1],
1044                )
1045            ):
1046                tmp_coords = [site[0] for site in group]
1047                tmp_magmom = [coord_to_magmoms[tmp_coord] for tmp_coord in tmp_coords]
1048
1049                if self.feature_flags["magcif"]:
1050                    coords, magmoms = self._unique_coords(tmp_coords, magmoms_in=tmp_magmom, lattice=lattice)
1051                else:
1052                    coords, magmoms = self._unique_coords(tmp_coords)
1053
1054                if set(comp.elements) == {Element("O"), Element("H")}:
1055                    # O with implicit hydrogens
1056                    im_h = comp["H"]
1057                    species = Composition({"O": comp["O"]})
1058                else:
1059                    im_h = 0
1060                    species = comp
1061
1062                # The following might be a more natural representation of equivalent indicies,
1063                # but is not in the format expect by SymmetrizedStructure:
1064                #   equivalent_indices.append(list(range(len(allcoords), len(coords)+len(allcoords))))
1065                # The above gives a list like:
1066                #   [[0, 1, 2, 3], [4, 5, 6, 7, 8, 9, 10, 11]] where the
1067                # integers are site indices, whereas the version used below will give a version like:
1068                #   [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
1069                # which is a list in the same order as the sites, but where if a site has the same integer
1070                # it is equivalent.
1071                equivalent_indices += len(coords) * [idx]
1072
1073                allhydrogens.extend(len(coords) * [im_h])
1074                allcoords.extend(coords)
1075                allspecies.extend(len(coords) * [species])
1076                allmagmoms.extend(magmoms)
1077
1078            # rescale occupancies if necessary
1079            for i, species in enumerate(allspecies):
1080                totaloccu = sum(species.values())
1081                if 1 < totaloccu <= self._occupancy_tolerance:
1082                    allspecies[i] = species / totaloccu
1083
1084        if allspecies and len(allspecies) == len(allcoords) and len(allspecies) == len(allmagmoms):
1085            site_properties = {}
1086            if any(allhydrogens):
1087                assert len(allhydrogens) == len(allcoords)
1088                site_properties["implicit_hydrogens"] = allhydrogens
1089
1090            if self.feature_flags["magcif"]:
1091                site_properties["magmom"] = allmagmoms
1092
1093            if len(site_properties) == 0:
1094                site_properties = None
1095
1096            struct = Structure(lattice, allspecies, allcoords, site_properties=site_properties)
1097
1098            if symmetrized:
1099
1100                # Wyckoff labels not currently parsed, note that not all CIFs will contain Wyckoff labels
1101                # TODO: extract Wyckoff labels (or other CIF attributes) and include as site_properties
1102                wyckoffs = ["Not Parsed"] * len(struct)
1103
1104                # Names of space groups are likewise not parsed (again, not all CIFs will contain this information)
1105                # What is stored are the lists of symmetry operations used to generate the structure
1106                # TODO: ensure space group labels are stored if present
1107                sg = SpacegroupOperations("Not Parsed", -1, self.symmetry_operations)
1108
1109                return SymmetrizedStructure(struct, sg, equivalent_indices, wyckoffs)
1110
1111            struct = struct.get_sorted_structure()
1112
1113            if primitive and self.feature_flags["magcif"]:
1114                struct = struct.get_primitive_structure(use_site_props=True)
1115            elif primitive:
1116                struct = struct.get_primitive_structure()
1117                struct = struct.get_reduced_structure()
1118
1119            return struct
1120
1121    def get_structures(self, primitive=True, symmetrized=False):
1122        """
1123        Return list of structures in CIF file. primitive boolean sets whether a
1124        conventional cell structure or primitive cell structure is returned.
1125
1126        Args:
1127            primitive (bool): Set to False to return conventional unit cells.
1128                Defaults to True. With magnetic CIF files, will return primitive
1129                magnetic cell which may be larger than nuclear primitive cell.
1130            symmetrized (bool): If True, return a SymmetrizedStructure which will
1131                include the equivalent indices and symmetry operations used to
1132                create the Structure as provided by the CIF (if explicit symmetry
1133                operations are included in the CIF) or generated from information
1134                in the CIF (if only space group labels are provided). Note that
1135                currently Wyckoff labels and space group labels or numbers are
1136                not included in the generated SymmetrizedStructure, these will be
1137                notated as "Not Parsed" or -1 respectively.
1138
1139        Returns:
1140            List of Structures.
1141        """
1142
1143        if primitive and symmetrized:
1144            raise ValueError(
1145                "Using both 'primitive' and 'symmetrized' arguments is not currently supported "
1146                "since unexpected behavior might result."
1147            )
1148
1149        structures = []
1150        for i, d in enumerate(self._cif.data.values()):
1151            try:
1152                s = self._get_structure(d, primitive, symmetrized)
1153                if s:
1154                    structures.append(s)
1155            except (KeyError, ValueError) as exc:
1156                # Warn the user (Errors should never pass silently)
1157                # A user reported a problem with cif files produced by Avogadro
1158                # in which the atomic coordinates are in Cartesian coords.
1159                self.warnings.append(str(exc))
1160                warnings.warn("No structure parsed for %d structure in CIF. Section of CIF file below." % (i + 1))
1161                warnings.warn(str(d))
1162                warnings.warn("Error is %s." % str(exc))
1163
1164        if self.warnings:
1165            warnings.warn("Issues encountered while parsing CIF: %s" % "\n".join(self.warnings))
1166        if len(structures) == 0:
1167            raise ValueError("Invalid cif file with no structures!")
1168        return structures
1169
1170    def get_bibtex_string(self):
1171        """
1172        Get BibTeX reference from CIF file.
1173        :param data:
1174        :return: BibTeX string
1175        """
1176
1177        try:
1178            from pybtex.database import BibliographyData, Entry
1179        except ImportError:
1180            raise RuntimeError("Bibliographic data extraction requires pybtex.")
1181
1182        bibtex_keys = {
1183            "author": ("_publ_author_name", "_citation_author_name"),
1184            "title": ("_publ_section_title", "_citation_title"),
1185            "journal": (
1186                "_journal_name_full",
1187                "_journal_name_abbrev",
1188                "_citation_journal_full",
1189                "_citation_journal_abbrev",
1190            ),
1191            "volume": ("_journal_volume", "_citation_journal_volume"),
1192            "year": ("_journal_year", "_citation_year"),
1193            "number": ("_journal_number", "_citation_number"),
1194            "page_first": ("_journal_page_first", "_citation_page_first"),
1195            "page_last": ("_journal_page_last", "_citation_page_last"),
1196            "doi": ("_journal_DOI", "_citation_DOI"),
1197        }
1198
1199        entries = {}
1200
1201        # TODO: parse '_publ_section_references' when it exists?
1202        # TODO: CIF specification supports multiple citations.
1203
1204        for idx, data in enumerate(self._cif.data.values()):
1205
1206            # convert to lower-case keys, some cif files inconsistent
1207            data = {k.lower(): v for k, v in data.data.items()}
1208
1209            bibtex_entry = {}
1210
1211            for field, tags in bibtex_keys.items():
1212                for tag in tags:
1213                    if tag in data:
1214                        if isinstance(data[tag], list):
1215                            bibtex_entry[field] = data[tag][0]
1216                        else:
1217                            bibtex_entry[field] = data[tag]
1218
1219            # convert to bibtex author format ('and' delimited)
1220            if "author" in bibtex_entry:
1221                # separate out semicolon authors
1222                if isinstance(bibtex_entry["author"], str):
1223                    if ";" in bibtex_entry["author"]:
1224                        bibtex_entry["author"] = bibtex_entry["author"].split(";")
1225
1226                if isinstance(bibtex_entry["author"], list):
1227                    bibtex_entry["author"] = " and ".join(bibtex_entry["author"])
1228
1229            # convert to bibtex page range format, use empty string if not specified
1230            if ("page_first" in bibtex_entry) or ("page_last" in bibtex_entry):
1231                bibtex_entry["pages"] = "{0}--{1}".format(
1232                    bibtex_entry.get("page_first", ""),
1233                    bibtex_entry.get("page_last", ""),
1234                )
1235                bibtex_entry.pop("page_first", None)  # and remove page_first, page_list if present
1236                bibtex_entry.pop("page_last", None)
1237
1238            # cite keys are given as cif-reference-idx in order they are found
1239            entries["cifref{}".format(idx)] = Entry("article", list(bibtex_entry.items()))
1240
1241        return BibliographyData(entries).to_string(bib_format="bibtex")
1242
1243    def as_dict(self):
1244        """
1245        :return: MSONable dict
1246        """
1247        d = OrderedDict()
1248        for k, v in self._cif.data.items():
1249            d[k] = {}
1250            for k2, v2 in v.data.items():
1251                d[k][k2] = v2
1252        return d
1253
1254    @property
1255    def has_errors(self):
1256        """
1257        :return: Whether there are errors/warnings detected in CIF parsing.
1258        """
1259        return len(self.warnings) > 0
1260
1261
1262class CifWriter:
1263    """
1264    A wrapper around CifFile to write CIF files from pymatgen structures.
1265    """
1266
1267    def __init__(
1268        self,
1269        struct,
1270        symprec=None,
1271        write_magmoms=False,
1272        significant_figures=8,
1273        angle_tolerance=5.0,
1274        refine_struct=True,
1275    ):
1276        """
1277        Args:
1278            struct (Structure): structure to write
1279            symprec (float): If not none, finds the symmetry of the structure
1280                and writes the cif with symmetry information. Passes symprec
1281                to the SpacegroupAnalyzer. See also refine_struct.
1282            write_magmoms (bool): If True, will write magCIF file. Incompatible
1283                with symprec
1284            significant_figures (int): Specifies precision for formatting of floats.
1285                Defaults to 8.
1286            angle_tolerance (float): Angle tolerance for symmetry finding. Passes
1287                angle_tolerance to the SpacegroupAnalyzer. Used only if symprec
1288                is not None.
1289            refine_struct: Used only if symprec is not None. If True, get_refined_structure
1290                is invoked to convert input structure from primitive to conventional.
1291        """
1292
1293        if write_magmoms and symprec:
1294            warnings.warn("Magnetic symmetry cannot currently be detected by pymatgen," "disabling symmetry detection.")
1295            symprec = None
1296
1297        format_str = "{:.%df}" % significant_figures
1298
1299        block = OrderedDict()
1300        loops = []
1301        spacegroup = ("P 1", 1)
1302        if symprec is not None:
1303            sf = SpacegroupAnalyzer(struct, symprec, angle_tolerance=angle_tolerance)
1304            spacegroup = (sf.get_space_group_symbol(), sf.get_space_group_number())
1305
1306            if refine_struct:
1307                # Needs the refined structure when using symprec. This converts
1308                # primitive to conventional structures, the standard for CIF.
1309                struct = sf.get_refined_structure()
1310
1311        latt = struct.lattice
1312        comp = struct.composition
1313        no_oxi_comp = comp.element_composition
1314        block["_symmetry_space_group_name_H-M"] = spacegroup[0]
1315        for cell_attr in ["a", "b", "c"]:
1316            block["_cell_length_" + cell_attr] = format_str.format(getattr(latt, cell_attr))
1317        for cell_attr in ["alpha", "beta", "gamma"]:
1318            block["_cell_angle_" + cell_attr] = format_str.format(getattr(latt, cell_attr))
1319        block["_symmetry_Int_Tables_number"] = spacegroup[1]
1320        block["_chemical_formula_structural"] = no_oxi_comp.reduced_formula
1321        block["_chemical_formula_sum"] = no_oxi_comp.formula
1322        block["_cell_volume"] = format_str.format(latt.volume)
1323
1324        reduced_comp, fu = no_oxi_comp.get_reduced_composition_and_factor()
1325        block["_cell_formula_units_Z"] = str(int(fu))
1326
1327        if symprec is None:
1328            block["_symmetry_equiv_pos_site_id"] = ["1"]
1329            block["_symmetry_equiv_pos_as_xyz"] = ["x, y, z"]
1330        else:
1331            sf = SpacegroupAnalyzer(struct, symprec)
1332
1333            symmops = []
1334            for op in sf.get_symmetry_operations():
1335                v = op.translation_vector
1336                symmops.append(SymmOp.from_rotation_and_translation(op.rotation_matrix, v))
1337
1338            ops = [op.as_xyz_string() for op in symmops]
1339            block["_symmetry_equiv_pos_site_id"] = ["%d" % i for i in range(1, len(ops) + 1)]
1340            block["_symmetry_equiv_pos_as_xyz"] = ops
1341
1342        loops.append(["_symmetry_equiv_pos_site_id", "_symmetry_equiv_pos_as_xyz"])
1343
1344        try:
1345            symbol_to_oxinum = OrderedDict([(el.__str__(), float(el.oxi_state)) for el in sorted(comp.elements)])
1346            block["_atom_type_symbol"] = symbol_to_oxinum.keys()
1347            block["_atom_type_oxidation_number"] = symbol_to_oxinum.values()
1348            loops.append(["_atom_type_symbol", "_atom_type_oxidation_number"])
1349        except (TypeError, AttributeError):
1350            symbol_to_oxinum = OrderedDict([(el.symbol, 0) for el in sorted(comp.elements)])
1351
1352        atom_site_type_symbol = []
1353        atom_site_symmetry_multiplicity = []
1354        atom_site_fract_x = []
1355        atom_site_fract_y = []
1356        atom_site_fract_z = []
1357        atom_site_label = []
1358        atom_site_occupancy = []
1359        atom_site_moment_label = []
1360        atom_site_moment_crystalaxis_x = []
1361        atom_site_moment_crystalaxis_y = []
1362        atom_site_moment_crystalaxis_z = []
1363        count = 0
1364        if symprec is None:
1365            for site in struct:
1366                for sp, occu in sorted(site.species.items()):
1367                    atom_site_type_symbol.append(sp.__str__())
1368                    atom_site_symmetry_multiplicity.append("1")
1369                    atom_site_fract_x.append(format_str.format(site.a))
1370                    atom_site_fract_y.append(format_str.format(site.b))
1371                    atom_site_fract_z.append(format_str.format(site.c))
1372                    atom_site_label.append("{}{}".format(sp.symbol, count))
1373                    atom_site_occupancy.append(occu.__str__())
1374
1375                    magmom = Magmom(site.properties.get("magmom", getattr(sp, "spin", 0)))
1376                    if write_magmoms and abs(magmom) > 0:
1377                        moment = Magmom.get_moment_relative_to_crystal_axes(magmom, latt)
1378                        atom_site_moment_label.append("{}{}".format(sp.symbol, count))
1379                        atom_site_moment_crystalaxis_x.append(format_str.format(moment[0]))
1380                        atom_site_moment_crystalaxis_y.append(format_str.format(moment[1]))
1381                        atom_site_moment_crystalaxis_z.append(format_str.format(moment[2]))
1382
1383                    count += 1
1384        else:
1385            # The following just presents a deterministic ordering.
1386            unique_sites = [
1387                (
1388                    sorted(sites, key=lambda s: tuple(abs(x) for x in s.frac_coords))[0],
1389                    len(sites),
1390                )
1391                for sites in sf.get_symmetrized_structure().equivalent_sites
1392            ]
1393            for site, mult in sorted(
1394                unique_sites,
1395                key=lambda t: (
1396                    t[0].species.average_electroneg,
1397                    -t[1],
1398                    t[0].a,
1399                    t[0].b,
1400                    t[0].c,
1401                ),
1402            ):
1403                for sp, occu in site.species.items():
1404                    atom_site_type_symbol.append(sp.__str__())
1405                    atom_site_symmetry_multiplicity.append("%d" % mult)
1406                    atom_site_fract_x.append(format_str.format(site.a))
1407                    atom_site_fract_y.append(format_str.format(site.b))
1408                    atom_site_fract_z.append(format_str.format(site.c))
1409                    atom_site_label.append("{}{}".format(sp.symbol, count))
1410                    atom_site_occupancy.append(occu.__str__())
1411                    count += 1
1412
1413        block["_atom_site_type_symbol"] = atom_site_type_symbol
1414        block["_atom_site_label"] = atom_site_label
1415        block["_atom_site_symmetry_multiplicity"] = atom_site_symmetry_multiplicity
1416        block["_atom_site_fract_x"] = atom_site_fract_x
1417        block["_atom_site_fract_y"] = atom_site_fract_y
1418        block["_atom_site_fract_z"] = atom_site_fract_z
1419        block["_atom_site_occupancy"] = atom_site_occupancy
1420        loops.append(
1421            [
1422                "_atom_site_type_symbol",
1423                "_atom_site_label",
1424                "_atom_site_symmetry_multiplicity",
1425                "_atom_site_fract_x",
1426                "_atom_site_fract_y",
1427                "_atom_site_fract_z",
1428                "_atom_site_occupancy",
1429            ]
1430        )
1431        if write_magmoms:
1432            block["_atom_site_moment_label"] = atom_site_moment_label
1433            block["_atom_site_moment_crystalaxis_x"] = atom_site_moment_crystalaxis_x
1434            block["_atom_site_moment_crystalaxis_y"] = atom_site_moment_crystalaxis_y
1435            block["_atom_site_moment_crystalaxis_z"] = atom_site_moment_crystalaxis_z
1436            loops.append(
1437                [
1438                    "_atom_site_moment_label",
1439                    "_atom_site_moment_crystalaxis_x",
1440                    "_atom_site_moment_crystalaxis_y",
1441                    "_atom_site_moment_crystalaxis_z",
1442                ]
1443            )
1444        d = OrderedDict()
1445        d[comp.reduced_formula] = CifBlock(block, loops, comp.reduced_formula)
1446        self._cf = CifFile(d)
1447
1448    @property
1449    def ciffile(self):
1450        """
1451        Returns: CifFile associated with the CifWriter.
1452        """
1453        return self._cf
1454
1455    def __str__(self):
1456        """
1457        Returns the cif as a string.
1458        """
1459        return self._cf.__str__()
1460
1461    def write_file(self, filename):
1462        """
1463        Write the cif file.
1464        """
1465        with zopen(filename, "wt") as f:
1466            f.write(self.__str__())
1467
1468
1469def str2float(text):
1470    """
1471    Remove uncertainty brackets from strings and return the float.
1472    """
1473
1474    try:
1475        # Note that the ending ) is sometimes missing. That is why the code has
1476        # been modified to treat it as optional. Same logic applies to lists.
1477        return float(re.sub(r"\(.+\)*", "", text))
1478    except TypeError:
1479        if isinstance(text, list) and len(text) == 1:
1480            return float(re.sub(r"\(.+\)*", "", text[0]))
1481    except ValueError as ex:
1482        if text.strip() == ".":
1483            return 0
1484        raise ex
1485    raise ValueError(f"{text} cannot be converted to float")
1486