1# coding: utf-8 2# Copyright (c) Pymatgen Development Team. 3# Distributed under the terms of the MIT License. 4 5""" 6Wrapper classes for Cif input and output from Structures. 7""" 8 9import math 10import os 11import re 12import textwrap 13import warnings 14from collections import OrderedDict, deque 15from functools import partial 16from inspect import getfullargspec as getargspec 17from io import StringIO 18from itertools import groupby 19from pathlib import Path 20 21import numpy as np 22from monty.io import zopen 23from monty.string import remove_non_ascii 24 25from pymatgen.core.composition import Composition 26from pymatgen.core.lattice import Lattice 27from pymatgen.core.operations import MagSymmOp, SymmOp 28from pymatgen.core.periodic_table import DummySpecies, Element, Species, get_el_sp 29from pymatgen.core.structure import Structure 30from pymatgen.electronic_structure.core import Magmom 31from pymatgen.symmetry.analyzer import SpacegroupAnalyzer, SpacegroupOperations 32from pymatgen.symmetry.groups import SYMM_DATA, SpaceGroup 33from pymatgen.symmetry.maggroups import MagneticSpaceGroup 34from pymatgen.symmetry.structure import SymmetrizedStructure 35from pymatgen.util.coord import find_in_coord_list_pbc, in_coord_list_pbc 36 37__author__ = "Shyue Ping Ong, Will Richards, Matthew Horton" 38 39sub_spgrp = partial(re.sub, r"[\s_]", "") 40 41space_groups = {sub_spgrp(k): k for k in SYMM_DATA["space_group_encoding"].keys()} # type: ignore 42 43space_groups.update({sub_spgrp(k): k for k in SYMM_DATA["space_group_encoding"].keys()}) # type: ignore 44 45_COD_DATA = None 46 47 48def _get_cod_data(): 49 global _COD_DATA 50 if _COD_DATA is None: 51 import pymatgen 52 53 with open(os.path.join(pymatgen.symmetry.__path__[0], "symm_ops.json")) as f: 54 import json 55 56 _COD_DATA = json.load(f) 57 58 return _COD_DATA 59 60 61class CifBlock: 62 """ 63 Object for storing cif data. All data is stored in a single dictionary. 64 Data inside loops are stored in lists in the data dictionary, and 65 information on which keys are grouped together are stored in the loops 66 attribute. 67 """ 68 69 maxlen = 70 # not quite 80 so we can deal with semicolons and things 70 71 def __init__(self, data, loops, header): 72 """ 73 Args: 74 data: dict or OrderedDict of data to go into the cif. Values should 75 be convertible to string, or lists of these if the key is 76 in a loop 77 loops: list of lists of keys, grouped by which loop they should 78 appear in 79 header: name of the block (appears after the data_ on the first 80 line) 81 """ 82 self.loops = loops 83 self.data = data 84 # AJ says: CIF Block names cannot be more than 75 characters or you 85 # get an Exception 86 self.header = header[:74] 87 88 def __eq__(self, other): 89 return self.loops == other.loops and self.data == other.data and self.header == other.header 90 91 def __getitem__(self, key): 92 return self.data[key] 93 94 def __str__(self): 95 """ 96 Returns the cif string for the data block 97 """ 98 s = ["data_{}".format(self.header)] 99 keys = self.data.keys() 100 written = [] 101 for k in keys: 102 if k in written: 103 continue 104 for l in self.loops: 105 # search for a corresponding loop 106 if k in l: 107 s.append(self._loop_to_string(l)) 108 written.extend(l) 109 break 110 if k not in written: 111 # k didn't belong to a loop 112 v = self._format_field(self.data[k]) 113 if len(k) + len(v) + 3 < self.maxlen: 114 s.append("{} {}".format(k, v)) 115 else: 116 s.extend([k, v]) 117 return "\n".join(s) 118 119 def _loop_to_string(self, loop): 120 s = "loop_" 121 for l in loop: 122 s += "\n " + l 123 for fields in zip(*[self.data[k] for k in loop]): 124 line = "\n" 125 for val in map(self._format_field, fields): 126 if val[0] == ";": 127 s += line + "\n" + val 128 line = "\n" 129 elif len(line) + len(val) + 2 < self.maxlen: 130 line += " " + val 131 else: 132 s += line 133 line = "\n " + val 134 s += line 135 return s 136 137 def _format_field(self, v): 138 v = v.__str__().strip() 139 if len(v) > self.maxlen: 140 return ";\n" + textwrap.fill(v, self.maxlen) + "\n;" 141 # add quotes if necessary 142 if v == "": 143 return '""' 144 if (" " in v or v[0] == "_") and not (v[0] == "'" and v[-1] == "'") and not (v[0] == '"' and v[-1] == '"'): 145 if "'" in v: 146 q = '"' 147 else: 148 q = "'" 149 v = q + v + q 150 return v 151 152 @classmethod 153 def _process_string(cls, string): 154 # remove comments 155 string = re.sub(r"(\s|^)#.*$", "", string, flags=re.MULTILINE) 156 # remove empty lines 157 string = re.sub(r"^\s*\n", "", string, flags=re.MULTILINE) 158 # remove non_ascii 159 string = remove_non_ascii(string) 160 # since line breaks in .cif files are mostly meaningless, 161 # break up into a stream of tokens to parse, rejoining multiline 162 # strings (between semicolons) 163 q = deque() 164 multiline = False 165 ml = [] 166 # this regex splits on spaces, except when in quotes. 167 # starting quotes must not be preceded by non-whitespace 168 # (these get eaten by the first expression) 169 # ending quotes must not be followed by non-whitespace 170 p = re.compile(r"""([^'"\s][\S]*)|'(.*?)'(?!\S)|"(.*?)"(?!\S)""") 171 for l in string.splitlines(): 172 if multiline: 173 if l.startswith(";"): 174 multiline = False 175 q.append(("", "", "", " ".join(ml))) 176 ml = [] 177 l = l[1:].strip() 178 else: 179 ml.append(l) 180 continue 181 if l.startswith(";"): 182 multiline = True 183 ml.append(l[1:].strip()) 184 else: 185 for s in p.findall(l): 186 # s is tuple. location of the data in the tuple 187 # depends on whether it was quoted in the input 188 q.append(s) 189 return q 190 191 @classmethod 192 def from_string(cls, string): 193 """ 194 Reads CifBlock from string. 195 196 :param string: String representation. 197 :return: CifBlock 198 """ 199 q = cls._process_string(string) 200 header = q.popleft()[0][5:] 201 data = OrderedDict() 202 loops = [] 203 while q: 204 s = q.popleft() 205 # cif keys aren't in quotes, so show up in s[0] 206 if s[0] == "_eof": 207 break 208 if s[0].startswith("_"): 209 try: 210 data[s[0]] = "".join(q.popleft()) 211 except IndexError: 212 data[s[0]] = "" 213 elif s[0].startswith("loop_"): 214 columns = [] 215 items = [] 216 while q: 217 s = q[0] 218 if s[0].startswith("loop_") or not s[0].startswith("_"): 219 break 220 columns.append("".join(q.popleft())) 221 data[columns[-1]] = [] 222 while q: 223 s = q[0] 224 if s[0].startswith("loop_") or s[0].startswith("_"): 225 break 226 items.append("".join(q.popleft())) 227 n = len(items) // len(columns) 228 assert len(items) % n == 0 229 loops.append(columns) 230 for k, v in zip(columns * n, items): 231 data[k].append(v.strip()) 232 elif "".join(s).strip() != "": 233 warnings.warn("Possible issue in cif file" " at line: {}".format("".join(s).strip())) 234 return cls(data, loops, header) 235 236 237class CifFile: 238 """ 239 Reads and parses CifBlocks from a .cif file or string 240 """ 241 242 def __init__(self, data, orig_string=None, comment=None): 243 """ 244 Args: 245 data (OrderedDict): Of CifBlock objects.å 246 orig_string (str): The original cif string. 247 comment (str): Comment string. 248 """ 249 self.data = data 250 self.orig_string = orig_string 251 self.comment = comment or "# generated using pymatgen" 252 253 def __str__(self): 254 s = ["%s" % v for v in self.data.values()] 255 return self.comment + "\n" + "\n".join(s) + "\n" 256 257 @classmethod 258 def from_string(cls, string): 259 """ 260 Reads CifFile from a string. 261 262 :param string: String representation. 263 :return: CifFile 264 """ 265 d = OrderedDict() 266 for x in re.split(r"^\s*data_", "x\n" + string, flags=re.MULTILINE | re.DOTALL)[1:]: 267 268 # Skip over Cif block that contains powder diffraction data. 269 # Some elements in this block were missing from CIF files in 270 # Springer materials/Pauling file DBs. 271 # This block anyway does not contain any structure information, and 272 # CifParser was also not parsing it. 273 if "powder_pattern" in re.split(r"\n", x, 1)[0]: 274 continue 275 c = CifBlock.from_string("data_" + x) 276 d[c.header] = c 277 return cls(d, string) 278 279 @classmethod 280 def from_file(cls, filename): 281 """ 282 Reads CifFile from a filename. 283 284 :param filename: Filename 285 :return: CifFile 286 """ 287 with zopen(str(filename), "rt", errors="replace") as f: 288 return cls.from_string(f.read()) 289 290 291class CifParser: 292 """ 293 Parses a CIF file. Attempts to fix CIFs that are out-of-spec, but will 294 issue warnings if corrections applied. These are also stored in the 295 CifParser's errors attribute. 296 """ 297 298 def __init__(self, filename, occupancy_tolerance=1.0, site_tolerance=1e-4): 299 """ 300 Args: 301 filename (str): CIF filename, bzipped or gzipped CIF files are fine too. 302 occupancy_tolerance (float): If total occupancy of a site is between 1 303 and occupancy_tolerance, the occupancies will be scaled down to 1. 304 site_tolerance (float): This tolerance is used to determine if two 305 sites are sitting in the same position, in which case they will be 306 combined to a single disordered site. Defaults to 1e-4. 307 """ 308 self._occupancy_tolerance = occupancy_tolerance 309 self._site_tolerance = site_tolerance 310 if isinstance(filename, (str, Path)): 311 self._cif = CifFile.from_file(filename) 312 else: 313 self._cif = CifFile.from_string(filename.read()) 314 315 # store if CIF contains features from non-core CIF dictionaries 316 # e.g. magCIF 317 self.feature_flags = {} 318 self.warnings = [] 319 320 def is_magcif(): 321 """ 322 Checks to see if file appears to be a magCIF file (heuristic). 323 """ 324 # Doesn't seem to be a canonical way to test if file is magCIF or 325 # not, so instead check for magnetic symmetry datanames 326 prefixes = [ 327 "_space_group_magn", 328 "_atom_site_moment", 329 "_space_group_symop_magn", 330 ] 331 for d in self._cif.data.values(): 332 for k in d.data.keys(): 333 for prefix in prefixes: 334 if prefix in k: 335 return True 336 return False 337 338 self.feature_flags["magcif"] = is_magcif() 339 340 def is_magcif_incommensurate(): 341 """ 342 Checks to see if file contains an incommensurate magnetic 343 structure (heuristic). 344 """ 345 # Doesn't seem to be a canonical way to test if magCIF file 346 # describes incommensurate strucure or not, so instead check 347 # for common datanames 348 if not self.feature_flags["magcif"]: 349 return False 350 prefixes = ["_cell_modulation_dimension", "_cell_wave_vector"] 351 for d in self._cif.data.values(): 352 for k in d.data.keys(): 353 for prefix in prefixes: 354 if prefix in k: 355 return True 356 return False 357 358 self.feature_flags["magcif_incommensurate"] = is_magcif_incommensurate() 359 360 for k in self._cif.data.keys(): 361 # pass individual CifBlocks to _sanitize_data 362 self._cif.data[k] = self._sanitize_data(self._cif.data[k]) 363 364 @staticmethod 365 def from_string(cif_string, occupancy_tolerance=1.0): 366 """ 367 Creates a CifParser from a string. 368 369 Args: 370 cif_string (str): String representation of a CIF. 371 occupancy_tolerance (float): If total occupancy of a site is 372 between 1 and occupancy_tolerance, the occupancies will be 373 scaled down to 1. 374 375 Returns: 376 CifParser 377 """ 378 stream = StringIO(cif_string) 379 return CifParser(stream, occupancy_tolerance) 380 381 def _sanitize_data(self, data): 382 """ 383 Some CIF files do not conform to spec. This function corrects 384 known issues, particular in regards to Springer materials/ 385 Pauling files. 386 387 This function is here so that CifParser can assume its 388 input conforms to spec, simplifying its implementation. 389 :param data: CifBlock 390 :return: data CifBlock 391 """ 392 393 """ 394 This part of the code deals with handling formats of data as found in 395 CIF files extracted from the Springer Materials/Pauling File 396 databases, and that are different from standard ICSD formats. 397 """ 398 399 # check for implicit hydrogens, warn if any present 400 if "_atom_site_attached_hydrogens" in data.data.keys(): 401 attached_hydrogens = [str2float(x) for x in data.data["_atom_site_attached_hydrogens"] if str2float(x) != 0] 402 if len(attached_hydrogens) > 0: 403 self.warnings.append( 404 "Structure has implicit hydrogens defined, " 405 "parsed structure unlikely to be suitable for use " 406 "in calculations unless hydrogens added." 407 ) 408 409 # Check to see if "_atom_site_type_symbol" exists, as some test CIFs do 410 # not contain this key. 411 if "_atom_site_type_symbol" in data.data.keys(): 412 413 # Keep a track of which data row needs to be removed. 414 # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14 415 # 'rhombic dodecahedron, Nb<sub>14</sub>' 416 # Without this code, the above row in a structure would be parsed 417 # as an ordered site with only Nb (since 418 # CifParser would try to parse the first two characters of the 419 # label "Nb,Zr") and occupancy=1. 420 # However, this site is meant to be a disordered site with 0.8 of 421 # Nb and 0.2 of Zr. 422 idxs_to_remove = [] 423 424 new_atom_site_label = [] 425 new_atom_site_type_symbol = [] 426 new_atom_site_occupancy = [] 427 new_fract_x = [] 428 new_fract_y = [] 429 new_fract_z = [] 430 431 for idx, el_row in enumerate(data["_atom_site_label"]): 432 433 # CIF files from the Springer Materials/Pauling File have 434 # switched the label and symbol. Thus, in the 435 # above shown example row, '0.8Nb + 0.2Zr' is the symbol. 436 # Below, we split the strings on ' + ' to 437 # check if the length (or number of elements) in the label and 438 # symbol are equal. 439 if len(data["_atom_site_type_symbol"][idx].split(" + ")) > len( 440 data["_atom_site_label"][idx].split(" + ") 441 ): 442 443 # Dictionary to hold extracted elements and occupancies 444 els_occu = {} 445 446 # parse symbol to get element names and occupancy and store 447 # in "els_occu" 448 symbol_str = data["_atom_site_type_symbol"][idx] 449 symbol_str_lst = symbol_str.split(" + ") 450 for elocc_idx, sym in enumerate(symbol_str_lst): 451 # Remove any bracketed items in the string 452 symbol_str_lst[elocc_idx] = re.sub(r"\([0-9]*\)", "", sym.strip()) 453 454 # Extract element name and its occupancy from the 455 # string, and store it as a 456 # key-value pair in "els_occ". 457 els_occu[ 458 str(re.findall(r"\D+", symbol_str_lst[elocc_idx].strip())[1]).replace("<sup>", "") 459 ] = float("0" + re.findall(r"\.?\d+", symbol_str_lst[elocc_idx].strip())[1]) 460 461 x = str2float(data["_atom_site_fract_x"][idx]) 462 y = str2float(data["_atom_site_fract_y"][idx]) 463 z = str2float(data["_atom_site_fract_z"][idx]) 464 465 for et, occu in els_occu.items(): 466 # new atom site labels have 'fix' appended 467 new_atom_site_label.append(et + "_fix" + str(len(new_atom_site_label))) 468 new_atom_site_type_symbol.append(et) 469 new_atom_site_occupancy.append(str(occu)) 470 new_fract_x.append(str(x)) 471 new_fract_y.append(str(y)) 472 new_fract_z.append(str(z)) 473 474 idxs_to_remove.append(idx) 475 476 # Remove the original row by iterating over all keys in the CIF 477 # data looking for lists, which indicates 478 # multiple data items, one for each row, and remove items from the 479 # list that corresponds to the removed row, 480 # so that it's not processed by the rest of this function (which 481 # would result in an error). 482 for original_key in data.data: 483 if isinstance(data.data[original_key], list): 484 for id in sorted(idxs_to_remove, reverse=True): 485 del data.data[original_key][id] 486 487 if len(idxs_to_remove) > 0: 488 self.warnings.append("Pauling file corrections applied.") 489 490 data.data["_atom_site_label"] += new_atom_site_label 491 data.data["_atom_site_type_symbol"] += new_atom_site_type_symbol 492 data.data["_atom_site_occupancy"] += new_atom_site_occupancy 493 data.data["_atom_site_fract_x"] += new_fract_x 494 data.data["_atom_site_fract_y"] += new_fract_y 495 data.data["_atom_site_fract_z"] += new_fract_z 496 497 """ 498 This fixes inconsistencies in naming of several magCIF tags 499 as a result of magCIF being in widespread use prior to 500 specification being finalized (on advice of Branton Campbell). 501 """ 502 503 if self.feature_flags["magcif"]: 504 505 # CIF-1 style has all underscores, interim standard 506 # had period before magn instead of before the final 507 # component (e.g. xyz) 508 # we want to standardize on a specific key, to simplify 509 # parsing code 510 correct_keys = [ 511 "_space_group_symop_magn_operation.xyz", 512 "_space_group_symop_magn_centering.xyz", 513 "_space_group_magn.name_BNS", 514 "_space_group_magn.number_BNS", 515 "_atom_site_moment_crystalaxis_x", 516 "_atom_site_moment_crystalaxis_y", 517 "_atom_site_moment_crystalaxis_z", 518 "_atom_site_moment_label", 519 ] 520 521 # cannot mutate OrderedDict during enumeration, 522 # so store changes we want to make 523 changes_to_make = {} 524 525 for original_key in data.data: 526 for correct_key in correct_keys: 527 # convert to all underscore 528 trial_key = "_".join(correct_key.split(".")) 529 test_key = "_".join(original_key.split(".")) 530 if trial_key == test_key: 531 changes_to_make[correct_key] = original_key 532 533 # make changes 534 for correct_key, original_key in changes_to_make.items(): 535 data.data[correct_key] = data.data[original_key] 536 537 # renamed_keys maps interim_keys to final_keys 538 renamed_keys = { 539 "_magnetic_space_group.transform_to_standard_Pp_abc": "_space_group_magn.transform_BNS_Pp_abc" 540 } 541 changes_to_make = {} 542 543 for interim_key, final_key in renamed_keys.items(): 544 if data.data.get(interim_key): 545 changes_to_make[final_key] = interim_key 546 547 if len(changes_to_make) > 0: 548 self.warnings.append("Keys changed to match new magCIF specification.") 549 550 for final_key, interim_key in changes_to_make.items(): 551 data.data[final_key] = data.data[interim_key] 552 553 # check for finite precision frac co-ordinates (e.g. 0.6667 instead of 0.6666666...7) 554 # this can sometimes cause serious issues when applying symmetry operations 555 important_fracs = (1 / 3.0, 2 / 3.0) 556 fracs_to_change = {} 557 for label in ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z"): 558 if label in data.data.keys(): 559 for idx, frac in enumerate(data.data[label]): 560 try: 561 frac = str2float(frac) 562 except Exception: 563 # co-ordinate might not be defined e.g. '?' 564 continue 565 for comparison_frac in important_fracs: 566 if abs(1 - frac / comparison_frac) < 1e-4: 567 fracs_to_change[(label, idx)] = str(comparison_frac) 568 if fracs_to_change: 569 self.warnings.append( 570 "Some fractional co-ordinates rounded to ideal values to " "avoid issues with finite precision." 571 ) 572 for (label, idx), val in fracs_to_change.items(): 573 data.data[label][idx] = val 574 575 return data 576 577 def _unique_coords(self, coords_in, magmoms_in=None, lattice=None): 578 """ 579 Generate unique coordinates using coord and symmetry positions 580 and also their corresponding magnetic moments, if supplied. 581 """ 582 coords = [] 583 if magmoms_in: 584 magmoms = [] 585 if len(magmoms_in) != len(coords_in): 586 raise ValueError 587 for tmp_coord, tmp_magmom in zip(coords_in, magmoms_in): 588 for op in self.symmetry_operations: 589 coord = op.operate(tmp_coord) 590 coord = np.array([i - math.floor(i) for i in coord]) 591 if isinstance(op, MagSymmOp): 592 # Up to this point, magmoms have been defined relative 593 # to crystal axis. Now convert to Cartesian and into 594 # a Magmom object. 595 magmom = Magmom.from_moment_relative_to_crystal_axes( 596 op.operate_magmom(tmp_magmom), lattice=lattice 597 ) 598 else: 599 magmom = Magmom(tmp_magmom) 600 if not in_coord_list_pbc(coords, coord, atol=self._site_tolerance): 601 coords.append(coord) 602 magmoms.append(magmom) 603 return coords, magmoms 604 605 for tmp_coord in coords_in: 606 for op in self.symmetry_operations: 607 coord = op.operate(tmp_coord) 608 coord = np.array([i - math.floor(i) for i in coord]) 609 if not in_coord_list_pbc(coords, coord, atol=self._site_tolerance): 610 coords.append(coord) 611 return coords, [Magmom(0)] * len(coords) # return dummy magmoms 612 613 def get_lattice( 614 self, 615 data, 616 length_strings=("a", "b", "c"), 617 angle_strings=("alpha", "beta", "gamma"), 618 lattice_type=None, 619 ): 620 """ 621 Generate the lattice from the provided lattice parameters. In 622 the absence of all six lattice parameters, the crystal system 623 and necessary parameters are parsed 624 """ 625 try: 626 627 lengths = [str2float(data["_cell_length_" + i]) for i in length_strings] 628 angles = [str2float(data["_cell_angle_" + i]) for i in angle_strings] 629 if not lattice_type: 630 return Lattice.from_parameters(*lengths, *angles) 631 632 return getattr(Lattice, lattice_type)(*(lengths + angles)) 633 634 except KeyError: 635 # Missing Key search for cell setting 636 for lattice_lable in [ 637 "_symmetry_cell_setting", 638 "_space_group_crystal_system", 639 ]: 640 if data.data.get(lattice_lable): 641 lattice_type = data.data.get(lattice_lable).lower() 642 try: 643 644 required_args = getargspec(getattr(Lattice, lattice_type)).args 645 646 lengths = (l for l in length_strings if l in required_args) 647 angles = (a for a in angle_strings if a in required_args) 648 return self.get_lattice(data, lengths, angles, lattice_type=lattice_type) 649 except AttributeError as exc: 650 self.warnings.append(str(exc)) 651 warnings.warn(exc) 652 653 else: 654 return None 655 return None 656 657 def get_symops(self, data): 658 """ 659 In order to generate symmetry equivalent positions, the symmetry 660 operations are parsed. If the symops are not present, the space 661 group symbol is parsed, and symops are generated. 662 """ 663 symops = [] 664 for symmetry_label in [ 665 "_symmetry_equiv_pos_as_xyz", 666 "_symmetry_equiv_pos_as_xyz_", 667 "_space_group_symop_operation_xyz", 668 "_space_group_symop_operation_xyz_", 669 ]: 670 if data.data.get(symmetry_label): 671 xyz = data.data.get(symmetry_label) 672 if isinstance(xyz, str): 673 msg = "A 1-line symmetry op P1 CIF is detected!" 674 warnings.warn(msg) 675 self.warnings.append(msg) 676 xyz = [xyz] 677 try: 678 symops = [SymmOp.from_xyz_string(s) for s in xyz] 679 break 680 except ValueError: 681 continue 682 if not symops: 683 # Try to parse symbol 684 for symmetry_label in [ 685 "_symmetry_space_group_name_H-M", 686 "_symmetry_space_group_name_H_M", 687 "_symmetry_space_group_name_H-M_", 688 "_symmetry_space_group_name_H_M_", 689 "_space_group_name_Hall", 690 "_space_group_name_Hall_", 691 "_space_group_name_H-M_alt", 692 "_space_group_name_H-M_alt_", 693 "_symmetry_space_group_name_hall", 694 "_symmetry_space_group_name_hall_", 695 "_symmetry_space_group_name_h-m", 696 "_symmetry_space_group_name_h-m_", 697 ]: 698 sg = data.data.get(symmetry_label) 699 700 if sg: 701 sg = sub_spgrp(sg) 702 try: 703 spg = space_groups.get(sg) 704 if spg: 705 symops = SpaceGroup(spg).symmetry_ops 706 msg = ( 707 "No _symmetry_equiv_pos_as_xyz type key found. " 708 "Spacegroup from %s used." % symmetry_label 709 ) 710 warnings.warn(msg) 711 self.warnings.append(msg) 712 break 713 except ValueError: 714 # Ignore any errors 715 pass 716 717 try: 718 for d in _get_cod_data(): 719 if sg == re.sub(r"\s+", "", d["hermann_mauguin"]): 720 xyz = d["symops"] 721 symops = [SymmOp.from_xyz_string(s) for s in xyz] 722 msg = ( 723 "No _symmetry_equiv_pos_as_xyz type key found. " 724 "Spacegroup from %s used." % symmetry_label 725 ) 726 warnings.warn(msg) 727 self.warnings.append(msg) 728 break 729 except Exception: 730 continue 731 732 if symops: 733 break 734 if not symops: 735 # Try to parse International number 736 for symmetry_label in [ 737 "_space_group_IT_number", 738 "_space_group_IT_number_", 739 "_symmetry_Int_Tables_number", 740 "_symmetry_Int_Tables_number_", 741 ]: 742 if data.data.get(symmetry_label): 743 try: 744 i = int(str2float(data.data.get(symmetry_label))) 745 symops = SpaceGroup.from_int_number(i).symmetry_ops 746 break 747 except ValueError: 748 continue 749 750 if not symops: 751 msg = "No _symmetry_equiv_pos_as_xyz type key found. " "Defaulting to P1." 752 warnings.warn(msg) 753 self.warnings.append(msg) 754 symops = [SymmOp.from_xyz_string(s) for s in ["x", "y", "z"]] 755 756 return symops 757 758 def get_magsymops(self, data): 759 """ 760 Equivalent to get_symops except for magnetic symmetry groups. 761 Separate function since additional operation for time reversal symmetry 762 (which changes magnetic moments on sites) needs to be returned. 763 """ 764 magsymmops = [] 765 766 # check to see if magCIF file explicitly contains magnetic symmetry operations 767 if data.data.get("_space_group_symop_magn_operation.xyz"): 768 769 xyzt = data.data.get("_space_group_symop_magn_operation.xyz") 770 if isinstance(xyzt, str): 771 xyzt = [xyzt] 772 magsymmops = [MagSymmOp.from_xyzt_string(s) for s in xyzt] 773 774 if data.data.get("_space_group_symop_magn_centering.xyz"): 775 776 xyzt = data.data.get("_space_group_symop_magn_centering.xyz") 777 if isinstance(xyzt, str): 778 xyzt = [xyzt] 779 centering_symops = [MagSymmOp.from_xyzt_string(s) for s in xyzt] 780 781 all_ops = [] 782 for op in magsymmops: 783 for centering_op in centering_symops: 784 new_translation = [ 785 i - np.floor(i) for i in op.translation_vector + centering_op.translation_vector 786 ] 787 new_time_reversal = op.time_reversal * centering_op.time_reversal 788 all_ops.append( 789 MagSymmOp.from_rotation_and_translation_and_time_reversal( 790 rotation_matrix=op.rotation_matrix, 791 translation_vec=new_translation, 792 time_reversal=new_time_reversal, 793 ) 794 ) 795 magsymmops = all_ops 796 797 # else check to see if it specifies a magnetic space group 798 elif data.data.get("_space_group_magn.name_BNS") or data.data.get("_space_group_magn.number_BNS"): 799 800 if data.data.get("_space_group_magn.name_BNS"): 801 # get BNS label for MagneticSpaceGroup() 802 id = data.data.get("_space_group_magn.name_BNS") 803 else: 804 # get BNS number for MagneticSpaceGroup() 805 # by converting string to list of ints 806 id = list(map(int, (data.data.get("_space_group_magn.number_BNS").split(".")))) 807 808 if data.data.get("_space_group_magn.transform_BNS_Pp_abc"): 809 if data.data.get("_space_group_magn.transform_BNS_Pp_abc") != "a,b,c;0,0,0": 810 811 jf = data.data.get("_space_group_magn.transform_BNS_Pp_abc") 812 msg = MagneticSpaceGroup(id, jf) 813 814 elif data.data.get("_space_group_magn.transform_BNS_Pp"): 815 return NotImplementedError("Incomplete specification to implement.") 816 else: 817 msg = MagneticSpaceGroup(id) 818 819 magsymmops = msg.symmetry_ops 820 821 if not magsymmops: 822 msg = "No magnetic symmetry detected, using primitive symmetry." 823 warnings.warn(msg) 824 self.warnings.append(msg) 825 magsymmops = [MagSymmOp.from_xyzt_string("x, y, z, 1")] 826 827 return magsymmops 828 829 @staticmethod 830 def parse_oxi_states(data): 831 """ 832 Parse oxidation states from data dictionary 833 """ 834 try: 835 oxi_states = { 836 data["_atom_type_symbol"][i]: str2float(data["_atom_type_oxidation_number"][i]) 837 for i in range(len(data["_atom_type_symbol"])) 838 } 839 # attempt to strip oxidation state from _atom_type_symbol 840 # in case the label does not contain an oxidation state 841 for i, symbol in enumerate(data["_atom_type_symbol"]): 842 oxi_states[re.sub(r"\d?[\+,\-]?$", "", symbol)] = str2float(data["_atom_type_oxidation_number"][i]) 843 844 except (ValueError, KeyError): 845 oxi_states = None 846 return oxi_states 847 848 @staticmethod 849 def parse_magmoms(data, lattice=None): 850 """ 851 Parse atomic magnetic moments from data dictionary 852 """ 853 if lattice is None: 854 raise Exception("Magmoms given in terms of crystal axes in magCIF spec.") 855 try: 856 magmoms = { 857 data["_atom_site_moment_label"][i]: np.array( 858 [ 859 str2float(data["_atom_site_moment_crystalaxis_x"][i]), 860 str2float(data["_atom_site_moment_crystalaxis_y"][i]), 861 str2float(data["_atom_site_moment_crystalaxis_z"][i]), 862 ] 863 ) 864 for i in range(len(data["_atom_site_moment_label"])) 865 } 866 except (ValueError, KeyError): 867 return None 868 return magmoms 869 870 def _parse_symbol(self, sym): 871 """ 872 Parse a string with a symbol to extract a string representing an element. 873 874 Args: 875 sym (str): A symbol to be parsed. 876 877 Returns: 878 A string with the parsed symbol. None if no parsing was possible. 879 """ 880 # Common representations for elements/water in cif files 881 # TODO: fix inconsistent handling of water 882 special = { 883 "Hw": "H", 884 "Ow": "O", 885 "Wat": "O", 886 "wat": "O", 887 "OH": "", 888 "OH2": "", 889 "NO3": "N", 890 } 891 892 parsed_sym = None 893 # try with special symbols, otherwise check the first two letters, 894 # then the first letter alone. If everything fails try extracting the 895 # first letters. 896 m_sp = re.match("|".join(special.keys()), sym) 897 if m_sp: 898 parsed_sym = special[m_sp.group()] 899 elif Element.is_valid_symbol(sym[:2].title()): 900 parsed_sym = sym[:2].title() 901 elif Element.is_valid_symbol(sym[0].upper()): 902 parsed_sym = sym[0].upper() 903 else: 904 m = re.match(r"w?[A-Z][a-z]*", sym) 905 if m: 906 parsed_sym = m.group() 907 908 if parsed_sym is not None and (m_sp or not re.match(r"{}\d*".format(parsed_sym), sym)): 909 msg = "{} parsed as {}".format(sym, parsed_sym) 910 warnings.warn(msg) 911 self.warnings.append(msg) 912 913 return parsed_sym 914 915 def _get_structure(self, data, primitive, symmetrized): 916 """ 917 Generate structure from part of the cif. 918 """ 919 920 def get_num_implicit_hydrogens(sym): 921 num_h = {"Wat": 2, "wat": 2, "O-H": 1} 922 return num_h.get(sym[:3], 0) 923 924 lattice = self.get_lattice(data) 925 926 # if magCIF, get magnetic symmetry moments and magmoms 927 # else standard CIF, and use empty magmom dict 928 if self.feature_flags["magcif_incommensurate"]: 929 raise NotImplementedError("Incommensurate structures not currently supported.") 930 if self.feature_flags["magcif"]: 931 self.symmetry_operations = self.get_magsymops(data) 932 magmoms = self.parse_magmoms(data, lattice=lattice) 933 else: 934 self.symmetry_operations = self.get_symops(data) 935 magmoms = {} 936 937 oxi_states = self.parse_oxi_states(data) 938 939 coord_to_species = OrderedDict() 940 coord_to_magmoms = OrderedDict() 941 942 def get_matching_coord(coord): 943 keys = list(coord_to_species.keys()) 944 coords = np.array(keys) 945 for op in self.symmetry_operations: 946 c = op.operate(coord) 947 inds = find_in_coord_list_pbc(coords, c, atol=self._site_tolerance) 948 # cant use if inds, because python is dumb and np.array([0]) evaluates 949 # to False 950 if len(inds): 951 return keys[inds[0]] 952 return False 953 954 for i in range(len(data["_atom_site_label"])): 955 956 try: 957 # If site type symbol exists, use it. Otherwise, we use the 958 # label. 959 symbol = self._parse_symbol(data["_atom_site_type_symbol"][i]) 960 num_h = get_num_implicit_hydrogens(data["_atom_site_type_symbol"][i]) 961 except KeyError: 962 symbol = self._parse_symbol(data["_atom_site_label"][i]) 963 num_h = get_num_implicit_hydrogens(data["_atom_site_label"][i]) 964 if not symbol: 965 continue 966 967 if oxi_states is not None: 968 o_s = oxi_states.get(symbol, 0) 969 # use _atom_site_type_symbol if possible for oxidation state 970 if "_atom_site_type_symbol" in data.data.keys(): 971 oxi_symbol = data["_atom_site_type_symbol"][i] 972 o_s = oxi_states.get(oxi_symbol, o_s) 973 try: 974 el = Species(symbol, o_s) 975 except Exception: 976 el = DummySpecies(symbol, o_s) 977 else: 978 el = get_el_sp(symbol) 979 980 x = str2float(data["_atom_site_fract_x"][i]) 981 y = str2float(data["_atom_site_fract_y"][i]) 982 z = str2float(data["_atom_site_fract_z"][i]) 983 magmom = magmoms.get(data["_atom_site_label"][i], np.array([0, 0, 0])) 984 985 try: 986 occu = str2float(data["_atom_site_occupancy"][i]) 987 except (KeyError, ValueError): 988 occu = 1 989 990 if occu > 0: 991 coord = (x, y, z) 992 match = get_matching_coord(coord) 993 comp_d = {el: occu} 994 if num_h > 0: 995 comp_d["H"] = num_h 996 self.warnings.append( 997 "Structure has implicit hydrogens defined, " 998 "parsed structure unlikely to be suitable for use " 999 "in calculations unless hydrogens added." 1000 ) 1001 comp = Composition(comp_d) 1002 if not match: 1003 coord_to_species[coord] = comp 1004 coord_to_magmoms[coord] = magmom 1005 else: 1006 coord_to_species[match] += comp 1007 # disordered magnetic not currently supported 1008 coord_to_magmoms[match] = None 1009 1010 sum_occu = [ 1011 sum(c.values()) for c in coord_to_species.values() if not set(c.elements) == {Element("O"), Element("H")} 1012 ] 1013 if any(o > 1 for o in sum_occu): 1014 msg = ( 1015 "Some occupancies ({}) sum to > 1! If they are within " 1016 "the occupancy_tolerance, they will be rescaled. " 1017 "The current occupancy_tolerance is set to: {}".format(sum_occu, self._occupancy_tolerance) 1018 ) 1019 warnings.warn(msg) 1020 self.warnings.append(msg) 1021 1022 allspecies = [] 1023 allcoords = [] 1024 allmagmoms = [] 1025 allhydrogens = [] 1026 equivalent_indices = [] 1027 1028 # check to see if magCIF file is disordered 1029 if self.feature_flags["magcif"]: 1030 for k, v in coord_to_magmoms.items(): 1031 if v is None: 1032 # Proposed solution to this is to instead store magnetic 1033 # moments as Species 'spin' property, instead of site 1034 # property, but this introduces ambiguities for end user 1035 # (such as unintended use of `spin` and Species will have 1036 # fictious oxidation state). 1037 raise NotImplementedError("Disordered magnetic structures not currently supported.") 1038 1039 if coord_to_species.items(): 1040 for idx, (comp, group) in enumerate( 1041 groupby( 1042 sorted(list(coord_to_species.items()), key=lambda x: x[1]), 1043 key=lambda x: x[1], 1044 ) 1045 ): 1046 tmp_coords = [site[0] for site in group] 1047 tmp_magmom = [coord_to_magmoms[tmp_coord] for tmp_coord in tmp_coords] 1048 1049 if self.feature_flags["magcif"]: 1050 coords, magmoms = self._unique_coords(tmp_coords, magmoms_in=tmp_magmom, lattice=lattice) 1051 else: 1052 coords, magmoms = self._unique_coords(tmp_coords) 1053 1054 if set(comp.elements) == {Element("O"), Element("H")}: 1055 # O with implicit hydrogens 1056 im_h = comp["H"] 1057 species = Composition({"O": comp["O"]}) 1058 else: 1059 im_h = 0 1060 species = comp 1061 1062 # The following might be a more natural representation of equivalent indicies, 1063 # but is not in the format expect by SymmetrizedStructure: 1064 # equivalent_indices.append(list(range(len(allcoords), len(coords)+len(allcoords)))) 1065 # The above gives a list like: 1066 # [[0, 1, 2, 3], [4, 5, 6, 7, 8, 9, 10, 11]] where the 1067 # integers are site indices, whereas the version used below will give a version like: 1068 # [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] 1069 # which is a list in the same order as the sites, but where if a site has the same integer 1070 # it is equivalent. 1071 equivalent_indices += len(coords) * [idx] 1072 1073 allhydrogens.extend(len(coords) * [im_h]) 1074 allcoords.extend(coords) 1075 allspecies.extend(len(coords) * [species]) 1076 allmagmoms.extend(magmoms) 1077 1078 # rescale occupancies if necessary 1079 for i, species in enumerate(allspecies): 1080 totaloccu = sum(species.values()) 1081 if 1 < totaloccu <= self._occupancy_tolerance: 1082 allspecies[i] = species / totaloccu 1083 1084 if allspecies and len(allspecies) == len(allcoords) and len(allspecies) == len(allmagmoms): 1085 site_properties = {} 1086 if any(allhydrogens): 1087 assert len(allhydrogens) == len(allcoords) 1088 site_properties["implicit_hydrogens"] = allhydrogens 1089 1090 if self.feature_flags["magcif"]: 1091 site_properties["magmom"] = allmagmoms 1092 1093 if len(site_properties) == 0: 1094 site_properties = None 1095 1096 struct = Structure(lattice, allspecies, allcoords, site_properties=site_properties) 1097 1098 if symmetrized: 1099 1100 # Wyckoff labels not currently parsed, note that not all CIFs will contain Wyckoff labels 1101 # TODO: extract Wyckoff labels (or other CIF attributes) and include as site_properties 1102 wyckoffs = ["Not Parsed"] * len(struct) 1103 1104 # Names of space groups are likewise not parsed (again, not all CIFs will contain this information) 1105 # What is stored are the lists of symmetry operations used to generate the structure 1106 # TODO: ensure space group labels are stored if present 1107 sg = SpacegroupOperations("Not Parsed", -1, self.symmetry_operations) 1108 1109 return SymmetrizedStructure(struct, sg, equivalent_indices, wyckoffs) 1110 1111 struct = struct.get_sorted_structure() 1112 1113 if primitive and self.feature_flags["magcif"]: 1114 struct = struct.get_primitive_structure(use_site_props=True) 1115 elif primitive: 1116 struct = struct.get_primitive_structure() 1117 struct = struct.get_reduced_structure() 1118 1119 return struct 1120 1121 def get_structures(self, primitive=True, symmetrized=False): 1122 """ 1123 Return list of structures in CIF file. primitive boolean sets whether a 1124 conventional cell structure or primitive cell structure is returned. 1125 1126 Args: 1127 primitive (bool): Set to False to return conventional unit cells. 1128 Defaults to True. With magnetic CIF files, will return primitive 1129 magnetic cell which may be larger than nuclear primitive cell. 1130 symmetrized (bool): If True, return a SymmetrizedStructure which will 1131 include the equivalent indices and symmetry operations used to 1132 create the Structure as provided by the CIF (if explicit symmetry 1133 operations are included in the CIF) or generated from information 1134 in the CIF (if only space group labels are provided). Note that 1135 currently Wyckoff labels and space group labels or numbers are 1136 not included in the generated SymmetrizedStructure, these will be 1137 notated as "Not Parsed" or -1 respectively. 1138 1139 Returns: 1140 List of Structures. 1141 """ 1142 1143 if primitive and symmetrized: 1144 raise ValueError( 1145 "Using both 'primitive' and 'symmetrized' arguments is not currently supported " 1146 "since unexpected behavior might result." 1147 ) 1148 1149 structures = [] 1150 for i, d in enumerate(self._cif.data.values()): 1151 try: 1152 s = self._get_structure(d, primitive, symmetrized) 1153 if s: 1154 structures.append(s) 1155 except (KeyError, ValueError) as exc: 1156 # Warn the user (Errors should never pass silently) 1157 # A user reported a problem with cif files produced by Avogadro 1158 # in which the atomic coordinates are in Cartesian coords. 1159 self.warnings.append(str(exc)) 1160 warnings.warn("No structure parsed for %d structure in CIF. Section of CIF file below." % (i + 1)) 1161 warnings.warn(str(d)) 1162 warnings.warn("Error is %s." % str(exc)) 1163 1164 if self.warnings: 1165 warnings.warn("Issues encountered while parsing CIF: %s" % "\n".join(self.warnings)) 1166 if len(structures) == 0: 1167 raise ValueError("Invalid cif file with no structures!") 1168 return structures 1169 1170 def get_bibtex_string(self): 1171 """ 1172 Get BibTeX reference from CIF file. 1173 :param data: 1174 :return: BibTeX string 1175 """ 1176 1177 try: 1178 from pybtex.database import BibliographyData, Entry 1179 except ImportError: 1180 raise RuntimeError("Bibliographic data extraction requires pybtex.") 1181 1182 bibtex_keys = { 1183 "author": ("_publ_author_name", "_citation_author_name"), 1184 "title": ("_publ_section_title", "_citation_title"), 1185 "journal": ( 1186 "_journal_name_full", 1187 "_journal_name_abbrev", 1188 "_citation_journal_full", 1189 "_citation_journal_abbrev", 1190 ), 1191 "volume": ("_journal_volume", "_citation_journal_volume"), 1192 "year": ("_journal_year", "_citation_year"), 1193 "number": ("_journal_number", "_citation_number"), 1194 "page_first": ("_journal_page_first", "_citation_page_first"), 1195 "page_last": ("_journal_page_last", "_citation_page_last"), 1196 "doi": ("_journal_DOI", "_citation_DOI"), 1197 } 1198 1199 entries = {} 1200 1201 # TODO: parse '_publ_section_references' when it exists? 1202 # TODO: CIF specification supports multiple citations. 1203 1204 for idx, data in enumerate(self._cif.data.values()): 1205 1206 # convert to lower-case keys, some cif files inconsistent 1207 data = {k.lower(): v for k, v in data.data.items()} 1208 1209 bibtex_entry = {} 1210 1211 for field, tags in bibtex_keys.items(): 1212 for tag in tags: 1213 if tag in data: 1214 if isinstance(data[tag], list): 1215 bibtex_entry[field] = data[tag][0] 1216 else: 1217 bibtex_entry[field] = data[tag] 1218 1219 # convert to bibtex author format ('and' delimited) 1220 if "author" in bibtex_entry: 1221 # separate out semicolon authors 1222 if isinstance(bibtex_entry["author"], str): 1223 if ";" in bibtex_entry["author"]: 1224 bibtex_entry["author"] = bibtex_entry["author"].split(";") 1225 1226 if isinstance(bibtex_entry["author"], list): 1227 bibtex_entry["author"] = " and ".join(bibtex_entry["author"]) 1228 1229 # convert to bibtex page range format, use empty string if not specified 1230 if ("page_first" in bibtex_entry) or ("page_last" in bibtex_entry): 1231 bibtex_entry["pages"] = "{0}--{1}".format( 1232 bibtex_entry.get("page_first", ""), 1233 bibtex_entry.get("page_last", ""), 1234 ) 1235 bibtex_entry.pop("page_first", None) # and remove page_first, page_list if present 1236 bibtex_entry.pop("page_last", None) 1237 1238 # cite keys are given as cif-reference-idx in order they are found 1239 entries["cifref{}".format(idx)] = Entry("article", list(bibtex_entry.items())) 1240 1241 return BibliographyData(entries).to_string(bib_format="bibtex") 1242 1243 def as_dict(self): 1244 """ 1245 :return: MSONable dict 1246 """ 1247 d = OrderedDict() 1248 for k, v in self._cif.data.items(): 1249 d[k] = {} 1250 for k2, v2 in v.data.items(): 1251 d[k][k2] = v2 1252 return d 1253 1254 @property 1255 def has_errors(self): 1256 """ 1257 :return: Whether there are errors/warnings detected in CIF parsing. 1258 """ 1259 return len(self.warnings) > 0 1260 1261 1262class CifWriter: 1263 """ 1264 A wrapper around CifFile to write CIF files from pymatgen structures. 1265 """ 1266 1267 def __init__( 1268 self, 1269 struct, 1270 symprec=None, 1271 write_magmoms=False, 1272 significant_figures=8, 1273 angle_tolerance=5.0, 1274 refine_struct=True, 1275 ): 1276 """ 1277 Args: 1278 struct (Structure): structure to write 1279 symprec (float): If not none, finds the symmetry of the structure 1280 and writes the cif with symmetry information. Passes symprec 1281 to the SpacegroupAnalyzer. See also refine_struct. 1282 write_magmoms (bool): If True, will write magCIF file. Incompatible 1283 with symprec 1284 significant_figures (int): Specifies precision for formatting of floats. 1285 Defaults to 8. 1286 angle_tolerance (float): Angle tolerance for symmetry finding. Passes 1287 angle_tolerance to the SpacegroupAnalyzer. Used only if symprec 1288 is not None. 1289 refine_struct: Used only if symprec is not None. If True, get_refined_structure 1290 is invoked to convert input structure from primitive to conventional. 1291 """ 1292 1293 if write_magmoms and symprec: 1294 warnings.warn("Magnetic symmetry cannot currently be detected by pymatgen," "disabling symmetry detection.") 1295 symprec = None 1296 1297 format_str = "{:.%df}" % significant_figures 1298 1299 block = OrderedDict() 1300 loops = [] 1301 spacegroup = ("P 1", 1) 1302 if symprec is not None: 1303 sf = SpacegroupAnalyzer(struct, symprec, angle_tolerance=angle_tolerance) 1304 spacegroup = (sf.get_space_group_symbol(), sf.get_space_group_number()) 1305 1306 if refine_struct: 1307 # Needs the refined structure when using symprec. This converts 1308 # primitive to conventional structures, the standard for CIF. 1309 struct = sf.get_refined_structure() 1310 1311 latt = struct.lattice 1312 comp = struct.composition 1313 no_oxi_comp = comp.element_composition 1314 block["_symmetry_space_group_name_H-M"] = spacegroup[0] 1315 for cell_attr in ["a", "b", "c"]: 1316 block["_cell_length_" + cell_attr] = format_str.format(getattr(latt, cell_attr)) 1317 for cell_attr in ["alpha", "beta", "gamma"]: 1318 block["_cell_angle_" + cell_attr] = format_str.format(getattr(latt, cell_attr)) 1319 block["_symmetry_Int_Tables_number"] = spacegroup[1] 1320 block["_chemical_formula_structural"] = no_oxi_comp.reduced_formula 1321 block["_chemical_formula_sum"] = no_oxi_comp.formula 1322 block["_cell_volume"] = format_str.format(latt.volume) 1323 1324 reduced_comp, fu = no_oxi_comp.get_reduced_composition_and_factor() 1325 block["_cell_formula_units_Z"] = str(int(fu)) 1326 1327 if symprec is None: 1328 block["_symmetry_equiv_pos_site_id"] = ["1"] 1329 block["_symmetry_equiv_pos_as_xyz"] = ["x, y, z"] 1330 else: 1331 sf = SpacegroupAnalyzer(struct, symprec) 1332 1333 symmops = [] 1334 for op in sf.get_symmetry_operations(): 1335 v = op.translation_vector 1336 symmops.append(SymmOp.from_rotation_and_translation(op.rotation_matrix, v)) 1337 1338 ops = [op.as_xyz_string() for op in symmops] 1339 block["_symmetry_equiv_pos_site_id"] = ["%d" % i for i in range(1, len(ops) + 1)] 1340 block["_symmetry_equiv_pos_as_xyz"] = ops 1341 1342 loops.append(["_symmetry_equiv_pos_site_id", "_symmetry_equiv_pos_as_xyz"]) 1343 1344 try: 1345 symbol_to_oxinum = OrderedDict([(el.__str__(), float(el.oxi_state)) for el in sorted(comp.elements)]) 1346 block["_atom_type_symbol"] = symbol_to_oxinum.keys() 1347 block["_atom_type_oxidation_number"] = symbol_to_oxinum.values() 1348 loops.append(["_atom_type_symbol", "_atom_type_oxidation_number"]) 1349 except (TypeError, AttributeError): 1350 symbol_to_oxinum = OrderedDict([(el.symbol, 0) for el in sorted(comp.elements)]) 1351 1352 atom_site_type_symbol = [] 1353 atom_site_symmetry_multiplicity = [] 1354 atom_site_fract_x = [] 1355 atom_site_fract_y = [] 1356 atom_site_fract_z = [] 1357 atom_site_label = [] 1358 atom_site_occupancy = [] 1359 atom_site_moment_label = [] 1360 atom_site_moment_crystalaxis_x = [] 1361 atom_site_moment_crystalaxis_y = [] 1362 atom_site_moment_crystalaxis_z = [] 1363 count = 0 1364 if symprec is None: 1365 for site in struct: 1366 for sp, occu in sorted(site.species.items()): 1367 atom_site_type_symbol.append(sp.__str__()) 1368 atom_site_symmetry_multiplicity.append("1") 1369 atom_site_fract_x.append(format_str.format(site.a)) 1370 atom_site_fract_y.append(format_str.format(site.b)) 1371 atom_site_fract_z.append(format_str.format(site.c)) 1372 atom_site_label.append("{}{}".format(sp.symbol, count)) 1373 atom_site_occupancy.append(occu.__str__()) 1374 1375 magmom = Magmom(site.properties.get("magmom", getattr(sp, "spin", 0))) 1376 if write_magmoms and abs(magmom) > 0: 1377 moment = Magmom.get_moment_relative_to_crystal_axes(magmom, latt) 1378 atom_site_moment_label.append("{}{}".format(sp.symbol, count)) 1379 atom_site_moment_crystalaxis_x.append(format_str.format(moment[0])) 1380 atom_site_moment_crystalaxis_y.append(format_str.format(moment[1])) 1381 atom_site_moment_crystalaxis_z.append(format_str.format(moment[2])) 1382 1383 count += 1 1384 else: 1385 # The following just presents a deterministic ordering. 1386 unique_sites = [ 1387 ( 1388 sorted(sites, key=lambda s: tuple(abs(x) for x in s.frac_coords))[0], 1389 len(sites), 1390 ) 1391 for sites in sf.get_symmetrized_structure().equivalent_sites 1392 ] 1393 for site, mult in sorted( 1394 unique_sites, 1395 key=lambda t: ( 1396 t[0].species.average_electroneg, 1397 -t[1], 1398 t[0].a, 1399 t[0].b, 1400 t[0].c, 1401 ), 1402 ): 1403 for sp, occu in site.species.items(): 1404 atom_site_type_symbol.append(sp.__str__()) 1405 atom_site_symmetry_multiplicity.append("%d" % mult) 1406 atom_site_fract_x.append(format_str.format(site.a)) 1407 atom_site_fract_y.append(format_str.format(site.b)) 1408 atom_site_fract_z.append(format_str.format(site.c)) 1409 atom_site_label.append("{}{}".format(sp.symbol, count)) 1410 atom_site_occupancy.append(occu.__str__()) 1411 count += 1 1412 1413 block["_atom_site_type_symbol"] = atom_site_type_symbol 1414 block["_atom_site_label"] = atom_site_label 1415 block["_atom_site_symmetry_multiplicity"] = atom_site_symmetry_multiplicity 1416 block["_atom_site_fract_x"] = atom_site_fract_x 1417 block["_atom_site_fract_y"] = atom_site_fract_y 1418 block["_atom_site_fract_z"] = atom_site_fract_z 1419 block["_atom_site_occupancy"] = atom_site_occupancy 1420 loops.append( 1421 [ 1422 "_atom_site_type_symbol", 1423 "_atom_site_label", 1424 "_atom_site_symmetry_multiplicity", 1425 "_atom_site_fract_x", 1426 "_atom_site_fract_y", 1427 "_atom_site_fract_z", 1428 "_atom_site_occupancy", 1429 ] 1430 ) 1431 if write_magmoms: 1432 block["_atom_site_moment_label"] = atom_site_moment_label 1433 block["_atom_site_moment_crystalaxis_x"] = atom_site_moment_crystalaxis_x 1434 block["_atom_site_moment_crystalaxis_y"] = atom_site_moment_crystalaxis_y 1435 block["_atom_site_moment_crystalaxis_z"] = atom_site_moment_crystalaxis_z 1436 loops.append( 1437 [ 1438 "_atom_site_moment_label", 1439 "_atom_site_moment_crystalaxis_x", 1440 "_atom_site_moment_crystalaxis_y", 1441 "_atom_site_moment_crystalaxis_z", 1442 ] 1443 ) 1444 d = OrderedDict() 1445 d[comp.reduced_formula] = CifBlock(block, loops, comp.reduced_formula) 1446 self._cf = CifFile(d) 1447 1448 @property 1449 def ciffile(self): 1450 """ 1451 Returns: CifFile associated with the CifWriter. 1452 """ 1453 return self._cf 1454 1455 def __str__(self): 1456 """ 1457 Returns the cif as a string. 1458 """ 1459 return self._cf.__str__() 1460 1461 def write_file(self, filename): 1462 """ 1463 Write the cif file. 1464 """ 1465 with zopen(filename, "wt") as f: 1466 f.write(self.__str__()) 1467 1468 1469def str2float(text): 1470 """ 1471 Remove uncertainty brackets from strings and return the float. 1472 """ 1473 1474 try: 1475 # Note that the ending ) is sometimes missing. That is why the code has 1476 # been modified to treat it as optional. Same logic applies to lists. 1477 return float(re.sub(r"\(.+\)*", "", text)) 1478 except TypeError: 1479 if isinstance(text, list) and len(text) == 1: 1480 return float(re.sub(r"\(.+\)*", "", text[0])) 1481 except ValueError as ex: 1482 if text.strip() == ".": 1483 return 0 1484 raise ex 1485 raise ValueError(f"{text} cannot be converted to float") 1486