# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.
"""
This module provides utility classes for string operations.
"""
import re
from fractions import Fraction


# Maps each ASCII digit to its unicode subscript counterpart.
SUBSCRIPT_UNICODE = {
    "0": "₀",
    "1": "₁",
    "2": "₂",
    "3": "₃",
    "4": "₄",
    "5": "₅",
    "6": "₆",
    "7": "₇",
    "8": "₈",
    "9": "₉",
}

# Maps each ASCII digit and the +/- signs to their unicode superscript counterparts.
SUPERSCRIPT_UNICODE = {
    "0": "⁰",
    "1": "¹",
    "2": "²",
    "3": "³",
    "4": "⁴",
    "5": "⁵",
    "6": "⁶",
    "7": "⁷",
    "8": "⁸",
    "9": "⁹",
    "+": "⁺",
    "-": "⁻",
}

# TODO: make standalone functions in this module use the same implementation as Stringify
# Note: previous deprecations of standalone functions in this module were removed due to
# a community need.


class Stringify:
    """
    Mix-in class for string formatting, e.g. subscripting or superscripting numbers and symbols.

    Subclasses provide ``__str__`` (or override ``to_pretty_string``) and may set
    ``STRING_MODE`` to "SUBSCRIPT" or "SUPERSCRIPT" to choose how bare numbers that
    follow a letter or parenthesis are rendered. Any other value leaves them untouched.
    """

    STRING_MODE = "SUBSCRIPT"

    def to_pretty_string(self) -> str:
        """
        :return: A pretty string representation. By default, the __str__ output is used, but this method can be
            overridden if a different representation from default is desired.
        """
        return self.__str__()

    def to_latex_string(self) -> str:
        """
        Generates a LaTeX formatted string. The mode is set by the class variable STRING_MODE, which defaults to
        "SUBSCRIPT". E.g., Fe2O3 is transformed to Fe$_{2}$O$_{3}$. Setting STRING_MODE to "SUPERSCRIPT" creates
        superscript, e.g., Fe2+ becomes Fe$^{2+}$. The initial string is obtained from to_pretty_string(), which
        defaults to the class's __str__ method.

        :return: String for display as in LaTeX with proper superscripts and subscripts.
        """
        str_ = self.to_pretty_string()
        # First we process strings that already have _ and ^ by escaping the relevant parts.
        str_ = re.sub(r"_(\d+)", r"$_{\1}$", str_)
        str_ = re.sub(r"\^([\d\+\-]+)", r"$^{\1}$", str_)
        # Then any run of digits/signs/periods directly after a letter or parenthesis
        # is wrapped according to the configured mode.
        if self.STRING_MODE == "SUBSCRIPT":
            return re.sub(r"([A-Za-z\(\)])([\d\+\-\.]+)", r"\1$_{\2}$", str_)
        if self.STRING_MODE == "SUPERSCRIPT":
            return re.sub(r"([A-Za-z\(\)])([\d\+\-\.]+)", r"\1$^{\2}$", str_)
        return str_

    def to_html_string(self) -> str:
        """
        Generates a HTML formatted string. This uses the output from to_latex_string to generate a HTML output.

        :return: HTML formatted string.
        """
        str_ = re.sub(r"\$_\{([^}]+)\}\$", r"<sub>\1</sub>", self.to_latex_string())
        str_ = re.sub(r"\$\^\{([^}]+)\}\$", r"<sup>\1</sup>", str_)
        return re.sub(r"\$\\overline\{([^}]+)\}\$", r'<span style="text-decoration:overline">\1</span>', str_)

    def to_unicode_string(self) -> str:
        """
        :return: Unicode string with proper sub and superscripts. Note that this works only with systems where the sub
            and superscripts are pure integers.
        """
        str_ = self.to_latex_string()
        # finditer scans the string object bound at call time, so rebinding str_
        # inside the loop does not disturb the iteration.
        for m in re.finditer(r"\$_\{(\d+)\}\$", str_):
            s1 = m.group()
            s2 = [SUBSCRIPT_UNICODE[s] for s in m.group(1)]
            str_ = str_.replace(s1, "".join(s2))
        for m in re.finditer(r"\$\^\{([\d\+\-]+)\}\$", str_):
            s1 = m.group()
            s2 = [SUPERSCRIPT_UNICODE[s] for s in m.group(1)]
            str_ = str_.replace(s1, "".join(s2))
        return str_


def str_delimited(results, header=None, delimiter="\t"):
    """
    Given a tuple of tuples, generate a delimited string form.
    >>> results = [["a","b","c"],["d","e","f"],[1,2,3]]
    >>> print(str_delimited(results,delimiter=","))
    a,b,c
    d,e,f
    1,2,3

    Args:
        results: 2d sequence of arbitrary types.
        header: optional sequence of column headers, emitted as the first line.
        delimiter: string placed between the entries of each row.

    Returns:
        The rows joined by newlines, with the entries of each row joined by the delimiter.
    """
    returnstr = ""
    if header is not None:
        # Header entries are stringified like the data cells so that
        # non-string headers (e.g. numbers) do not raise a TypeError.
        returnstr += delimiter.join(str(h) for h in header) + "\n"
    return returnstr + "\n".join(delimiter.join(str(m) for m in result) for result in results)


def formula_double_format(afloat, ignore_ones=True, tol=1e-8):
    """
    This function is used to make pretty formulas by formatting the amounts.
    Instead of Li1.0 Fe1.0 P1.0 O4.0, you get LiFePO4.

    Args:
        afloat (float): a float
        ignore_ones (bool): if true, floats of 1 are ignored.
        tol (float): Tolerance to round to nearest int. i.e. 2.0000000001 -> 2
            and 1.9999999999 -> 2

    Returns:
        A string representation of the float for formulas.
    """
    if ignore_ones and afloat == 1:
        return ""
    # Compare against the *nearest* integer. Truncating with int() would miss
    # values that sit just below an integer (e.g. 1.9999999999).
    nearest = int(round(afloat))
    if abs(afloat - nearest) < tol:
        return str(nearest)
    return str(round(afloat, 8))


def latexify(formula):
    """
    Generates a LaTeX formatted formula. E.g., Fe2O3 is transformed to
    Fe$_{2}$O$_{3}$.

    Note that Composition now has a to_latex_string() method that may
    be used instead.

    Args:
        formula (str): Input formula.

    Returns:
        Formula suitable for display as in LaTeX with proper subscripts.
    """
    return re.sub(r"([A-Za-z\(\)])([\d\.]+)", r"\1$_{\2}$", formula)


def htmlify(formula):
    """
    Generates a HTML formatted formula, e.g. Fe2O3 is transformed to
    Fe<sub>2</sub>O<sub>3</sub>

    Note that Composition now has a to_html_string() method that may
    be used instead.

    :param formula: Input formula string.
    :return: Formula with amounts wrapped in <sub></sub> tags.
    """
    return re.sub(r"([A-Za-z\(\)])([\d\.]+)", r"\1<sub>\2</sub>", formula)


def unicodeify(formula):
    """
    Generates a formula with unicode subscripts, e.g. Fe2O3 is transformed
    to Fe₂O₃. Does not support formulae with decimal points.

    Note that Composition now has a to_unicode_string() method that may
    be used instead.

    :param formula: Input formula string.
    :return: Formula with every ASCII digit replaced by its unicode subscript.
    :raises ValueError: if the formula contains a period.
    """

    if "." in formula:
        raise ValueError("No unicode character exists for subscript period.")

    for original_subscript, subscript_unicode in SUBSCRIPT_UNICODE.items():
        formula = formula.replace(original_subscript, subscript_unicode)

    return formula


def latexify_spacegroup(spacegroup_symbol):
    r"""
    Generates a latex formatted spacegroup. E.g., P2_1/c is converted to
    P2$_{1}$/c and P-1 is converted to P$\overline{1}$.

    Note that SymmetryGroup now has a to_latex_string() method that may
    be called instead.

    Args:
        spacegroup_symbol (str): A spacegroup symbol

    Returns:
        A latex formatted spacegroup with proper subscripts and overlines.
    """
    sym = re.sub(r"_(\d+)", r"$_{\1}$", spacegroup_symbol)
    return re.sub(r"-(\d)", r"$\\overline{\1}$", sym)


def unicodeify_spacegroup(spacegroup_symbol):
    r"""
    Generates a unicode formatted spacegroup. E.g., P2$_{1}$/c is converted to
    P2₁/c and P$\overline{1}$ is converted to P1̅ (the combining overline is
    placed after the character it decorates).

    The input is first passed through latexify_spacegroup, so plain symbols
    such as P2_1/c or P-1 are handled as well.

    Note that SymmetryGroup now has a to_unicode_string() method that
    may be called instead.

    Args:
        spacegroup_symbol (str): A spacegroup symbol as LaTeX

    Returns:
        A unicode spacegroup with proper subscripts and overlines.
    """

    if not spacegroup_symbol:
        return ""

    symbol = latexify_spacegroup(spacegroup_symbol)

    for number, unicode_number in SUBSCRIPT_UNICODE.items():
        symbol = symbol.replace("$_{" + str(number) + "}$", unicode_number)
        symbol = symbol.replace("_" + str(number), unicode_number)

    overline = "\u0305"  # u"\u0304" (macron) is also an option

    symbol = symbol.replace("$\\overline{", "")
    symbol = symbol.replace("$", "")
    symbol = symbol.replace("{", "")
    # overline unicode symbol comes after the character with the overline
    symbol = symbol.replace("}", overline)

    return symbol


def unicodeify_species(specie_string):
    r"""
    Generates a unicode formatted species string, with appropriate
    superscripts for oxidation states.

    Every digit, "+" and "-" in the input is replaced by its superscript form.

    Note that Species now has a to_unicode_string() method that
    may be used instead.

    Args:
        specie_string (str): Species string, e.g. O2-

    Returns:
        Species string, e.g. O²⁻
    """

    if not specie_string:
        return ""

    for character, unicode_character in SUPERSCRIPT_UNICODE.items():
        specie_string = specie_string.replace(character, unicode_character)

    return specie_string


def stream_has_colours(stream):
    """
    True if stream supports colours. Python cookbook, #475186

    Returns False when the stream has no ``isatty`` attribute, is not a TTY, or
    when querying curses fails for any reason.
    """
    if not hasattr(stream, "isatty"):
        return False

    if not stream.isatty():
        return False  # auto color only on TTYs
    try:
        import curses

        curses.setupterm()
        return curses.tigetnum("colors") > 2
    except Exception:
        # Deliberately broad: this is a best-effort probe.
        return False  # guess false in case of error


def transformation_to_string(matrix, translation_vec=(0, 0, 0), components=("x", "y", "z"), c="", delim=","):
    """
    Convenience method. Given matrix returns string, e.g. x+2y+1/4

    :param matrix: indexable of rows; only the first three rows are used
    :param translation_vec: indexable of translations; only the first three entries are used
    :param components: either ('x', 'y', 'z') or ('a', 'b', 'c')
    :param c: optional additional character to print (used for magmoms)
    :param delim: delimiter
    :return: xyz string
    """
    parts = []
    for i in range(3):
        s = ""
        m = matrix[i]
        t = translation_vec[i]
        for j, dim in enumerate(components):
            if m[j] != 0:
                f = Fraction(m[j]).limit_denominator()
                # Explicit "+" only between terms; negative terms carry their own sign.
                if s != "" and f >= 0:
                    s += "+"
                if abs(f.numerator) != 1:
                    # str() of the numerator already includes a leading "-" if negative.
                    s += str(f.numerator)
                elif f < 0:
                    s += "-"
                s += c + dim
                if f.denominator != 1:
                    s += "/" + str(f.denominator)
        if t != 0:
            s += ("+" if (t > 0 and s != "") else "") + str(Fraction(t).limit_denominator())
        if s == "":
            s += "0"
        parts.append(s)
    return delim.join(parts)


def disordered_formula(disordered_struct, symbols=("x", "y", "z"), fmt="plain"):
    """
    Returns a formula of a form like AxB1-x (x=0.5)
    for disordered structures. Will only return a
    formula for disordered structures with one
    kind of disordered site at present.

    Args:
        disordered_struct: a disordered structure
        symbols: a tuple of characters to use for
            subscripts, by default this is ('x', 'y', 'z')
            but if you have more than three disordered
            species more symbols will need to be added
        fmt (str): 'plain', 'HTML' or 'LaTeX'

    Returns (str): a disordered formula string

    Raises:
        ValueError: if the structure is ordered, if more than one kind of
            disordered site is present, if there are fewer symbols than
            disordered species, or if fmt is not one of the supported formats.
    """

    # this is in string utils and not in
    # Composition because we need to have access
    # to site occupancies to calculate this, so
    # have to pass the full structure as an argument
    # (alternatively this could be made a method on
    # Structure)
    from pymatgen.core.composition import Composition
    from pymatgen.core.periodic_table import get_el_sp

    if disordered_struct.is_ordered:
        raise ValueError("Structure is not disordered, so disordered formula not defined.")

    disordered_site_compositions = {site.species for site in disordered_struct if not site.is_ordered}

    if len(disordered_site_compositions) > 1:
        # this probably won't happen too often
        raise ValueError(
            "Ambiguous how to define disordered formula when more than one type of disordered site is present."
        )
    disordered_site_composition = disordered_site_compositions.pop()

    disordered_species = {str(sp) for sp, _ in disordered_site_composition.items()}

    if len(disordered_species) > len(symbols):
        # this probably won't happen too often either
        raise ValueError("Not enough symbols to describe disordered composition: {}".format(symbols))
    # One symbol fewer than species: the last species gets the "remainder" expression.
    symbols = list(symbols)[0 : len(disordered_species) - 1]

    comp = disordered_struct.composition.get_el_amt_dict().items()
    # sort by electronegativity, as per composition
    comp = sorted(comp, key=lambda x: get_el_sp(x[0]).X)

    disordered_comp = []
    variable_map = {}

    total_disordered_occu = sum(occu for sp, occu in comp if str(sp) in disordered_species)

    # composition to get common factor
    # The disordered species are collapsed into a single placeholder "X" entry
    # before the reduction factor is computed.
    factor_comp = disordered_struct.composition.as_dict()
    factor_comp["X"] = total_disordered_occu
    for sp in disordered_species:
        del factor_comp[str(sp)]
    factor_comp = Composition.from_dict(factor_comp)
    factor = factor_comp.get_reduced_formula_and_factor()[1]

    total_disordered_occu /= factor
    # Built before symbols are consumed below, so it references every symbol.
    remainder = "{}-{}".format(
        formula_double_format(total_disordered_occu, ignore_ones=False),
        "-".join(symbols),
    )

    for sp, occu in comp:
        sp = str(sp)
        if sp not in disordered_species:
            disordered_comp.append((sp, formula_double_format(occu / factor)))
        else:
            if len(symbols) > 0:
                symbol = symbols.pop(0)
                disordered_comp.append((sp, symbol))
                # NOTE(review): total_disordered_occu has already been divided by
                # factor above, so this evaluates to occu / (unreduced total),
                # i.e. a fraction of the disordered site — confirm this is the
                # intended value when the reduced total is not 1.
                variable_map[symbol] = occu / total_disordered_occu / factor
            else:
                disordered_comp.append((sp, remainder))

    if fmt == "LaTeX":
        sub_start = "_{"
        sub_end = "}"
    elif fmt == "HTML":
        sub_start = "<sub>"
        sub_end = "</sub>"
    elif fmt != "plain":
        raise ValueError("Unsupported output format, choose from: LaTeX, HTML, plain")

    disordered_formula = []
    for sp, occu in disordered_comp:
        disordered_formula.append(sp)
        if occu:  # can be empty string if 1
            if fmt != "plain":
                disordered_formula.append(sub_start)
            disordered_formula.append(occu)
            if fmt != "plain":
                disordered_formula.append(sub_end)
    disordered_formula.append(" ")
    disordered_formula += ["{}={} ".format(k, formula_double_format(v)) for k, v in variable_map.items()]

    # Drop the trailing space left by the last appended piece.
    return "".join(map(str, disordered_formula))[0:-1]