1# coding: utf-8
2# Copyright (c) Pymatgen Development Team.
3# Distributed under the terms of the MIT License.
4"""
This module provides utility classes and functions for string operations.
6"""
7import re
8from fractions import Fraction
9
10
# Maps each ASCII digit character to the corresponding Unicode subscript character.
SUBSCRIPT_UNICODE = dict(zip("0123456789", "₀₁₂₃₄₅₆₇₈₉"))
23
# Maps each ASCII digit, plus the "+" and "-" signs, to the corresponding Unicode superscript character.
SUPERSCRIPT_UNICODE = dict(zip("0123456789+-", "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻"))
38
39# TODO: make standalone functions in this module use the same implementation as Stringify
40# Note: previous deprecations of standalone functions in this module were removed due to
41# a community need.
42
43
class Stringify:
    """
    Mix-in class for string formatting, e.g. subscripting or superscripting numbers and symbols.

    The class variable STRING_MODE selects how to_latex_string treats numbers that directly follow a letter or
    parenthesis: "SUBSCRIPT" (default) or "SUPERSCRIPT". Any other value leaves such numbers untouched.
    """

    STRING_MODE = "SUBSCRIPT"

    def to_pretty_string(self) -> str:
        """
        :return: A pretty string representation. The __str__ output is used by default; override this method if a
            different representation is desired as the starting point of the other conversions.
        """
        return self.__str__()

    def to_latex_string(self) -> str:
        """
        Generates a LaTeX formatted string. The mode is set by the class variable STRING_MODE, which defaults to
        "SUBSCRIPT". E.g., Fe2O3 is transformed to Fe$_{2}$O$_{3}$. Setting STRING_MODE to "SUPERSCRIPT" creates
        superscripts, e.g., Fe2+ becomes Fe$^{2+}$. The initial string is obtained from to_pretty_string.

        :return: String for display as in LaTeX with proper superscripts and subscripts.
        """
        latex = self.to_pretty_string()
        # Explicit "_" and "^" markers already present in the string are wrapped in math mode first.
        explicit_markers = (
            (r"_(\d+)", r"$_{\1}$"),
            (r"\^([\d\+\-]+)", r"$^{\1}$"),
        )
        for pattern, template in explicit_markers:
            latex = re.sub(pattern, template, latex)

        mode = self.STRING_MODE
        if mode == "SUBSCRIPT":
            marker = "_"
        elif mode == "SUPERSCRIPT":
            marker = "^"
        else:
            # Unknown mode: only the explicit markers are converted.
            return latex
        return re.sub(r"([A-Za-z\(\)])([\d\+\-\.]+)", r"\1$" + marker + r"{\2}$", latex)

    def to_html_string(self) -> str:
        """
        Generates a HTML formatted string. This uses the output from to_latex_string and rewrites the LaTeX
        subscript, superscript and overline constructs as HTML tags.

        :return: HTML formatted string.
        """
        html = self.to_latex_string()
        conversions = (
            (r"\$_\{([^}]+)\}\$", r"<sub>\1</sub>"),
            (r"\$\^\{([^}]+)\}\$", r"<sup>\1</sup>"),
            (r"\$\\overline\{([^}]+)\}\$", r'<span style="text-decoration:overline">\1</span>'),
        )
        for pattern, template in conversions:
            html = re.sub(pattern, template, html)
        return html

    def to_unicode_string(self):
        """
        :return: Unicode string with proper sub and superscripts. Note that this works only with systems where the sub
            and superscripts are pure integers.
        """
        result = self.to_latex_string()
        # Every LaTeX subscript found is swapped (wherever it occurs) for its unicode subscript digits.
        for match in re.finditer(r"\$_\{(\d+)\}\$", result):
            subscripted = "".join(SUBSCRIPT_UNICODE[char] for char in match.group(1))
            result = result.replace(match.group(), subscripted)
        # Same treatment for superscripts, which may also carry "+" and "-" signs.
        for match in re.finditer(r"\$\^\{([\d\+\-]+)\}\$", result):
            superscripted = "".join(SUPERSCRIPT_UNICODE[char] for char in match.group(1))
            result = result.replace(match.group(), superscripted)
        return result
100
101
def str_delimited(results, header=None, delimiter="\t"):
    """
    Given a 2d sequence (e.g., a tuple of tuples), generate a delimited string form.
    >>> results = [["a","b","c"],["d","e","f"],[1,2,3]]
    >>> print(str_delimited(results,delimiter=","))
    a,b,c
    d,e,f
    1,2,3

    Args:
        results: 2d sequence of arbitrary types. Every item is converted with str().
        header: Optional sequence of column titles. Items are converted with str() as well, so
            non-string titles (e.g. numbers) are accepted.
        delimiter: String placed between the items of a row. Defaults to a tab.

    Returns:
        The header line (if a header is given, always followed by a newline) and then the rows,
        one per line, with the items of each row separated by the delimiter.
    """
    header_line = ""
    if header is not None:
        # Header items get the same str() conversion as row items; the original joined them raw
        # and therefore failed on non-string titles.
        header_line = delimiter.join(str(title) for title in header) + "\n"
    body = "\n".join(delimiter.join(str(item) for item in row) for row in results)
    return header_line + body
122
123
def formula_double_format(afloat, ignore_ones=True, tol=1e-8):
    """
    This function is used to make pretty formulas by formatting the amounts.
    Instead of Li1.0 Fe1.0 P1.0 O4.0, you get LiFePO4.

    Args:
        afloat (float): a float
        ignore_ones (bool): if true, floats exactly equal to 1 are ignored (an empty string is returned).
        tol (float): Tolerance to round to nearest int. i.e. 2.0000000001 -> 2 and 1.9999999999 -> 2

    Returns:
        A string representation of the float for formulas.
    """
    if ignore_ones and afloat == 1:
        return ""
    # Compare against the NEAREST integer. Truncating with int() only caught values slightly above
    # an integer, so 1.9999999999 was rendered as "2.0" instead of "2".
    nearest = int(round(afloat))
    if abs(afloat - nearest) < tol:
        return str(nearest)
    return str(round(afloat, 8))
142
143
def latexify(formula):
    """
    Generates a LaTeX formatted formula. E.g., Fe2O3 is transformed to
    Fe$_{2}$O$_{3}$.

    Note that Composition now has a to_latex_string() method that may
    be used instead.

    Args:
        formula (str): Input formula.

    Returns:
        Formula suitable for display as in LaTeX with proper subscripts.
    """
    # An amount is a run of digits/periods directly after a letter or a parenthesis.
    amount_after_symbol = re.compile(r"([A-Za-z\(\)])([\d\.]+)")
    return amount_after_symbol.sub(r"\1$_{\2}$", formula)
159
160
def htmlify(formula):
    """
    Generates a HTML formatted formula, e.g. Fe2O3 is transformed to
    Fe<sub>2</sub>O<sub>3</sub>

    Note that Composition now has a to_html_string() method that may
    be used instead.

    :param formula: Input formula string.
    :return: Formula with every amount wrapped in <sub></sub> tags.
    """
    # An amount is a run of digits/periods directly after a letter or a parenthesis.
    amount_after_symbol = re.compile(r"([A-Za-z\(\)])([\d\.]+)")
    return amount_after_symbol.sub(r"\1<sub>\2</sub>", formula)
173
174
def unicodeify(formula):
    """
    Generates a formula with unicode subscripts, e.g. Fe2O3 is transformed
    to Fe₂O₃. Does not support formulae with decimal points.

    Note that Composition now has a to_unicode_string() method that may
    be used instead.

    :param formula: Input formula string.
    :return: Formula in which every digit is replaced by its unicode subscript.
    :raises ValueError: If the formula contains a period.
    """
    if "." in formula:
        raise ValueError("No unicode character exists for subscript period.")

    # One translation pass swaps every digit for its subscript counterpart.
    return formula.translate(str.maketrans(SUBSCRIPT_UNICODE))
194
195
def latexify_spacegroup(spacegroup_symbol):
    r"""
    Generates a latex formatted spacegroup. E.g., P2_1/c is converted to
    P2$_{1}$/c and P-1 is converted to P$\overline{1}$.

    Note that SymmetryGroup now has a to_latex_string() method that may
    be called instead.

    Args:
        spacegroup_symbol (str): A spacegroup symbol

    Returns:
        A latex formatted spacegroup with proper subscripts and overlines.
    """
    rules = (
        (r"_(\d+)", r"$_{\1}$"),  # "_n" becomes a subscript
        (r"-(\d)", r"$\\overline{\1}$"),  # "-n" becomes an overlined digit
    )
    latex = spacegroup_symbol
    for pattern, template in rules:
        latex = re.sub(pattern, template, latex)
    return latex
212
213
def unicodeify_spacegroup(spacegroup_symbol):
    r"""
    Generates a unicode formatted spacegroup. E.g., P2$_{1}$/c is converted to
    P2₁/c and P$\overline{1}$ is converted to P1̅ (the combining overline
    follows the character it decorates).

    The input is passed through latexify_spacegroup first, so plain symbols
    such as P2_1/c or P-1 are handled as well.

    Note that SymmetryGroup now has a to_unicode_string() method that
    may be called instead.

    Args:
        spacegroup_symbol (str): A spacegroup symbol as LaTeX

    Returns:
        A unicode spacegroup with proper subscripts and overlines. An empty
        string is returned for an empty/falsy symbol.
    """
    if not spacegroup_symbol:
        return ""

    text = latexify_spacegroup(spacegroup_symbol)

    # single-digit subscripts, in LaTeX form first and then in bare "_n" form
    for digit, subscript in SUBSCRIPT_UNICODE.items():
        for token in ("$_{{{}}}$".format(digit), "_{}".format(digit)):
            text = text.replace(token, subscript)

    combining_overline = "\u0305"  # "\u0304" (macron) is also an option

    # Strip the remaining LaTeX markup in this exact order; the closing brace
    # turns into the overline because the combining character comes after the
    # character it decorates.
    cleanup = (
        ("$\\overline{", ""),
        ("$", ""),
        ("{", ""),
        ("}", combining_overline),
    )
    for old, new in cleanup:
        text = text.replace(old, new)

    return text
247
248
def unicodeify_species(specie_string):
    r"""
    Generates a unicode formatted species string, with appropriate
    superscripts for oxidation states.

    Every digit and every "+"/"-" sign in the string is replaced by its
    unicode superscript.

    Note that Species now has a to_unicode_string() method that
    may be used instead.

    Args:
        specie_string (str): Species string, e.g. O2-

    Returns:
        Species string, e.g. O²⁻. An empty string is returned for an
        empty/falsy input.
    """
    if not specie_string:
        return ""

    return specie_string.translate(str.maketrans(SUPERSCRIPT_UNICODE))
271
272
def stream_has_colours(stream):
    """
    True if stream supports colours. Python cookbook, #475186

    Streams without an isatty method, or that are not attached to a TTY, never
    report colour support. Otherwise curses is asked for the number of colours
    and more than two are required.
    """
    # auto color only on TTYs
    if not (hasattr(stream, "isatty") and stream.isatty()):
        return False

    try:
        import curses

        curses.setupterm()
        n_colours = curses.tigetnum("colors")
    except Exception:
        return False  # guess false in case of error
    return n_colours > 2
289
290
def transformation_to_string(matrix, translation_vec=(0, 0, 0), components=("x", "y", "z"), c="", delim=","):
    """
    Convenience method. Given matrix returns string, e.g. x+2y+1/4
    :param matrix: 3x3 matrix; row i produces the i-th expression
    :param translation_vec: translation appended to each of the three expressions
    :param components: either ('x', 'y', 'z') or ('a', 'b', 'c')
    :param c: optional additional character to print (used for magmoms)
    :param delim: delimiter
    :return: xyz string
    """
    expressions = []
    for row_index in range(3):
        row = matrix[row_index]
        shift = translation_vec[row_index]
        expr = ""
        for col_index, label in enumerate(components):
            coeff = row[col_index]
            if coeff == 0:
                continue
            frac = Fraction(coeff).limit_denominator()
            # a leading "+" is only needed between terms, never at the start
            sign = "+" if (expr != "" and frac >= 0) else ""
            if abs(frac.numerator) != 1:
                # the numerator carries its own minus sign
                prefix = str(frac.numerator)
            elif frac < 0:
                prefix = "-"
            else:
                prefix = ""
            suffix = "" if frac.denominator == 1 else "/" + str(frac.denominator)
            expr += sign + prefix + c + label + suffix
        if shift != 0:
            joiner = "+" if (shift > 0 and expr != "") else ""
            expr += joiner + str(Fraction(shift).limit_denominator())
        if expr == "":
            expr = "0"
        expressions.append(expr)
    return delim.join(expressions)
324
325
def disordered_formula(disordered_struct, symbols=("x", "y", "z"), fmt="plain"):
    """
    Returns a formula of a form like AxB1-x (x=0.5)
    for disordered structures. Will only return a
    formula for disordered structures with one
    kind of disordered site at present.

    Args:
        disordered_struct: a disordered structure
        symbols: a tuple of characters to use for
            subscripts, by default this is ('x', 'y', 'z')
            but if you have more than three disordered
            species more symbols will need to be added
        fmt (str): 'plain', 'HTML' or 'LaTeX'

    Returns (str): a disordered formula string

    Raises:
        ValueError: if the structure is ordered, if more than one kind of
            disordered site is present, if there are fewer symbols than
            disordered species, or if fmt is not a supported format.
    """
    # This lives in the string utilities and not in Composition because the
    # site occupancies are needed, which requires the full structure
    # (alternatively this could be made a method on Structure).
    from pymatgen.core.composition import Composition
    from pymatgen.core.periodic_table import get_el_sp

    if disordered_struct.is_ordered:
        raise ValueError("Structure is not disordered, so disordered formula not defined.")

    site_compositions = {site.species for site in disordered_struct if not site.is_ordered}
    if len(site_compositions) > 1:
        # this probably won't happen too often
        raise ValueError(
            "Ambiguous how to define disordered formula when more than one type of disordered site is present."
        )
    site_composition = site_compositions.pop()

    disordered_species = {str(sp) for sp, _ in site_composition.items()}
    n_disordered = len(disordered_species)
    if n_disordered > len(symbols):
        # this probably won't happen too often either
        raise ValueError("Not enough symbols to describe disordered composition: {}".format(symbols))
    # the last disordered species is written as a remainder, so it needs no symbol of its own
    free_symbols = list(symbols)[: n_disordered - 1]

    # element amounts sorted by electronegativity, as per composition
    amounts = sorted(
        disordered_struct.composition.get_el_amt_dict().items(),
        key=lambda item: get_el_sp(item[0]).X,
    )

    total_disordered_occu = sum([amt for el, amt in amounts if str(el) in disordered_species])

    # composition to get common factor: the disordered species are lumped
    # together under the key "X" before the reduction factor is computed
    lumped = disordered_struct.composition.as_dict()
    lumped["X"] = total_disordered_occu
    for sp in disordered_species:
        del lumped[sp]
    factor = Composition.from_dict(lumped).get_reduced_formula_and_factor()[1]

    total_disordered_occu /= factor
    remainder = "{}-{}".format(
        formula_double_format(total_disordered_occu, ignore_ones=False),
        "-".join(free_symbols),
    )

    entries = []
    variable_map = {}
    for el, amt in amounts:
        el = str(el)
        if el not in disordered_species:
            entries.append((el, formula_double_format(amt / factor)))
        elif free_symbols:
            symbol = free_symbols.pop(0)
            entries.append((el, symbol))
            variable_map[symbol] = amt / total_disordered_occu / factor
        else:
            # no symbols left: this species takes whatever remains
            entries.append((el, remainder))

    if fmt == "LaTeX":
        sub_start, sub_end = "_{", "}"
    elif fmt == "HTML":
        sub_start, sub_end = "<sub>", "</sub>"
    elif fmt == "plain":
        sub_start = sub_end = ""
    else:
        raise ValueError("Unsupported output format, choose from: LaTeX, HTML, plain")

    pieces = []
    for el, amount in entries:
        pieces.append(el)
        if amount:  # can be empty string if 1
            pieces.extend((sub_start, amount, sub_end))
    pieces.append(" ")
    pieces.extend("{}={} ".format(name, formula_double_format(value)) for name, value in variable_map.items())

    # drop the single trailing space
    return "".join(map(str, pieces))[:-1]
429