1# -*- coding: utf-8 -*-
2
3"""
4Strings and Characters
5"""
6
7import io
8import re
9import sys
10from sys import version_info
11import unicodedata
12from binascii import hexlify, unhexlify
13from heapq import heappush, heappop
14from typing import Any, Callable, List
15
16from mathics.version import __version__  # noqa used in loading to check consistency.
17from mathics.builtin.base import (
18    BinaryOperator,
19    Builtin,
20    Test,
21    Predefined,
22    PrefixOperator,
23)
24from mathics.core.expression import (
25    Expression,
26    Symbol,
27    SymbolFailed,
28    SymbolFalse,
29    SymbolTrue,
30    SymbolList,
31    String,
32    Integer,
33    Integer0,
34    Integer1,
35    from_python,
36    string_list,
37)
38from mathics.core.parser import MathicsFileLineFeeder, parse
39from mathics.builtin.lists import python_seq, convert_seq
40from mathics.settings import SYSTEM_CHARACTER_ENCODING
41from mathics_scanner import TranslateError
42
43_regex_longest = {
44    "+": "+",
45    "*": "*",
46}
47
48_regex_shortest = {
49    "+": "+?",
50    "*": "*?",
51}
52
53
54alphabet_descriptions = {
55    "English": {
56        "Lowercase": "abcdefghijklmnopqrstuvwxyz",
57        "Uppercase": "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
58    },
59    "Spanish": {
60        "Lowercase": "abcdefghijklmnñopqrstuvwxyz",
61        "Uppercase": "ABCDEFGHIJKLMNÑOPQRSTUVWXYZ",
62    },
63    "Greek": {
64        "Lowercase": "αβγδεζηθικλμνξοπρστυφχψω",
65        "Uppercase": "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
66    },
67    "Cyrillic": {
68        "Lowercase": "абвгґдђѓеёєжзѕиіїйјклљмнњопрстћќуўфхцчџшщъыьэюя",
69        "Uppercase": "АБВГҐДЂЃЕЁЄЖЗЅИІЇЙЈКЛЉМНЊОПРСТЋЌУЎФХЦЧЏШЩЪЫЬЭЮЯ",
70    },
71}
72
73alphabet_alias={
74    "English": "English",
75    "French": "English",
76    "German": "English",
77    "Spanish": "Spanish",
78    "Greek": "Greek",
79    "Cyrillic": "Cyrillic",
80    "Russian": "Cyrillic",
81}
82
83
84def _encode_pname(name):
85    return "n" + hexlify(name.encode("utf8")).decode("utf8")
86
87
88def _decode_pname(name):
89    return unhexlify(name[1:]).decode("utf8")
90
91
def _evaluate_match(s, m, evaluation):
    """
    Substitute the named groups of the regex match `m` into the expression
    `s` and evaluate the result.

    Each group name is decoded back into a pattern-variable name and bound
    to the matched text wrapped in a String.
    """
    bindings = {}
    for encoded_name, text in m.groupdict().items():
        bindings[_decode_pname(encoded_name)] = String(text)
    substituted = s.replace_vars(bindings, in_scoping=False)
    return substituted.evaluate(evaluation)
97
98
99def _parallel_match(text, rules, flags, limit):
100    heap = []
101
102    def push(i, iter, form):
103        m = None
104        try:
105            m = next(iter)
106        except StopIteration:
107            pass
108        if m is not None:
109            heappush(heap, (m.start(), i, m, form, iter))
110
111    for i, (patt, form) in enumerate(rules):
112        push(i, re.finditer(patt, text, flags=flags), form)
113
114    k = 0
115    n = 0
116
117    while heap:
118        start, i, match, form, iter = heappop(heap)
119
120        if start >= k:
121            yield match, form
122
123            n += 1
124            if n >= limit > 0:
125                break
126
127            k = match.end()
128
129        push(i, iter, form)
130
131
def to_regex(
    expr, evaluation, q=_regex_longest, groups=None, abbreviated_patterns=False
):
    """
    Translate a Mathics string pattern into the source of a Python regex.

    Parameters:
      expr: the pattern (a String, a Symbol or a compound Expression).
        None is passed through as None.
      evaluation: used only to emit the StringExpression::cond message.
      q: quantifier table mapping "+" and "*" to the regex operators to
        emit (_regex_longest for greedy, _regex_shortest for lazy).
      groups: dict of pattern-variable names already bound while
        translating the enclosing pattern.  It is shared through the
        recursion so that a repeated name becomes a back-reference.
      abbreviated_patterns: when true, a String is read as an abbreviated
        string pattern in which "*" and "@" are metacharacters and a
        backslash escapes the following character.  The setting is not
        passed on to sub-patterns.

    Returns the regex source as a str, or None if `expr` is not a valid
    string pattern.
    """
    if expr is None:
        return None

    if groups is None:
        groups = {}

    def recurse(x, quantifiers=q):
        return to_regex(x, evaluation, q=quantifiers, groups=groups)

    if isinstance(expr, String):
        result = expr.get_string_value()
        if abbreviated_patterns:
            pieces = []
            # result[i:j] is the pending run of literal characters.
            i, j = 0, 0
            while j < len(result):
                c = result[j]
                if c == "\\" and j + 1 < len(result):
                    # backslash: take the next character literally
                    pieces.append(re.escape(result[i:j]))
                    pieces.append(re.escape(result[j + 1]))
                    j += 2
                    i = j
                elif c == "*":
                    pieces.append(re.escape(result[i:j]))
                    pieces.append("(.*)")
                    j += 1
                    i = j
                elif c == "@":
                    pieces.append(re.escape(result[i:j]))
                    # one or more characters, excluding uppercase letters
                    pieces.append("([^A-Z]+)")
                    j += 1
                    i = j
                else:
                    j += 1
            pieces.append(re.escape(result[i:j]))
            result = "".join(pieces)
        else:
            result = re.escape(result)
        return result
    if expr.has_form("RegularExpression", 1):
        regex = expr.leaves[0].get_string_value()
        if regex is None:
            return regex
        try:
            re.compile(regex)
            # Don't return the compiled regex because it may need to composed
            # further e.g. StringExpression["abc", RegularExpression[regex2]].
            return regex
        except re.error:
            return None  # invalid regex

    if isinstance(expr, Symbol):
        # No "(?u)" inline flags here: str patterns are Unicode-aware by
        # default in Python 3, and the fragments below get embedded in
        # larger patterns where a global inline flag that is not at the
        # very start is rejected by Python >= 3.11.
        return {
            # The sign class must not contain "|", which would be a literal.
            "System`NumberString": r"[-+]?(\d+(\.\d*)?|\.\d+)?",
            "System`Whitespace": r"\s+",
            "System`DigitCharacter": r"\d",
            "System`WhitespaceCharacter": r"\s",
            "System`WordCharacter": r"[^\W_]",
            "System`StartOfLine": r"^",
            "System`EndOfLine": r"$",
            "System`StartOfString": r"\A",
            "System`EndOfString": r"\Z",
            "System`WordBoundary": r"\b",
            "System`LetterCharacter": r"[^\W_0-9]",
            "System`HexidecimalCharacter": r"[0-9a-fA-F]",
        }.get(expr.get_name())

    if expr.has_form("CharacterRange", 2):
        (start, stop) = (leaf.get_string_value() for leaf in expr.leaves)
        if all(x is not None and len(x) == 1 for x in (start, stop)):
            return "[{0}-{1}]".format(re.escape(start), re.escape(stop))

    if expr.has_form("Blank", 0):
        return r"(.|\n)"
    if expr.has_form("BlankSequence", 0):
        return r"(.|\n)" + q["+"]
    if expr.has_form("BlankNullSequence", 0):
        return r"(.|\n)" + q["*"]
    if expr.has_form("Except", 1, 2):
        if len(expr.leaves) == 1:
            leaves = [expr.leaves[0], Expression("Blank")]
        else:
            leaves = [expr.leaves[0], expr.leaves[1]]
        leaves = [recurse(leaf) for leaf in leaves]
        if all(leaf is not None for leaf in leaves):
            return "(?!{0}){1}".format(*leaves)
    if expr.has_form("Characters", 1):
        leaf = expr.leaves[0].get_string_value()
        if leaf is not None:
            return "[{0}]".format(re.escape(leaf))
    if expr.has_form("StringExpression", None):
        leaves = [recurse(leaf) for leaf in expr.leaves]
        if None in leaves:
            return None
        return "".join(leaves)
    if expr.has_form("Repeated", 1):
        leaf = recurse(expr.leaves[0])
        if leaf is not None:
            return "({0})".format(leaf) + q["+"]
    if expr.has_form("RepeatedNull", 1):
        leaf = recurse(expr.leaves[0])
        if leaf is not None:
            return "({0})".format(leaf) + q["*"]
    if expr.has_form("Alternatives", None):
        leaves = [recurse(leaf) for leaf in expr.leaves]
        if all(leaf is not None for leaf in leaves):
            # "|" has the lowest precedence in a regex, so the alternation
            # must be grouped or it would swallow neighbouring fragments
            # (and anchors) once embedded in a larger pattern.
            return "(?:" + "|".join(leaves) + ")"
    if expr.has_form("Shortest", 1):
        return recurse(expr.leaves[0], quantifiers=_regex_shortest)
    if expr.has_form("Longest", 1):
        return recurse(expr.leaves[0], quantifiers=_regex_longest)
    if expr.has_form("Pattern", 2) and isinstance(expr.leaves[0], Symbol):
        name = expr.leaves[0].get_name()
        patt = groups.get(name, None)
        if patt is not None:
            # The name is already bound: emit a back-reference.
            if expr.leaves[1].has_form("Blank", 0):
                pass  # ok, no warnings
            elif not expr.leaves[1].sameQ(patt):
                evaluation.message(
                    "StringExpression", "cond", expr.leaves[0], expr, expr.leaves[0]
                )
            return "(?P=%s)" % _encode_pname(name)
        else:
            groups[name] = expr.leaves[1]
            sub_pattern = recurse(expr.leaves[1])
            if sub_pattern is None:
                # An untranslatable sub-pattern makes the whole pattern
                # invalid; do not format None into the regex source.
                return None
            return "(?P<%s>%s)" % (_encode_pname(name), sub_pattern)

    return None
262
263
def anchor_pattern(patt):
    r"""
    Anchor a regex in order to force matching against an entire string.

    The pattern is wrapped in a non-capturing group between ``\A`` and
    ``\Z``.  Grouping makes the anchors apply to every branch of a
    top-level alternation ("a|b"), and anchoring unconditionally avoids
    mistaking a pattern that ends in an escaped backslash followed by "Z"
    for one that is already anchored.  Anchoring an already anchored
    pattern is harmless.

    Returns the anchored regex source as a str.
    """
    return r"\A(?:" + patt + r")\Z"
273
274
def mathics_split(patt, string, flags):
    """
    Split `string` at every match of the regex `patt` and return the list
    of pieces between the matches.

    This is used instead of re.split because re.split also returns the
    text of capturing groups, and because its handling of empty matches
    has differed between Python versions.  The result always has one more
    element than there are matches; pieces may be empty strings.
    """
    pieces = []
    cursor = 0  # end of the previous delimiter
    for delimiter in re.finditer(patt, string, flags):
        pieces.append(string[cursor : delimiter.start()])
        cursor = delimiter.end()
    # whatever follows the last delimiter (the whole string if none matched)
    pieces.append(string[cursor : len(string)])
    return pieces
295
296
# pack_bytes / unpack_bytes convert between a sequence of integer byte
# codes and the interpreter's native byte-string type.
if version_info < (3, 0):
    from struct import pack, unpack

    def pack_bytes(codes):
        """Pack a sequence of integer byte codes into a byte string."""
        return pack("B" * len(codes), *codes)

    def unpack_bytes(codes):
        """Unpack a byte string into its integer byte codes."""
        return unpack("B" * len(codes), codes)


else:

    def pack_bytes(codes):
        """Pack a sequence of integer byte codes into a bytes object."""
        return bytes(codes)

    def unpack_bytes(codes):
        """Return the integer byte codes of `codes` as a list."""
        return list(map(int, codes))
314
315
class SystemCharacterEncoding(Predefined):
    """
    <dl>
    <dt>'$SystemCharacterEncoding'
        <dd>gives the system character encoding, as configured by the
        'SYSTEM_CHARACTER_ENCODING' value of the Mathics settings.
    </dl>
    """

    name = "$SystemCharacterEncoding"

    # The rule's right-hand side is Mathics source text, hence the
    # surrounding double quotes that make it a string literal.
    rules = {
        "$SystemCharacterEncoding": '"' + SYSTEM_CHARACTER_ENCODING + '"',
    }
329
330
class CharacterEncoding(Predefined):
    """
    <dl>
    <dt>'$CharacterEncoding'
        <dd>specifies the default character encoding to use if no other encoding is
        specified.
    </dl>
    """

    name = "$CharacterEncoding"
    # Initial value, written as Mathics source text (a string literal).
    value = '"UTF-8"'

    rules = {
        "$CharacterEncoding": value,
    }
346
347
# Character-encoding names accepted on the Mathics side (keys), each mapped
# to the name of the corresponding Python codec (values).
_encodings = {
    # see https://docs.python.org/3/library/codecs.html#standard-encodings
    "ASCII": "ascii",
    "CP949": "cp949",
    "CP950": "cp950",
    "EUC-JP": "euc_jp",
    "IBM-850": "cp850",
    "ISOLatin1": "iso8859_1",
    "ISOLatin2": "iso8859_2",
    "ISOLatin3": "iso8859_3",
    "ISOLatin4": "iso8859_4",
    "ISOLatinCyrillic": "iso8859_5",
    "ISO8859-1": "iso8859_1",
    "ISO8859-2": "iso8859_2",
    "ISO8859-3": "iso8859_3",
    "ISO8859-4": "iso8859_4",
    "ISO8859-5": "iso8859_5",
    "ISO8859-6": "iso8859_6",
    "ISO8859-7": "iso8859_7",
    "ISO8859-8": "iso8859_8",
    "ISO8859-9": "iso8859_9",
    "ISO8859-10": "iso8859_10",
    "ISO8859-13": "iso8859_13",
    "ISO8859-14": "iso8859_14",
    "ISO8859-15": "iso8859_15",
    "ISO8859-16": "iso8859_16",
    "koi8-r": "koi8_r",
    "MacintoshCyrillic": "mac_cyrillic",
    "MacintoshGreek": "mac_greek",
    "MacintoshIcelandic": "mac_iceland",
    "MacintoshRoman": "mac_roman",
    "MacintoshTurkish": "mac_turkish",
    "ShiftJIS": "shift_jis",
    "Unicode": "utf_16",
    "UTF-8": "utf_8",
    "UTF8": "utf_8",
    "WindowsANSI": "cp1252",
    "WindowsBaltic": "cp1257",
    "WindowsCyrillic": "cp1251",
    "WindowsEastEurope": "cp1250",
    "WindowsGreek": "cp1253",
    "WindowsTurkish": "cp1254",
}
391
392
def to_python_encoding(encoding):
    """
    Return the Python codec name registered in `_encodings` for the given
    encoding name, or None if the name is not known.
    """
    try:
        return _encodings[encoding]
    except KeyError:
        return None
395
396
class CharacterEncodings(Predefined):
    # $CharacterEncodings evaluates to the list of all encoding names known
    # to `_encodings`; the value is built as Mathics source text, a list of
    # double-quoted strings.
    name = "$CharacterEncodings"
    value = "{%s}" % ",".join('"%s"' % encoding for encoding in _encodings)

    rules = {
        name: value,
    }
404
405
class StringExpression(BinaryOperator):
    """
    <dl>
    <dt>'StringExpression[s_1, s_2, ...]'
      <dd>represents a sequence of strings and symbolic string objects $s_i$.
    </dl>

    >> "a" ~~ "b" // FullForm
     = "ab"

    #> "a" ~~ "b" ~~ "c" // FullForm
     = "abc"

    #> a ~~ b
     = a ~~ b
    """

    operator = "~~"
    precedence = 135
    attributes = ("Flat", "OneIdentity", "Protected")

    messages = {
        "invld": "Element `1` is not a valid string or pattern element in `2`.",
        "cond": "Ignored restriction given for `1` in `2` as it does not match previous occurences of `1`.",
    }

    def apply(self, args, evaluation):
        "StringExpression[args__String]"
        # A StringExpression made only of literal strings collapses to
        # their concatenation.
        texts = []
        for arg in args.get_sequence():
            text = arg.get_string_value()
            if text is None:
                return
            texts.append(text)
        return String("".join(texts))
439
440
class RegularExpression(Builtin):
    r"""
    <dl>
    <dt>'RegularExpression["regex"]'
      <dd>represents the regex specified by the string $"regex"$.
    </dl>

    >> StringSplit["1.23, 4.56  7.89", RegularExpression["(\\s|,)+"]]
     = {1.23, 4.56, 7.89}

    #> RegularExpression["[abc]"]
     = RegularExpression[[abc]]

    ## Mathematica doesn't seem to verify the correctness of regex
    #> StringSplit["ab23c", RegularExpression["[0-9]++"]]
     : Element RegularExpression[[0-9]++] is not a valid string or pattern element in RegularExpression[[0-9]++].
     = StringSplit[ab23c, RegularExpression[[0-9]++]]

    #> StringSplit["ab23c", RegularExpression[2]]
     : Element RegularExpression[2] is not a valid string or pattern element in RegularExpression[2].
     = StringSplit[ab23c, RegularExpression[2]]
    """

    # No rules are defined here: the wrapped regex is picked up (and
    # validated with re.compile) by `to_regex` in this module.
463
464
class NumberString(Builtin):
    """
    <dl>
    <dt>'NumberString'
      <dd>represents the characters in a number.
    </dl>

    >> StringMatchQ["1234", NumberString]
     = True

    >> StringMatchQ["1234.5", NumberString]
    = True

    >> StringMatchQ["1.2`20", NumberString]
     = False

    #> StringMatchQ[".12", NumberString]
     = True
    #> StringMatchQ["12.", NumberString]
     = True
    #> StringMatchQ["12.31.31", NumberString]
     = False
    #> StringMatchQ[".", NumberString]
     = False
    #> StringMatchQ["-1.23", NumberString]
     = True
    #> StringMatchQ["+12.3", NumberString]
     = True
    #> StringMatchQ["+.2", NumberString]
     = True
    #> StringMatchQ["1.2e4", NumberString]
     = False
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular expression.
498
499
class DigitCharacter(Builtin):
    """
    <dl>
    <dt>'DigitCharacter'
      <dd>represents the digits 0-9.
    </dl>

    >> StringMatchQ["1", DigitCharacter]
     = True
    >> StringMatchQ["a", DigitCharacter]
     = False
    >> StringMatchQ["12", DigitCharacter]
     = False

    >> StringMatchQ["123245", DigitCharacter..]
     = True

    #> StringMatchQ["123245a6", DigitCharacter..]
     = False
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular expression.
520
521
class Whitespace(Builtin):
    r"""
    <dl>
    <dt>'Whitespace'
      <dd>represents a sequence of whitespace characters.
    </dl>

    >> StringMatchQ["\r \n", Whitespace]
     = True

    >> StringSplit["a  \n b \r\n c d", Whitespace]
     = {a, b, c, d}

    >> StringReplace[" this has leading and trailing whitespace \n ", (StartOfString ~~ Whitespace) | (Whitespace ~~ EndOfString) -> ""] <> " removed" // FullForm
     = "this has leading and trailing whitespace removed"
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular expression.
538
539
class WhitespaceCharacter(Builtin):
    r"""
    <dl>
    <dt>'WhitespaceCharacter'
      <dd>represents a single whitespace character.
    </dl>

    >> StringMatchQ["\n", WhitespaceCharacter]
     = True

    >> StringSplit["a\nb\r\nc\rd", WhitespaceCharacter]
     = {a, b, c, d}

    For sequences of whitespace characters use 'Whitespace':
    >> StringMatchQ[" \n", WhitespaceCharacter]
     = False
    >> StringMatchQ[" \n", Whitespace]
     = True
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular expression.
559
560
class WordCharacter(Builtin):
    r"""
    <dl>
    <dt>'WordCharacter'
      <dd>represents a single letter or digit character.
    </dl>

    >> StringMatchQ[#, WordCharacter] &/@ {"1", "a", "A", ",", " "}
     = {True, True, True, False, False}

    Test whether a string is alphanumeric:
    >> StringMatchQ["abc123DEF", WordCharacter..]
     = True
    >> StringMatchQ["$b;123", WordCharacter..]
     = False
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular expression.
577
578
class StartOfString(Builtin):
    r"""
    <dl>
    <dt>'StartOfString'
      <dd>represents the start of a string.
    </dl>

    Test whether strings start with "a":
    >> StringMatchQ[#, StartOfString ~~ "a" ~~ __] &/@ {"apple", "banana", "artichoke"}
     = {True, False, True}

    >> StringReplace["aba\nabb", StartOfString ~~ "a" -> "c"]
     = cba
     . abb
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular-expression anchor.
594
595
class EndOfString(Builtin):
    r"""
    <dl>
    <dt>'EndOfString'
      <dd>represents the end of a string.
    </dl>

    Test whether strings end with "e":
    >> StringMatchQ[#, __ ~~ "e" ~~ EndOfString] &/@ {"apple", "banana", "artichoke"}
     = {True, False, True}

    >> StringReplace["aab\nabb", "b" ~~ EndOfString -> "c"]
     = aab
     . abc
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular-expression anchor.
611
612
class StartOfLine(Builtin):
    r"""
    <dl>
    <dt>'StartOfLine'
      <dd>represents the start of a line in a string.
    </dl>

    >> StringReplace["aba\nbba\na\nab", StartOfLine ~~ "a" -> "c"]
     = cba
     . bba
     . c
     . cb

    >> StringSplit["abc\ndef\nhij", StartOfLine]
     = {abc
     . , def
     . , hij}
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular-expression anchor.
631
632
class EndOfLine(Builtin):
    r"""
    <dl>
    <dt>'EndOfLine'
      <dd>represents the end of a line in a string.
    </dl>

    >> StringReplace["aba\nbba\na\nab", "a" ~~ EndOfLine -> "c"]
     = abc
     . bbc
     . c
     . ab

    >> StringSplit["abc\ndef\nhij", EndOfLine]
     = {abc,
     . def,
     . hij}
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular-expression anchor.
651
652
class WordBoundary(Builtin):
    """
    <dl>
    <dt>'WordBoundary'
      <dd>represents the boundary between words.
    </dl>

    >> StringReplace["apple banana orange artichoke", "e" ~~ WordBoundary -> "E"]
     = applE banana orangE artichokE
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular-expression assertion.
663
664
class LetterCharacter(Builtin):
    """
    <dl>
    <dt>'LetterCharacter'
      <dd>represents a single letter.
    </dl>

    >> StringMatchQ[#, LetterCharacter] & /@ {"a", "1", "A", " ", "."}
     = {True, False, True, False, False}

    LetterCharacter also matches unicode characters.
    >> StringMatchQ["\\[Lambda]", LetterCharacter]
     = True
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular expression.
679
680
681# FIXME: Generalize string.lower() and ord()
def letter_number(chars: List[str], start_ord) -> List["Integer"]:
    """
    Return, for each character in `chars`, the code point of its lowercase
    form minus `start_ord`, wrapped in an Integer.

    The caller is expected to have checked that every item is a single
    alphabetic character.
    """
    numbers = []
    for char in chars:
        offset = ord(char.lower()) - start_ord
        numbers.append(Integer(offset))
    return numbers
686
687
class Alphabet(Builtin):
    """
     <dl>
      <dt>'Alphabet[]'
      <dd>gives the list of lowercase letters a-z in the English alphabet.

      <dt>'Alphabet[$type$]'
      <dd>gives the alphabet for the language or class $type$.
    </dl>

    >> Alphabet[]
     = {a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z}
    >> Alphabet["German"]
     = {a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z}

    """
    messages = {
        "nalph": "The alphabet `` is not known or not available.",
    }

    rules = {
        "Alphabet[]": """Alphabet["English"]""",
    }

    def apply(self, alpha, evaluation):
        """Alphabet[alpha_String]"""
        # Resolve the language name to the alphabet it uses.  `.get` is
        # required here: indexing would raise KeyError for an unknown name
        # instead of reaching the "nalph" message below.
        alphakey = alphabet_alias.get(alpha.get_string_value())
        alphabet = None
        if alphakey is not None:
            alphabet = alphabet_descriptions.get(alphakey)
        if alphabet is None:
            evaluation.message("Alphabet", "nalph", alpha)
            return
        return Expression(SymbolList, *[String(c) for c in alphabet["Lowercase"]])
724
725
class LetterNumber(Builtin):
    r"""
    <dl>
      <dt>'LetterNumber[$c$]'
      <dd>returns the position of the character $c$ in the English alphabet.

      <dt>'LetterNumber["string"]'
      <dd>returns a list of the positions of characters in string.
      <dt>'LetterNumber["string", $alpha$]'
      <dd>returns a list of the positions of characters in string, regarding the alphabet $alpha$.
    </dl>

    >> LetterNumber["b"]
     = 2

    LetterNumber also works with uppercase characters
    >> LetterNumber["B"]
     = 2

    >> LetterNumber["ss2!"]
     = {19, 19, 0, 0}

    Get positions of each of the letters in a string:
    >> LetterNumber[Characters["Peccary"]]
    = {16, 5, 3, 3, 1, 18, 25}

    >> LetterNumber[{"P", "Pe", "P1", "eck"}]
    = {16, {16, 5}, {16, 0}, {5, 3, 11}}

    #> LetterNumber[4]
     : The argument 4 is not a string.
     = LetterNumber[4]

    >> LetterNumber["\[Beta]", "Greek"]
     = 2

    """
    # FIXME: put the right unicode characters in a way that the
    # following test works...
    r"""
    # #> LetterNumber["\[CapitalBeta]", "Greek"]
    #  = 2

    """
    messages = {
        "nalph": "The alphabet `` is not known or not available.",
        "nas": ("The argument `1` is not a string."),
    }

    def _letter_positions(self, text, alphabet):
        """
        Return the 1-based position in `alphabet` of each character of
        `text`, looking at lowercase letters first and uppercase letters
        second; characters that are not in the alphabet give 0.
        """
        lowercase = alphabet["Lowercase"]
        uppercase = alphabet["Uppercase"]
        positions = []
        for char in text:
            # str.find returns -1 when absent, so after "+ 1" a miss is 0.
            position = lowercase.find(char) + 1
            if position == 0:
                position = uppercase.find(char) + 1
            positions.append(position)
        return positions

    def _letter_numbers(self, chars, alphabet, evaluation):
        """
        Compute the result for `chars` against `alphabet`: an Integer for a
        one-character string, a list of Integers for any other string, and
        a list of such results for a non-empty list.  Anything else emits
        the "nas" message.
        """
        if isinstance(chars, String):
            positions = self._letter_positions(chars.get_string_value(), alphabet)
            if len(positions) == 1:
                return Integer(positions[0])
            return Expression(SymbolList, *[Integer(p) for p in positions])
        if chars.has_form("List", 1, None):
            results = [
                self._letter_numbers(leaf, alphabet, evaluation)
                for leaf in chars.leaves
            ]
            return Expression(SymbolList, *results)
        return evaluation.message(self.__class__.__name__, "nas", chars)

    def apply_alpha_str(self, chars: List[Any], alpha: String, evaluation):
        "LetterNumber[chars_, alpha_String]"
        alphakey = alphabet_alias.get(alpha.get_string_value(), None)
        alphabet = None
        if alphakey is not None:
            alphabet = alphabet_descriptions.get(alphakey, None)
        if alphabet is None:
            evaluation.message("LetterNumber", "nalph", alpha)
            return
        if alphakey == "English":
            return self.apply(chars, evaluation)
        return self._letter_numbers(chars, alphabet, evaluation)

    def apply(self, chars: List[Any], evaluation):
        "LetterNumber[chars_]"
        # Looking letters up in the English alphabet table (rather than
        # doing code-point arithmetic) gives 0 for every character that is
        # not one of a-z / A-Z, including single-character arguments.
        return self._letter_numbers(
            chars, alphabet_descriptions["English"], evaluation
        )
837
838
class HexidecimalCharacter(Builtin):
    """
    <dl>
    <dt>'HexidecimalCharacter'
      <dd>represents the characters 0-9, a-f and A-F.
    </dl>

    >> StringMatchQ[#, HexidecimalCharacter] & /@ {"a", "1", "A", "x", "H", " ", "."}
     = {True, True, True, False, False, False, False}
    """

    # Marker symbol only: `to_regex` in this module translates it into the
    # corresponding regular expression.
    # NOTE(review): the name is spelled "Hexidecimal"; the usual spelling
    # is "Hexadecimal" - confirm whether an alias under that name is wanted.
849
850
class DigitQ(Builtin):
    """
    <dl>
    <dt>'DigitQ[$string$]'
        <dd>yields 'True' if all the characters in the $string$ are digits, and yields 'False' otherwise.
    </dl>

    >> DigitQ["9"]
     = True

    >> DigitQ["a"]
     = False

    >> DigitQ["01001101011000010111010001101000011010010110001101110011"]
     = True

    >> DigitQ["-123456789"]
     = False

    #> DigitQ[""]
     = True

    #> DigitQ["."]
     = False

    #> DigitQ[1==2]
     = False

    #> DigitQ[a=1]
     = False
    """

    # Non-string arguments give False; strings are matched against zero or
    # more DigitCharacter, which is why the empty string gives True.
    rules = {
        "DigitQ[string_]": (
            "If[StringQ[string], StringMatchQ[string, DigitCharacter...], False, False]"
        ),
    }
888
889
class LetterQ(Builtin):
    """
    <dl>
    <dt>'LetterQ[$string$]'
        <dd>yields 'True' if all the characters in the $string$ are letters, and yields 'False' otherwise.
    </dl>

    >> LetterQ["m"]
     = True

    >> LetterQ["9"]
     = False

    >> LetterQ["Mathics"]
     = True

    >> LetterQ["Welcome to Mathics"]
     = False

    #> LetterQ[""]
     = True

    #> LetterQ["\\[Alpha]\\[Beta]\\[Gamma]\\[Delta]\\[Epsilon]\\[Zeta]\\[Eta]\\[Theta]"]
     = True
    """

    # Non-string arguments give False; strings are matched against zero or
    # more LetterCharacter, which is why the empty string gives True.
    rules = {
        "LetterQ[string_]": (
            "If[StringQ[string], StringMatchQ[string, LetterCharacter...], False, False]"
        ),
    }
921
922
class StringMatchQ(Builtin):
    r"""
    <dl>
    <dt>'StringMatchQ["string", $pattern$]'
      <dd>checks whether the whole of "string" matches $pattern$.
    </dl>

    >> StringMatchQ["abc", "abc"]
     = True

    >> StringMatchQ["abc", "abd"]
     = False

    >> StringMatchQ["15a94xcZ6", (DigitCharacter | LetterCharacter)..]
     = True

    #> StringMatchQ["abc1", LetterCharacter]
     = False

    #> StringMatchQ["abc", "ABC"]
     = False
    #> StringMatchQ["abc", "ABC", IgnoreCase -> True]
     = True

    ## Words containing nonword characters
    #> StringMatchQ[{"monkey", "don't", "AAA", "S&P"}, ___ ~~ Except[WordCharacter] ~~ ___]
     = {False, True, False, True}

    ## Try to match a literal number
    #> StringMatchQ[1.5, NumberString]
     : String or list of strings expected at position 1 in StringMatchQ[1.5, NumberString].
     = StringMatchQ[1.5, NumberString]

    Use StringMatchQ as an operator
    >> StringMatchQ[LetterCharacter]["a"]
     = True

    ## Abbreviated string patterns Issue #517
    #> StringMatchQ["abcd", "abc*"]
     = True
    #> StringMatchQ["abc", "abc*"]
     = True
    #> StringMatchQ["abc\\", "abc\\"]
     = True
    #> StringMatchQ["abc*d", "abc\\*d"]
     = True
    #> StringMatchQ["abc*d", "abc\\**"]
     = True
    #> StringMatchQ["abcde", "a*f"]
     = False

    #> StringMatchQ["abcde", "a@e"]
     = True
    #> StringMatchQ["aBCDe", "a@e"]
     = False
    #> StringMatchQ["ae", "a@e"]
     = False
    """

    attributes = ("Listable",)

    options = {
        "IgnoreCase": "False",
        "SpellingCorrections": "None",
    }

    messages = {
        "strse": "String or list of strings expected at position `1` in `2`.",
    }

    rules = {
        "StringMatchQ[patt_][expr_]": "StringMatchQ[expr, patt]",
    }

    def apply(self, string, patt, evaluation, options):
        "StringMatchQ[string_, patt_, OptionsPattern[%(name)s]]"
        subject = string.get_string_value()
        if subject is None:
            return evaluation.message(
                "StringMatchQ",
                "strse",
                Integer1,
                Expression("StringMatchQ", string, patt),
            )

        # A plain string pattern is read as an abbreviated string pattern.
        regex = to_regex(patt, evaluation, abbreviated_patterns=True)
        if regex is None:
            return evaluation.message(
                "StringExpression", "invld", patt, Expression("StringExpression", patt)
            )

        ignore_case = options["System`IgnoreCase"] == SymbolTrue
        flags = (re.MULTILINE | re.IGNORECASE) if ignore_case else re.MULTILINE

        # The pattern has to cover the whole string, hence the anchoring.
        matched = re.match(anchor_pattern(regex), subject, flags=flags)
        return SymbolFalse if matched is None else SymbolTrue
1019
1020
class StringJoin(BinaryOperator):
    """
    <dl>
    <dt>'StringJoin["$s1$", "$s2$", ...]'
        <dd>returns the concatenation of the strings $s1$, $s2$, ….
    </dl>

    >> StringJoin["a", "b", "c"]
     = abc
    >> "a" <> "b" <> "c" // InputForm
     = "abc"

    'StringJoin' flattens lists out:
    >> StringJoin[{"a", "b"}] // InputForm
     = "ab"
    >> Print[StringJoin[{"Hello", " ", {"world"}}, "!"]]
     | Hello world!
    """

    operator = "<>"
    precedence = 600
    attributes = ("Flat", "OneIdentity")

    def apply(self, items, evaluation):
        "StringJoin[items___]"

        # Nested lists are flattened away before joining.
        flattened = items.flatten(SymbolList)
        if flattened.get_head_name() == "System`List":
            parts = flattened.leaves
        else:
            parts = flattened.get_sequence()

        pieces = []
        for part in parts:
            if not isinstance(part, String):
                # Anything but a string leaves the expression unevaluated.
                evaluation.message("StringJoin", "string")
                return
            pieces.append(part.value)
        return String("".join(pieces))
1059
1060
class StringSplit(Builtin):
    """
    <dl>
    <dt>'StringSplit["$s$"]'
        <dd>splits the string $s$ at whitespace, discarding the
        whitespace and returning a list of strings.
    <dt>'StringSplit["$s$", "$d$"]'
        <dd>splits $s$ at the delimiter $d$.
    <dt>'StringSplit[$s$, {"$d1$", "$d2$", ...}]'
        <dd>splits $s$ using multiple delimiters.
    <dt>'StringSplit[{$s_1$, $s_2$, ...}, {"$d1$", "$d2$", ...}]'
        <dd>returns a list with the result of applying the function to
            each element.
    </dl>

    >> StringSplit["abc,123", ","]
     = {abc, 123}

    >> StringSplit["abc 123"]
     = {abc, 123}

    #> StringSplit["  abc    123  "]
     = {abc, 123}

    >> StringSplit["abc,123.456", {",", "."}]
     = {abc, 123, 456}

    >> StringSplit["a  b    c", RegularExpression[" +"]]
     = {a, b, c}

    >> StringSplit[{"a  b", "c  d"}, RegularExpression[" +"]]
     = {{a, b}, {c, d}}

    #> StringSplit["x", "x"]
     = {}

    #> StringSplit[x]
     : String or list of strings expected at position 1 in StringSplit[x].
     = StringSplit[x, Whitespace]

    #> StringSplit["x", x]
     : Element x is not a valid string or pattern element in x.
     = StringSplit[x, x]

    #> StringSplit["12312123", "12"..]
     = {3, 3}

    #> StringSplit["abaBa", "b"]
     = {a, aBa}
    #> StringSplit["abaBa", "b", IgnoreCase -> True]
     = {a, a, a}
    """

    rules = {
        "StringSplit[s_]": "StringSplit[s, Whitespace]",
    }

    options = {
        "IgnoreCase": "False",
        "MetaCharacters": "None",
    }

    messages = {
        "strse": "String or list of strings expected at position `1` in `2`.",
        "pysplit": "As of Python 3.5 re.split does not handle empty pattern matches.",
    }

    def apply(self, string, patt, evaluation, options):
        "StringSplit[string_, patt_, OptionsPattern[%(name)s]]"

        # A list of strings is handled element by element.  If any element
        # fails (the recursive call has already issued its message and
        # returned None), the whole expression is left unevaluated instead of
        # building a List that contains a Python None as a leaf.
        if string.get_head_name() == "System`List":
            leaves = []
            for s in string._leaves:
                leaf = self.apply(s, patt, evaluation, options)
                if leaf is None:
                    return
                leaves.append(leaf)
            return Expression(SymbolList, *leaves)

        py_string = string.get_string_value()

        if py_string is None:
            return evaluation.message(
                "StringSplit", "strse", Integer1, Expression("StringSplit", string)
            )

        # A single delimiter is treated as a one-element list of delimiters.
        if patt.has_form("List", None):
            patts = patt.get_leaves()
        else:
            patts = [patt]
        re_patts = []
        for p in patts:
            py_p = to_regex(p, evaluation)
            if py_p is None:
                return evaluation.message("StringExpression", "invld", p, patt)
            re_patts.append(py_p)

        flags = re.MULTILINE
        if options["System`IgnoreCase"] == SymbolTrue:
            flags = flags | re.IGNORECASE

        # Split successively by every delimiter: each pass re-splits all the
        # fragments produced by the previous one.
        result = [py_string]
        for re_patt in re_patts:
            result = [t for s in result for t in mathics_split(re_patt, s, flags=flags)]

        # Empty fragments (e.g. from leading/trailing delimiters) are dropped.
        return string_list(SymbolList, [String(x) for x in result if x != ""], evaluation)
1162
1163
class StringPosition(Builtin):
    """
    <dl>
    <dt>'StringPosition["$string$", $patt$]'
      <dd>gives a list of starting and ending positions where $patt$ matches "$string$".
    <dt>'StringPosition["$string$", $patt$, $n$]'
      <dd>returns the first $n$ matches only.
    <dt>'StringPosition["$string$", {$patt1$, $patt2$, ...}, $n$]'
      <dd>matches multiple patterns.
    <dt>'StringPosition[{$s1$, $s2$, ...}, $patt$]'
      <dd>returns a list of matches for multiple strings.
    </dl>

    >> StringPosition["123ABCxyABCzzzABCABC", "ABC"]
     = {{4, 6}, {9, 11}, {15, 17}, {18, 20}}

    >> StringPosition["123ABCxyABCzzzABCABC", "ABC", 2]
     = {{4, 6}, {9, 11}}

    'StringPosition' can be useful for searching through text.
    >> data = Import["ExampleData/EinsteinSzilLetter.txt"];
    >> StringPosition[data, "uranium"]
     = {{299, 305}, {870, 876}, {1538, 1544}, {1671, 1677}, {2300, 2306}, {2784, 2790}, {3093, 3099}}

    #> StringPosition["123ABCxyABCzzzABCABC", "ABC", -1]
     : Non-negative integer or Infinity expected at position 3 in StringPosition[123ABCxyABCzzzABCABC, ABC, -1].
     = StringPosition[123ABCxyABCzzzABCABC, ABC, -1]

    ## Overlaps
    #> StringPosition["1231221312112332", RegularExpression["[12]+"]]
     = {{1, 2}, {2, 2}, {4, 7}, {5, 7}, {6, 7}, {7, 7}, {9, 13}, {10, 13}, {11, 13}, {12, 13}, {13, 13}, {16, 16}}
    #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> False]
     = {{1, 2}, {4, 7}, {9, 13}, {16, 16}}
    #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> x]
     = {{1, 2}, {4, 7}, {9, 13}, {16, 16}}
    #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> All]
     : Overlaps -> All option is not currently implemented in Mathics.
     = {{1, 2}, {2, 2}, {4, 7}, {5, 7}, {6, 7}, {7, 7}, {9, 13}, {10, 13}, {11, 13}, {12, 13}, {13, 13}, {16, 16}}

    #> StringPosition["21211121122", {"121", "11"}]
     = {{2, 4}, {4, 5}, {5, 6}, {6, 8}, {8, 9}}
    #> StringPosition["21211121122", {"121", "11"}, Overlaps -> False]
     = {{2, 4}, {5, 6}, {8, 9}}

    #> StringPosition[{"abc", "abcda"}, "a"]
     = {{{1, 1}}, {{1, 1}, {5, 5}}}

    #> StringPosition[{"abc"}, "a", Infinity]
     = {{{1, 1}}}

    #> StringPosition["abc"]["123AabcDEabc"]
     = {{5, 7}, {10, 12}}
    """

    options = {
        "IgnoreCase": "False",
        "MetaCharacters": "None",
        "Overlaps": "True",
    }

    messages = {
        "strse": "String or list of strings expected at position `1` in `2`.",
        "overall": "Overlaps -> All option is not currently implemented in Mathics.",
        "innf": "Non-negative integer or Infinity expected at position `2` in `1`.",
    }

    rules = {
        "StringPosition[patt_][s_]": "StringPosition[s, patt]",
    }

    def apply(self, string, patt, evaluation, options):
        "StringPosition[string_, patt_, OptionsPattern[StringPosition]]"

        # The two-argument form is the three-argument form with n = Infinity.
        return self.apply_n(
            string,
            patt,
            Expression("DirectedInfinity", Integer1),
            evaluation,
            options,
        )

    def apply_n(self, string, patt, n, evaluation, options):
        "StringPosition[string_, patt_, n:(_Integer|DirectedInfinity[1]), OptionsPattern[StringPosition]]"

        expr = Expression("StringPosition", string, patt, n)

        # check n
        if n.has_form("DirectedInfinity", 1):
            py_n = float("inf")
        else:
            py_n = n.get_int_value()
            if py_n is None or py_n < 0:
                return evaluation.message("StringPosition", "innf", expr, Integer(3))

        # check options
        if options["System`Overlaps"] == SymbolTrue:
            overlap = True
        elif options["System`Overlaps"] == SymbolFalse:
            overlap = False
        elif options["System`Overlaps"] == Symbol("All"):
            # TODO
            evaluation.message("StringPosition", "overall")
            overlap = True
        else:
            overlap = False  # unknown options are treated as False

        # The IgnoreCase option is declared above, so honour it when the
        # patterns are compiled.
        flags = re.IGNORECASE if options["System`IgnoreCase"] == SymbolTrue else 0

        # convert patterns
        if patt.has_form("List", None):
            patts = patt.get_leaves()
        else:
            patts = [patt]
        re_patts = []
        for p in patts:
            py_p = to_regex(p, evaluation)
            if py_p is None:
                return evaluation.message("StringExpression", "invld", p, patt)
            re_patts.append(py_p)
        compiled_patts = [re.compile(re_patt, flags) for re_patt in re_patts]

        # string or list of strings
        if string.has_form("List", None):
            py_strings = [s.get_string_value() for s in string.leaves]
            if None in py_strings:
                return evaluation.message("StringPosition", "strse", Integer1, expr)
            results = [
                self.do_apply(py_string, compiled_patts, py_n, overlap)
                for py_string in py_strings
            ]
            return Expression(SymbolList, *results)
        else:
            py_string = string.get_string_value()
            if py_string is None:
                return evaluation.message("StringPosition", "strse", Integer1, expr)
            return self.do_apply(py_string, compiled_patts, py_n, overlap)

    @staticmethod
    def do_apply(py_string, compiled_patts, py_n, overlap):
        # Scan py_string from left to right, trying every compiled pattern
        # anchored at the current position, and collect 1-based, inclusive
        # [start, end] pairs until py_n matches have been found.
        result = []
        if py_n <= 0:
            # Asking for zero matches must not run the scan at all: the limit
            # below is only tested after a match has been appended.
            return from_python(result)
        start = 0
        while start < len(py_string):
            scan_from = start
            for compiled_patt in compiled_patts:
                m = compiled_patt.match(py_string, start)
                if m is None:
                    continue
                result.append([m.start() + 1, m.end()])  # 0 to 1 based indexing
                if len(result) == py_n:
                    return from_python(result)
                if not overlap:
                    # Without overlaps the next attempt starts after this match.
                    start = m.end()
            # Advance by one character when overlaps are allowed, when nothing
            # matched, or when only zero-length matches were found (otherwise
            # the scan would never move forward).
            if overlap or start == scan_from:
                start += 1
        return from_python(result)
1318
1319
class StringLength(Builtin):
    """
    <dl>
    <dt>'StringLength["$string$"]'
        <dd>gives the length of $string$.
    </dl>

    >> StringLength["abc"]
     = 3
    'StringLength' is listable:
    >> StringLength[{"a", "bc"}]
     = {1, 2}

    >> StringLength[x]
     : String expected.
     = StringLength[x]
    """

    attributes = ("Listable",)

    def apply(self, str, evaluation):
        "StringLength[str_]"

        # NOTE: the parameter is called ``str`` because it is bound to the
        # ``str_`` pattern variable in the rule above.
        if isinstance(str, String):
            return Integer(len(str.value))

        # Anything that is not a string: report it and stay unevaluated.
        evaluation.message("StringLength", "string")
        return
1347
1348
class _StringFind(Builtin):
    # Shared argument handling for the string-searching builtins defined in
    # this file (StringReplace and StringCases).  Subclasses provide
    # ``_find`` and call ``_apply`` from their own ``apply`` method.

    # Must be a tuple like in the sibling classes; a bare string would be
    # iterated character by character.
    attributes = ("Protected",)

    options = {
        "IgnoreCase": "False",
        "MetaCharacters": "None",
    }

    messages = {
        "strse": "String or list of strings expected at position `1` in `2`.",
        "srep": "`1` is not a valid string replacement rule.",
        "innf": (
            "Non-negative integer or Infinity expected at " "position `1` in `2`."
        ),
    }

    def _find(self, py_stri, py_rules, py_n, flags, evaluation):
        # Hook implemented by subclasses.  The signature mirrors the call made
        # at the end of ``_apply`` so that an unimplemented hook fails with
        # NotImplementedError rather than a TypeError on the argument count.
        raise NotImplementedError()

    def _apply(self, string, rule, n, evaluation, options, cases):
        # Validate and convert the arguments, then delegate to ``_find``.
        #
        # string  -- a String or a List of Strings
        # rule    -- a rule/pattern or a List of them
        # n       -- maximal number of matches, Infinity, or the
        #            System`Private`Null placeholder when it was not given
        # cases   -- when true, a bare pattern (without ``->``) is accepted
        #
        # Returns the result of ``_find`` (a List of results for a list of
        # strings), or None after issuing a message when an argument is invalid.
        if n.sameQ(Symbol("System`Private`Null")):
            expr = Expression(self.get_name(), string, rule)
            n = None
        else:
            expr = Expression(self.get_name(), string, rule, n)

        # convert string
        if string.has_form("List", None):
            py_strings = [stri.get_string_value() for stri in string.leaves]
            if None in py_strings:
                return evaluation.message(self.get_name(), "strse", Integer1, expr)
        else:
            py_strings = string.get_string_value()
            if py_strings is None:
                return evaluation.message(self.get_name(), "strse", Integer1, expr)

        # convert rule
        def convert_rule(r):
            # Returns a (regex, replacement) pair, or None (the value returned
            # by evaluation.message) when ``r`` cannot be converted.
            if r.has_form("Rule", None) and len(r.leaves) == 2:
                py_s = to_regex(r.leaves[0], evaluation)
                if py_s is None:
                    return evaluation.message(
                        "StringExpression", "invld", r.leaves[0], r.leaves[0]
                    )
                py_sp = r.leaves[1]
                return py_s, py_sp
            elif cases:
                py_s = to_regex(r, evaluation)
                if py_s is None:
                    return evaluation.message("StringExpression", "invld", r, r)
                return py_s, None

            return evaluation.message(self.get_name(), "srep", r)

        if rule.has_form("List", None):
            py_rules = [convert_rule(r) for r in rule.leaves]
        else:
            py_rules = [convert_rule(rule)]
        if None in py_rules:
            return None

        # convert n; 0 is what is passed to ``_find`` both when n was omitted
        # and when it is Infinity
        if n is None:
            py_n = 0
        elif n == Expression("DirectedInfinity", Integer1):
            py_n = 0
        else:
            py_n = n.get_int_value()
            if py_n is None or py_n < 0:
                return evaluation.message(self.get_name(), "innf", Integer(3), expr)

        # flags
        flags = re.MULTILINE
        if options["System`IgnoreCase"] == SymbolTrue:
            flags = flags | re.IGNORECASE

        if isinstance(py_strings, list):
            return Expression(
                "List",
                *[
                    self._find(py_stri, py_rules, py_n, flags, evaluation)
                    for py_stri in py_strings
                ]
            )
        else:
            return self._find(py_strings, py_rules, py_n, flags, evaluation)
1435
1436
class StringReplace(_StringFind):
    """
    <dl>
    <dt>'StringReplace["$string$", "$a$"->"$b$"]'
        <dd>replaces each occurrence of $a$ with $b$ in $string$.
    <dt>'StringReplace["$string$", {"$s1$"->"$sp1$", "$s2$"->"$sp2$"}]'
        <dd>performs multiple replacements of each $si$ by the
        corresponding $spi$ in $string$.
    <dt>'StringReplace["$string$", $srules$, $n$]'
        <dd>only performs the first $n$ replacements.
    <dt>'StringReplace[{"$string1$", "$string2$", ...}, $srules$]'
        <dd>performs the replacements specified by $srules$ on a list
        of strings.
    </dl>

    StringReplace replaces all occurrences of one substring with another:
    >> StringReplace["xyxyxyyyxxxyyxy", "xy" -> "A"]
     = AAAyyxxAyA

    Multiple replacements can be supplied:
    >> StringReplace["xyzwxyzwxxyzxyzw", {"xyz" -> "A", "w" -> "BCD"}]
     = ABCDABCDxAABCD

    Only replace the first 2 occurrences:
    >> StringReplace["xyxyxyyyxxxyyxy", "xy" -> "A", 2]
     = AAxyyyxxxyyxy

    Also works for multiple rules:
    >> StringReplace["abba", {"a" -> "A", "b" -> "B"}, 2]
     = ABba

    StringReplace acts on lists of strings too:
    >> StringReplace[{"xyxyxxy", "yxyxyxxxyyxy"}, "xy" -> "A"]
     = {AAxA, yAAxxAyA}

    #> StringReplace["abcabc", "a" -> "b", Infinity]
     = bbcbbc
    #> StringReplace[x, "a" -> "b"]
     : String or list of strings expected at position 1 in StringReplace[x, a -> b].
     = StringReplace[x, a -> b]
    #> StringReplace["xyzwxyzwaxyzxyzw", x]
     : x is not a valid string replacement rule.
     = StringReplace[xyzwxyzwaxyzxyzw, x]
    #> StringReplace["xyzwxyzwaxyzxyzw", x -> y]
     : Element x is not a valid string or pattern element in x.
     = StringReplace[xyzwxyzwaxyzxyzw, x -> y]
    #> StringReplace["abcabc", "a" -> "b", -1]
     : Non-negative integer or Infinity expected at position 3 in StringReplace[abcabc, a -> b, -1].
     = StringReplace[abcabc, a -> b, -1]
    #> StringReplace["abc", "b" -> 4]
     : String expected.
     = a <> 4 <> c

    #> StringReplace["01101100010", "01" .. -> "x"]
     = x1x100x0

    #> StringReplace["abc abcb abdc", "ab" ~~ _ -> "X"]
     = X Xb Xc

    #> StringReplace["abc abcd abcd",  WordBoundary ~~ "abc" ~~ WordBoundary -> "XX"]
     = XX abcd abcd

    #> StringReplace["abcd acbd", RegularExpression["[ab]"] -> "XX"]
     = XXXXcd XXcXXd

    #> StringReplace["abcd acbd", RegularExpression["[ab]"] ~~ _ -> "YY"]
     = YYcd YYYY

    #> StringReplace["abcdabcdaabcabcd", {"abc" -> "Y", "d" -> "XXX"}]
     = YXXXYXXXaYYXXX


    #> StringReplace["  Have a nice day.  ", (StartOfString ~~ Whitespace) | (Whitespace ~~ EndOfString) -> ""] // FullForm
     = "Have a nice day."

    #> StringReplace["xyXY", "xy" -> "01"]
     = 01XY
    #> StringReplace["xyXY", "xy" -> "01", IgnoreCase -> True]
     = 0101

    StringReplace also can be used as an operator:
    >> StringReplace["y" -> "ies"]["city"]
     = cities
    """

    # TODO Special Characters
    """
    #> StringReplace["product: A \\[CirclePlus] B" , "\\[CirclePlus]" -> "x"]
     = A x B
    """

    rules = {
        "StringReplace[rule_][string_]": "StringReplace[string, rule]",
    }

    def _find(self, py_stri, py_rules, py_n, flags, evaluation):
        # Walk over the matches in order, alternating between the untouched
        # text in front of each match and the evaluated replacement, and hand
        # all pieces to StringJoin.
        pieces = []
        cursor = 0  # index just past the previously replaced span
        for match, form in _parallel_match(py_stri, py_rules, flags, py_n):
            begin, finish = match.span()
            if begin > cursor:
                pieces.append(String(py_stri[cursor:begin]))
            pieces.append(_evaluate_match(form, match, evaluation))
            cursor = finish

        # Whatever follows the last match is copied verbatim.
        if cursor < len(py_stri):
            pieces.append(String(py_stri[cursor:]))

        return Expression("StringJoin", *pieces)

    def apply(self, string, rule, n, evaluation, options):
        "%(name)s[string_, rule_, OptionsPattern[%(name)s], n_:System`Private`Null]"
        # this pattern is a slight hack to get around missing Shortest/Longest.
        # cases=False: every rule has to be an explicit ``pattern -> replacement``.
        return self._apply(string, rule, n, evaluation, options, False)
1550
1551
class StringReverse(Builtin):
    """
    <dl>
      <dt>'StringReverse["$string$"]'
      <dd>reverses the order of the characters in "string".
      </dl>

      >> StringReverse["live"]
       = evil
    """

    attributes = ("Listable", "Protected")

    def apply(self, string, evaluation):
        "StringReverse[string_String]"
        # Build the result from the characters taken back to front.
        text = string.get_string_value()
        return String("".join(reversed(text)))
1568
1569
class StringCases(_StringFind):
    """
    <dl>
    <dt>'StringCases["$string$", $pattern$]'
        <dd>gives all occurrences of $pattern$ in $string$.
    <dt>'StringCases["$string$", $pattern$ -> $form$]'
        <dd>gives all instances of $form$ that stem from occurrences of $pattern$ in $string$.
    <dt>'StringCases["$string$", {$pattern1$, $pattern2$, ...}]'
        <dd>gives all occurrences of $pattern1$, $pattern2$, ....
    <dt>'StringCases["$string$", $pattern$, $n$]'
        <dd>gives only the first $n$ occurrences.
    <dt>'StringCases[{"$string1$", "$string2$", ...}, $pattern$]'
        <dd>gives occurrences in $string1$, $string2$, ...
    </dl>

    >> StringCases["axbaxxb", "a" ~~ x_ ~~ "b"]
     = {axb}

    >> StringCases["axbaxxb", "a" ~~ x__ ~~ "b"]
     = {axbaxxb}

    >> StringCases["axbaxxb", Shortest["a" ~~ x__ ~~ "b"]]
     = {axb, axxb}

    >> StringCases["-abc- def -uvw- xyz", Shortest["-" ~~ x__ ~~ "-"] -> x]
     = {abc, uvw}

    >> StringCases["-öhi- -abc- -.-", "-" ~~ x : WordCharacter .. ~~ "-" -> x]
     = {öhi, abc}

    >> StringCases["abc-abc xyz-uvw", Shortest[x : WordCharacter .. ~~ "-" ~~ x_] -> x]
     = {abc}

    #> StringCases["abc-abc xyz-uvw", Shortest[x : WordCharacter .. ~~ "-" ~~ x : LetterCharacter] -> x]
     : Ignored restriction given for x in x : LetterCharacter as it does not match previous occurences of x.
     = {abc}

    >> StringCases["abba", {"a" -> 10, "b" -> 20}, 2]
     = {10, 20}

    >> StringCases["a#ä_123", WordCharacter]
     = {a, ä, 1, 2, 3}

    >> StringCases["a#ä_123", LetterCharacter]
     = {a, ä}
    """

    rules = {
        "StringCases[rule_][string_]": "StringCases[string, rule]",
    }

    def _find(self, py_stri, py_rules, py_n, flags, evaluation):
        # One result per match: the matched text itself for a bare pattern
        # (form is None), otherwise the evaluated right-hand side of the rule.
        found = []
        for match, form in _parallel_match(py_stri, py_rules, flags, py_n):
            if form is not None:
                found.append(_evaluate_match(form, match, evaluation))
            else:
                found.append(String(match.group(0)))

        return Expression(SymbolList, *found)

    def apply(self, string, rule, n, evaluation, options):
        "%(name)s[string_, rule_, OptionsPattern[%(name)s], n_:System`Private`Null]"
        # this pattern is a slight hack to get around missing Shortest/Longest.
        # cases=True: bare patterns are accepted in addition to rules.
        return self._apply(string, rule, n, evaluation, options, True)
1635
1636
class StringRepeat(Builtin):
    """
    <dl>
    <dt>'StringRepeat["$string$", $n$]'
        <dd>gives $string$ repeated $n$ times.
    <dt>'StringRepeat["$string$", $n$, $max$]'
        <dd>gives $string$ repeated $n$ times, but not more than $max$ characters.
    </dl>

    >> StringRepeat["abc", 3]
     = abcabcabc

    >> StringRepeat["abc", 10, 7]
     = abcabca

    #> StringRepeat["x", 0]
     : A positive integer is expected at position 2 in StringRepeat[x, 0].
     = StringRepeat[x, 0]
    """

    messages = {
        "intp": "A positive integer is expected at position `1` in `2`.",
    }

    def apply(self, s, n, expression, evaluation):
        "StringRepeat[s_String, n_]"
        # Non-integer counts are mapped to 0 so that they fall into the same
        # error branch as non-positive integers.
        py_n = n.get_int_value() if isinstance(n, Integer) else 0
        if py_n < 1:
            evaluation.message("StringRepeat", "intp", 2, expression)
        else:
            return String(s.get_string_value() * py_n)

    def apply_truncated(self, s, n, m, expression, evaluation):
        "StringRepeat[s_String, n_Integer, m_Integer]"
        py_n = n.get_int_value() if isinstance(n, Integer) else 0
        py_m = m.get_int_value() if isinstance(m, Integer) else 0

        if py_n < 1:
            evaluation.message("StringRepeat", "intp", 2, expression)
        elif py_m < 1:
            evaluation.message("StringRepeat", "intp", 3, expression)
        else:
            py_s = s.get_string_value()
            if not py_s:
                # Repeating the empty string always gives the empty string;
                # returning early also avoids dividing by len(py_s) == 0 below.
                return String("")

            # No need to build more copies than are required to reach py_m
            # characters.
            py_n = min(1 + py_m // len(py_s), py_n)

            return String((py_s * py_n)[:py_m])
1683
1684
class Characters(Builtin):
    """
    <dl>
    <dt>'Characters["$string$"]'
        <dd>returns a list of the characters in $string$.
    </dl>

    >> Characters["abc"]
     = {a, b, c}

    #> \\.78\\.79\\.7A
     = xyz

    #> \\:0078\\:0079\\:007A
     = xyz

    #> \\101\\102\\103\\061\\062\\063
     = ABC123

    #> \\[Alpha]\\[Beta]\\[Gamma]
     = \u03B1\u03B2\u03B3
    """

    attributes = ("Listable",)

    def apply(self, string, evaluation):
        "Characters[string_String]"

        # Each character of the string becomes a one-character String leaf.
        chars = [String(char) for char in string.value]
        return Expression(SymbolList, *chars)
1714
1715
class CharacterRange(Builtin):
    """
    <dl>
    <dt>'CharacterRange["$a$", "$b$"]'
        <dd>returns a list of the Unicode characters from $a$ to $b$
        inclusive.
    </dl>

    >> CharacterRange["a", "e"]
     = {a, b, c, d, e}
    >> CharacterRange["b", "a"]
     = {}
    """

    attributes = ("ReadProtected",)

    messages = {
        "argtype": "Arguments `1` and `2` are not both strings of length 1.",
    }

    def apply(self, start, stop, evaluation):
        "CharacterRange[start_String, stop_String]"

        first, last = start.value, stop.value
        # Both end points have to be single characters.
        if not (len(first) == 1 and len(last) == 1):
            evaluation.message("CharacterRange", "argtype", start, stop)
            return

        # Inclusive range of code points; empty when ``first`` sorts after ``last``.
        codes = range(ord(first), ord(last) + 1)
        return Expression("List", *[String(chr(code)) for code in codes])
1747
1748
class String_(Builtin):
    """
    <dl>
    <dt>'String'
        <dd>is the head of strings.
    </dl>

    >> Head["abc"]
     = String
    >> "abc"
     = abc

    Use 'InputForm' to display quotes around strings:
    >> InputForm["abc"]
     = "abc"

    'FullForm' also displays quotes:
    >> FullForm["abc" + 2]
     = Plus[2, "abc"]
    """

    # NOTE(review): the class name carries a trailing underscore, presumably
    # to avoid clashing with the ``String`` class imported from
    # mathics.core.expression at the top of this module; ``name`` then gives
    # the un-suffixed name "String" -- confirm against the Builtin base class.
    name = "String"
1771
1772
class LowerCaseQ(Test):
    """
    <dl>
    <dt>'LowerCaseQ[$s$]'
        <dd>returns True if $s$ consists wholly of lower case characters.
    </dl>

    >> LowerCaseQ["abc"]
     = True

    An empty string returns True.
    >> LowerCaseQ[""]
     = True
    """

    def test(self, s):
        # Only strings can qualify; an empty string passes vacuously.
        if not isinstance(s, String):
            return False
        return all(char.islower() for char in s.get_string_value())
1790
1791
class ToLowerCase(Builtin):
    """
    <dl>
    <dt>'ToLowerCase[$s$]'
        <dd>returns $s$ in all lower case.
    </dl>

    >> ToLowerCase["New York"]
     = new york
    """

    attributes = ("Listable", "Protected")

    def apply(self, s, evaluation):
        "ToLowerCase[s_String]"
        # Delegates the case conversion to Python's str.lower.
        text = s.get_string_value()
        return String(text.lower())
1808
1809
class UpperCaseQ(Test):
    """
    <dl>
    <dt>'UpperCaseQ[$s$]'
        <dd>returns True if $s$ consists wholly of upper case characters.
    </dl>

    >> UpperCaseQ["ABC"]
     = True

    An empty string returns True.
    >> UpperCaseQ[""]
     = True
    """

    def test(self, s):
        # Only strings can qualify; an empty string passes vacuously.
        if not isinstance(s, String):
            return False
        return all(char.isupper() for char in s.get_string_value())
1827
1828
class ToUpperCase(Builtin):
    """
    <dl>
    <dt>'ToUpperCase[$s$]'
        <dd>returns $s$ in all upper case.
    </dl>

    >> ToUpperCase["New York"]
     = NEW YORK
    """

    attributes = ("Listable", "Protected")

    def apply(self, s, evaluation):
        "ToUpperCase[s_String]"
        # Delegates the case conversion to Python's str.upper.
        text = s.get_string_value()
        return String(text.upper())
1845
1846
class ToString(Builtin):
    """
    <dl>
    <dt>'ToString[$expr$]'
        <dd>returns a string representation of $expr$.
    <dt>'ToString[$expr$, $form$]'
        <dd>returns a string representation of $expr$ in the form
          $form$.
    </dl>

    >> ToString[2]
     = 2
    >> ToString[2] // InputForm
     = "2"
    >> ToString[a+b]
     = a + b
    >> "U" <> 2
     : String expected.
     = U <> 2
    >> "U" <> ToString[2]
     = U2
    >> ToString[Integrate[f[x],x], TeXForm]
     = \\int f\\left[x\\right] \\, dx

    """

    options = {
        "CharacterEncoding": '"Unicode"',
        "FormatType": "OutputForm",
        "NumberMarks": "$NumberMarks",
        "PageHeight": "Infinity",
        "PageWidth": "Infinity",
        "TotalHeight": "Infinity",
        "TotalWidth": "Infinity",
    }

    def apply_default(self, value, evaluation, options):
        "ToString[value_, OptionsPattern[ToString]]"
        # Without an explicit form, OutputForm is used.
        output_form = Symbol("System`OutputForm")
        return self.apply_form(value, output_form, evaluation, options)

    def apply_form(self, value, form, evaluation, options):
        "ToString[value_, form_, OptionsPattern[ToString]]"
        encoding = options["System`CharacterEncoding"]
        # Format the value in the requested form, then render the resulting
        # boxes as plain text.
        formatted = value.format(evaluation, form.get_name(), encoding=encoding)
        rendered = formatted.boxes_to_text(evaluation=evaluation)
        return String(rendered)
1893
1894
class InterpretedBox(PrefixOperator):
    r"""
    <dl>
      <dt>'InterpretedBox[$box$]'
      <dd>is the ad hoc fullform for \! $box$. just
          for internal use...

    >> \! \(2+2\)
     = 4
    </dl>
    """

    operator = "\\!"
    precedence = 670

    def apply_dummy(self, boxes, evaluation):
        """InterpretedBox[boxes_]"""
        # TODO: this is a very crude placeholder implementation; different
        # kinds of boxes should eventually be handled in different ways.
        # For now the boxes are rendered as text and fed back through
        # ToExpression.
        text = boxes.boxes_to_text()
        reparsed = Expression("ToExpression", text)
        return reparsed.evaluate(evaluation)
1918
1919
class ToExpression(Builtin):
    r"""
    <dl>
      <dt>'ToExpression[$input$]'
      <dd>interprets a given string as Mathics input.

      <dt>'ToExpression[$input$, $form$]'
      <dd>reads the given input in the specified $form$.

      <dt>'ToExpression[$input$, $form$, $h$]'
      <dd>applies the head $h$ to the expression before evaluating it.

    </dl>

    >> ToExpression["1 + 2"]
     = 3

    >> ToExpression["{2, 3, 1}", InputForm, Max]
     = 3

    >> ToExpression["2 3", InputForm]
     = 6

    Note that newlines are like semicolons, not blanks. So the return value is the second-line value.
    >> ToExpression["2\[NewLine]3"]
     = 3

    #> ToExpression["log(x)", InputForm]
     = log x

    #> ToExpression["1+"]
     : Incomplete expression; more input is needed (line 1 of "ToExpression['1+']").
     = $Failed

    #> ToExpression[]
     : ToExpression called with 0 arguments; between 1 and 3 arguments are expected.
     = ToExpression[]
    """

    # TODO: Other forms
    """
    >> ToExpression["log(x)", TraditionalForm]
     = Log[x]
    >> ToExpression["log(x)", TraditionalForm]
     = Log[x]
    #> ToExpression["log(x)", StandardForm]
     = log x
    """
    attributes = ("Listable", "Protected")

    messages = {
        "argb": (
            "`1` called with `2` arguments; "
            "between `3` and `4` arguments are expected."
        ),
        "interpfmt": (
            "`1` is not a valid interpretation format. "
            "Valid interpretation formats include InputForm "
            "and any member of $BoxForms."
        ),
        "notstr": "The format type `1` is valid only for string input.",
    }

    def apply(self, seq, evaluation):
        "ToExpression[seq__]"

        # Organise Arguments.  ``seq__`` matches one or more arguments; the
        # zero-argument call is handled by apply_empty.
        py_seq = seq.get_sequence()
        n_args = len(py_seq)
        if n_args == 1:
            (inp, form, head) = (py_seq[0], Symbol("InputForm"), None)
        elif n_args == 2:
            (inp, form, head) = (py_seq[0], py_seq[1], None)
        elif n_args == 3:
            (inp, form, head) = (py_seq[0], py_seq[1], py_seq[2])
        else:
            evaluation.message(
                "ToExpression",
                "argb",
                "ToExpression",
                Integer(n_args),
                Integer1,
                Integer(3),
            )
            return

        # Apply the different forms
        if form == Symbol("InputForm"):
            if isinstance(inp, String):

                # TODO: turn the below up into a function and call that.
                s = inp.get_string_value()
                short_s = s[:15] + "..." if len(s) > 16 else s

                # Input that contains no expression at all (empty string,
                # blank lines, comments) never assigns ``result`` inside the
                # loop, so start from Null instead of leaving it unbound.
                result = Symbol("Null")
                with io.StringIO(s) as f:
                    # The name shows up in parser messages as the "file" name.
                    f.name = """ToExpression['%s']""" % short_s
                    feeder = MathicsFileLineFeeder(f)
                    # Every expression in the input is evaluated in turn; the
                    # value of the last one is the result.
                    while not feeder.empty():
                        try:
                            query = parse(evaluation.definitions, feeder)
                        except TranslateError:
                            return SymbolFailed
                        finally:
                            # Forward parser messages whether or not parsing
                            # succeeded.
                            feeder.send_messages(evaluation)
                        if query is None:  # blank line / comment
                            continue
                        result = query.evaluate(evaluation)

            else:
                result = inp
        else:
            evaluation.message("ToExpression", "interpfmt", form)
            return

        # Apply head if present
        if head is not None:
            result = Expression(head, result).evaluate(evaluation)

        return result

    def apply_empty(self, evaluation):
        "ToExpression[]"
        evaluation.message(
            "ToExpression", "argb", "ToExpression", Integer0, Integer1, Integer(3)
        )
        return
2045
2046
class ToCharacterCode(Builtin):
    """
    <dl>
    <dt>'ToCharacterCode["$string$"]'
      <dd>converts the string to a list of character codes (Unicode
      codepoints).
    <dt>'ToCharacterCode[{"$string1$", "$string2$", ...}]'
      <dd>converts a list of strings to character codes.
    </dl>

    >> ToCharacterCode["abc"]
     = {97, 98, 99}
    >> FromCharacterCode[%]
     = abc

    >> ToCharacterCode["\\[Alpha]\\[Beta]\\[Gamma]"]
     = {945, 946, 947}

    >> ToCharacterCode["ä", "UTF8"]
     = {195, 164}

    >> ToCharacterCode["ä", "ISO8859-1"]
     = {228}

    >> ToCharacterCode[{"ab", "c"}]
     = {{97, 98}, {99}}

    #> ToCharacterCode[{"ab"}]
     = {{97, 98}}

    #> ToCharacterCode[{{"ab"}}]
     : String or list of strings expected at position 1 in ToCharacterCode[{{ab}}].
     = ToCharacterCode[{{ab}}]

    >> ToCharacterCode[{"ab", x}]
     : String or list of strings expected at position 1 in ToCharacterCode[{ab, x}].
     = ToCharacterCode[{ab, x}]

    >> ListPlot[ToCharacterCode["plot this string"], Filling -> Axis]
     = -Graphics-

    #> ToCharacterCode[x]
     : String or list of strings expected at position 1 in ToCharacterCode[x].
     = ToCharacterCode[x]

    #> ToCharacterCode[""]
     = {}
    """

    messages = {
        "strse": "String or list of strings expected at position `1` in `2`.",
    }

    def _encode(self, string, encoding, evaluation):
        # Shared worker for both rules: turns a String (or a List of Strings)
        # into character codes for the named encoding.  Returns None after
        # emitting a message when the input or the encoding is rejected.

        # Built first so the "strse" message can quote the original call.
        exp = Expression("ToCharacterCode", string)

        # Reduce the argument to one Python str or a list of them; a None
        # coming back from get_string_value() marks a non-string item.
        if string.has_form("List", None):
            string = [substring.get_string_value() for substring in string.leaves]
            malformed = any(substring is None for substring in string)
        else:
            string = string.get_string_value()
            malformed = string is None
        if malformed:
            evaluation.message("ToCharacterCode", "strse", Integer1, exp)
            return None

        # Choose the per-string converter.
        if encoding != "Unicode":
            py_encoding = to_python_encoding(encoding)
            if py_encoding is None:
                evaluation.message("General", "charcode", encoding)
                return

            def convert(s):
                # One integer per byte of the encoded string.
                byte_values = unpack_bytes(s.encode(py_encoding))
                return Expression("List", *[Integer(x) for x in byte_values])

        else:

            def convert(s):
                # One integer per character: its codepoint.
                return Expression(SymbolList, *[Integer(ord(code)) for code in s])

        if isinstance(string, list):
            return Expression(SymbolList, *[convert(substring) for substring in string])
        if isinstance(string, str):
            return convert(string)

    def apply_default(self, string, evaluation):
        "ToCharacterCode[string_]"
        # No encoding given: plain Unicode codepoints.
        return self._encode(string, "Unicode", evaluation)

    def apply(self, string, encoding, evaluation):
        "ToCharacterCode[string_, encoding_String]"
        encoding_name = encoding.get_string_value()
        return self._encode(string, encoding_name, evaluation)
2142
2143
2144class _InvalidCodepointError(ValueError):
2145    pass
2146
2147
class FromCharacterCode(Builtin):
    """
    <dl>
    <dt>'FromCharacterCode[$n$]'
        <dd>returns the character corresponding to Unicode codepoint $n$.
    <dt>'FromCharacterCode[{$n1$, $n2$, ...}]'
        <dd>returns a string with characters corresponding to $n_i$.
    <dt>'FromCharacterCode[{{$n11$, $n12$, ...}, {$n21$, $n22$, ...}, ...}]'
        <dd>returns a list of strings.
    </dl>

    >> FromCharacterCode[100]
     = d

    >> FromCharacterCode[228, "ISO8859-1"]
     = ä

    >> FromCharacterCode[{100, 101, 102}]
     = def
    >> ToCharacterCode[%]
     = {100, 101, 102}

    >> FromCharacterCode[{{97, 98, 99}, {100, 101, 102}}]
     = {abc, def}

    >> ToCharacterCode["abc 123"] // FromCharacterCode
     = abc 123

    #> #1 == ToCharacterCode[FromCharacterCode[#1]] & [RandomInteger[{0, 65535}, 100]]
     = True

    #> FromCharacterCode[{}] // InputForm
     = ""

    #> FromCharacterCode[65536]
     : A character code, which should be a non-negative integer less than 65536, is expected at position 1 in {65536}.
     = FromCharacterCode[65536]
    #> FromCharacterCode[-1]
     : Non-negative machine-sized integer expected at position 1 in FromCharacterCode[-1].
     = FromCharacterCode[-1]
    #> FromCharacterCode[444444444444444444444444444444444444]
     : Non-negative machine-sized integer expected at position 1 in FromCharacterCode[444444444444444444444444444444444444].
     = FromCharacterCode[444444444444444444444444444444444444]

    #> FromCharacterCode[{100, 101, -1}]
     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, -1}.
     = FromCharacterCode[{100, 101, -1}]
    #> FromCharacterCode[{100, 101, 65536}]
     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, 65536}.
     = FromCharacterCode[{100, 101, 65536}]
    #> FromCharacterCode[{100, 101, x}]
     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, x}.
     = FromCharacterCode[{100, 101, x}]
    #> FromCharacterCode[{100, {101}}]
     : A character code, which should be a non-negative integer less than 65536, is expected at position 2 in {100, {101}}.
     = FromCharacterCode[{100, {101}}]

    #> FromCharacterCode[{{97, 98, 99}, {100, 101, x}}]
     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, x}.
     = FromCharacterCode[{{97, 98, 99}, {100, 101, x}}]
    #> FromCharacterCode[{{97, 98, x}, {100, 101, x}}]
     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {97, 98, x}.
     = FromCharacterCode[{{97, 98, x}, {100, 101, x}}]
    """

    messages = {
        "notunicode": (
            "A character code, which should be a non-negative integer less "
            "than 65536, is expected at position `2` in `1`."
        ),
        "intnm": (
            "Non-negative machine-sized integer expected at " "position `2` in `1`."
        ),
        "utf8": "The given codes could not be decoded as utf-8.",
    }

    def _decode(self, n, encoding, evaluation):
        # Shared worker for both rules.  `n` is a single code, a list of
        # codes, or a list of lists of codes; `encoding` is the encoding name
        # as a Python str.  Returns a String (or a List of Strings), or None
        # after emitting a message when the input is rejected.
        exp = Expression("FromCharacterCode", n)

        py_encoding = to_python_encoding(encoding)
        if py_encoding is None:
            evaluation.message("General", "charcode", encoding)
            return

        def convert_codepoint_list(l):
            # Decode one flat list of code expressions into a Python str.
            # Raises _InvalidCodepointError (after emitting a message) on the
            # first unusable code.
            if encoding == "Unicode":
                chars = []
                for i, ni in enumerate(l):
                    pyni = ni.get_int_value()
                    if not (pyni is not None and 0 <= pyni <= 0xFFFF):
                        evaluation.message(
                            "FromCharacterCode",
                            "notunicode",
                            Expression(SymbolList, *l),
                            Integer(i + 1),
                        )
                        raise _InvalidCodepointError
                    chars.append(chr(pyni))
                # Join once instead of growing a string character by character.
                return "".join(chars)
            else:
                codes = []
                for i, ni in enumerate(l):
                    pyni = ni.get_int_value()
                    if pyni is None:
                        # A non-integer code used to reach `None & 0xFF` and
                        # blow up with a TypeError; report it instead.
                        evaluation.message(
                            "FromCharacterCode",
                            "intnm",
                            Expression(SymbolList, *l),
                            Integer(i + 1),
                        )
                        raise _InvalidCodepointError
                    # Each code is reduced to a single byte before decoding.
                    codes.append(pyni & 0xFF)
                return pack_bytes(codes).decode(py_encoding)

        try:
            if n.has_form("List", None):
                if not n.get_leaves():
                    return String("")
                # Mathematica accepts FromCharacterCode[{{100}, 101}],
                # so to match this, just check the first leaf to see
                # if we're dealing with nested lists.
                elif n.get_leaves()[0].has_form("List", None):
                    list_of_strings = []
                    for leaf in n.get_leaves():
                        if leaf.has_form("List", None):
                            stringi = convert_codepoint_list(leaf.get_leaves())
                        else:
                            stringi = convert_codepoint_list([leaf])
                        list_of_strings.append(String(stringi))
                    return Expression(SymbolList, *list_of_strings)
                else:
                    return String(convert_codepoint_list(n.get_leaves()))
            else:
                pyn = n.get_int_value()
                # 0 is a valid (non-negative) code: the list form accepts it
                # and the message itself says "Non-negative", so the lower
                # bound is inclusive.
                if not (isinstance(pyn, int) and 0 <= pyn < sys.maxsize):
                    return evaluation.message(
                        "FromCharacterCode", "intnm", exp, Integer1
                    )
                return String(convert_codepoint_list([n]))
        except _InvalidCodepointError:
            # The message has already been emitted where the error was raised.
            return
        except UnicodeDecodeError:
            evaluation.message(self.get_name(), "utf8")
            return

    def apply_default(self, n, evaluation):
        "FromCharacterCode[n_]"
        return self._decode(n, "Unicode", evaluation)

    def apply(self, n, encoding, evaluation):
        "FromCharacterCode[n_, encoding_String]"
        return self._decode(n, encoding.get_string_value(), evaluation)
2291
2292
class StringQ(Test):
    """
    <dl>
    <dt>'StringQ[$expr$]'
      <dd>returns 'True' if $expr$ is a 'String', or 'False' otherwise.
    </dl>

    >> StringQ["abc"]
     = True
    >> StringQ[1.5]
     = False
    >> Select[{"12", 1, 3, 5, "yz", x, y}, StringQ]
     = {12, yz}
    """

    def test(self, expr):
        # The predicate holds exactly for String objects.
        is_string = isinstance(expr, String)
        return is_string
2310
2311
class StringTake(Builtin):
    """
    <dl>
      <dt>'StringTake["$string$", $n$]'
      <dd>gives the first $n$ characters in $string$.

      <dt>'StringTake["$string$", -$n$]'
      <dd>gives the last $n$ characters in $string$.

      <dt>'StringTake["$string$", {$n$}]'
      <dd>gives the $n$th character in $string$.

      <dt>'StringTake["$string$", {$m$, $n$}]'
      <dd>gives characters $m$ through $n$ in $string$.

      <dt>'StringTake["$string$", {$m$, $n$, $s$}]'
      <dd>gives characters $m$ through $n$ in steps of $s$.

      <dt>'StringTake[{$s1$, $s2$, ...}, $spec$]'
      <dd>gives the list of results for each of the $si$.
    </dl>

    >> StringTake["abcde", 2]
     = ab
    >> StringTake["abcde", 0]
     = #<--#
    >> StringTake["abcde", -2]
     = de
    >> StringTake["abcde", {2}]
     = b
    >> StringTake["abcd", {2,3}]
     = bc
    >> StringTake["abcdefgh", {1, 5, 2}]
     = ace

    Take the last 2 characters from several strings:
    >> StringTake[{"abcdef", "stuv", "xyzw"}, -2]
     = {ef, uv, zw}

    StringTake also supports standard sequence specifications
    >> StringTake["abcdef", All]
     = abcdef

    #> StringTake["abcd", 0] // InputForm
    = ""
    #> StringTake["abcd", {3, 2}] // InputForm
    = ""
    #> StringTake["", {1, 0}] // InputForm
    = ""

    #> StringTake["abc", {0, 0}]
    : Cannot take positions 0 through 0 in "abc".
    = StringTake[abc, {0, 0}]

    #> StringTake[{2, 4},2]
     : String or list of strings expected at position 1.
     = StringTake[{2, 4}, 2]

    #> StringTake["kkkl",Graphics[{}]]
     : Integer or a list of sequence specifications expected at position 2.
     = StringTake[kkkl, -Graphics-]
    """

    messages = {
        "strse": "String or list of strings expected at position 1.",
        # FIXME: mseqs should be: Sequence specification (+n, -n, {+n}, {-n}, {m, n}, or {m, n, s}) or a list
        # of sequence specifications expected at position 2 in
        "mseqs": "Integer or a list of sequence specifications expected at position 2.",
        "take": 'Cannot take positions `1` through `2` in "`3`".',
    }

    def apply(self, string, seqspec, evaluation):
        "StringTake[string_String, seqspec_]"
        # Also called directly by apply_strings for every list element, so
        # `string` is not guaranteed to be a String here.
        result = string.get_string_value()
        if result is None:
            return evaluation.message("StringTake", "strse")

        # Build a (start, stop, step) triple.  A bare integer n means the
        # first n characters (n >= 0) or the last |n| characters (n < 0);
        # anything else is delegated to convert_seq.
        if isinstance(seqspec, Integer):
            pos = seqspec.get_int_value()
            if pos >= 0:
                seq = (1, pos, 1)
            else:
                seq = (pos, None, 1)
        else:
            seq = convert_seq(seqspec)

        if seq is None:
            return evaluation.message("StringTake", "mseqs")

        start, stop, step = seq
        py_slice = python_seq(start, stop, step, len(result))

        if py_slice is None:
            # An open-ended stop (None) stands for "through the last
            # character".  Report it as position -1 rather than passing
            # Python's None into the user-visible message.
            shown_stop = -1 if stop is None else stop
            return evaluation.message("StringTake", "take", start, shown_stop, string)

        return String(result[py_slice])

    def apply_strings(self, strings, spec, evaluation):
        "StringTake[strings__, spec_]"
        # Apply the same specification to every element; give up (returning
        # None) as soon as one element fails, since apply has already
        # emitted the message for it.
        result_list = []
        for string in strings.leaves:
            result = self.apply(string, spec, evaluation)
            if result is None:
                return None
            result_list.append(result)
        return Expression("List", *result_list)
2418
2419
2420
class StringDrop(Builtin):
    """
    <dl>
    <dt>'StringDrop["$string$", $n$]'
        <dd>gives $string$ with the first $n$ characters dropped.
    <dt>'StringDrop["$string$", -$n$]'
        <dd>gives $string$ with the last $n$ characters dropped.
    <dt>'StringDrop["$string$", {$n$}]'
        <dd>gives $string$ with the $n$th character dropped.
    <dt>'StringDrop["$string$", {$m$, $n$}]'
        <dd>gives $string$ with the characters $m$ through $n$ dropped.
    </dl>

    >> StringDrop["abcde", 2]
    = cde
    >> StringDrop["abcde", -2]
    = abc
    >> StringDrop["abcde", {2}]
    = acde
    >> StringDrop["abcde", {2,3}]
    = ade
    >> StringDrop["abcd",{3,2}]
    = abcd
    >> StringDrop["abcd",0]
    = abcd
    """

    messages = {
        "strse": "String expected at position 1.",
        "mseqs": "Integer or list of two Integers are expected at position 2.",
        "drop": 'Cannot drop positions `1` through `2` in "`3`".',
    }

    def apply_with_n(self, string, n, evaluation):
        "StringDrop[string_,n_Integer]"
        # Drop the first n (n > 0) or the last |n| (n < 0) characters.
        if not isinstance(string, String):
            return evaluation.message("StringDrop", "strse")
        if isinstance(n, Integer):
            pos = n.value
            text = string.get_string_value()
            if pos > len(text):
                return evaluation.message("StringDrop", "drop", 1, pos, string)
            if pos < -len(text):
                return evaluation.message("StringDrop", "drop", pos, -1, string)
            if pos > 0:
                return String(text[pos:])
            if pos < 0:
                return String(text[:pos])
            # pos == 0: nothing to drop.
            return string
        return evaluation.message("StringDrop", "mseqs")

    def apply_with_ni_nf(self, string, ni, nf, evaluation):
        "StringDrop[string_,{ni_Integer,nf_Integer}]"
        # Drop characters ni through nf (1-based, negative counts from the
        # end).
        if not isinstance(string, String):
            return evaluation.message("StringDrop", "strse")

        fullstring = string.get_string_value()
        lenfullstring = len(fullstring)
        # Translate negative positions into 1-based positions from the start.
        posi = ni.value
        if posi < 0:
            posi = lenfullstring + posi + 1
        posf = nf.value
        if posf < 0:
            posf = lenfullstring + posf + 1
        # Position 0 is rejected here as well (posi <= 0 / posf <= 0).  A
        # separate early check used to emit "drop" with only two of its three
        # arguments, leaving the string slot of the message unfilled.
        if posf > lenfullstring or posi > lenfullstring or posf <= 0 or posi <= 0:
            # positions out of range
            return evaluation.message("StringDrop", "drop", ni, nf, fullstring)
        if posf < posi:
            return string  # this is what actually mma does
        return String(fullstring[: (posi - 1)] + fullstring[posf:])

    def apply_with_ni(self, string, ni, evaluation):
        "StringDrop[string_,{ni_Integer}]"
        # Drop the single character at position ni (1-based, negative counts
        # from the end).
        if not isinstance(string, String):
            return evaluation.message("StringDrop", "strse")
        fullstring = string.get_string_value()
        lenfullstring = len(fullstring)
        posi = ni.value
        if posi < 0:
            posi = lenfullstring + posi + 1
        # Covers position 0 too, so the message always carries the string.
        if posi > lenfullstring or posi <= 0:
            return evaluation.message("StringDrop", "drop", ni, ni, fullstring)
        return String(fullstring[: (posi - 1)] + fullstring[posi:])

    def apply(self, string, something, evaluation):
        "StringDrop[string_,something___]"
        # Catch-all for argument shapes none of the other rules accept.
        if not isinstance(string, String):
            return evaluation.message("StringDrop", "strse")
        return evaluation.message("StringDrop", "mseqs")
2514
2515
class HammingDistance(Builtin):
    """
    <dl>
    <dt>'HammingDistance[$u$, $v$]'
      <dd>returns the Hamming distance between $u$ and $v$, i.e. the number of different elements.
      $u$ and $v$ may be lists or strings.
    </dl>

    >> HammingDistance[{1, 0, 1, 0}, {1, 0, 0, 1}]
    = 2

    >> HammingDistance["time", "dime"]
    = 1

    >> HammingDistance["TIME", "dime", IgnoreCase -> True]
    = 1
    """

    messages = {
        "idim": "`1` and `2` must be of same length.",
    }

    options = {
        "IgnoreCase": "False",
    }

    @staticmethod
    def _compute(u, v, sameQ, evaluation):
        # Count the positions at which the two equally long sequences differ
        # according to sameQ; unequal lengths produce the "idim" message.
        if len(u) == len(v):
            mismatches = [1 for x, y in zip(u, v) if not sameQ(x, y)]
            return Integer(len(mismatches))
        evaluation.message("HammingDistance", "idim", u, v)
        return None

    def apply_list(self, u, v, evaluation):
        "HammingDistance[u_List, v_List]"

        def same_element(x, y):
            return x.sameQ(y)

        return HammingDistance._compute(u.leaves, v.leaves, same_element, evaluation)

    def apply_string(self, u, v, evaluation, options):
        "HammingDistance[u_String, v_String, OptionsPattern[HammingDistance]]"
        ignore_case = self.get_option(options, "IgnoreCase", evaluation)
        py_u, py_v = u.get_string_value(), v.get_string_value()
        if ignore_case and ignore_case.is_true():
            # Case-insensitive comparison is done on lower-cased copies.
            py_u, py_v = py_u.lower(), py_v.lower()

        def same_char(x, y):
            return x == y

        return HammingDistance._compute(py_u, py_v, same_char, evaluation)
2565
2566
class _StringDistance(Builtin):
    # Common front end for edit-distance builtins.  Subclasses supply
    # `_distance(s1, s2, sameQ)`; this class unpacks strings or lists and
    # handles the IgnoreCase option.
    options = {"IgnoreCase": "False"}

    def apply(self, a, b, evaluation, options):
        "%(name)s[a_, b_, OptionsPattern[%(name)s]]"
        if isinstance(a, String) and isinstance(b, String):
            py_a = a.get_string_value()
            py_b = b.get_string_value()
            if options["System`IgnoreCase"] == SymbolTrue:

                def normalize(c):
                    # Case-fold, then decompose, one character at a time.
                    return unicodedata.normalize("NFKD", c.casefold())

                # Note: the strings become lists of normalized characters.
                py_a = [normalize(c) for c in py_a]
                py_b = [normalize(c) for c in py_b]
            return Integer(self._distance(py_a, py_b, lambda u, v: u == v))
        elif a.get_head_name() == "System`List" and b.get_head_name() == "System`List":
            return Integer(self._distance(a.leaves, b.leaves, lambda u, v: u.sameQ(v)))
        else:
            # Unsupported argument types: return None so the call is left as
            # written.  This used to build EditDistance[a, b] regardless of
            # which subclass was running, which rewrote e.g.
            # DamerauLevenshteinDistance[...] into EditDistance[...] and
            # dropped any options.
            return None
2591
2592
2593# Levenshtein's algorithm is defined by the following construction:
2594# (adapted from https://de.wikipedia.org/wiki/Levenshtein-Distanz)
2595#
2596# given two strings s1, s2, we build a matrix D sized (len(s1) + 1,
2597# len(s2) + 1) and fill it using the following rules:
2598#
2599# (1) D(0, 0) = 0
2600# (2) D(i, 0) = i, 1 <= i <= len(s1)
2601# (3) D(0, j) = j, 1 <= j <= len(s2)
2602# (4) D(i, j) = minimum of
#     D(i - 1, j - 1) + 0 if s1(i) = s2(j)
2604#     D(i - 1, j - 1) + 1 (substitution)
2605#     D(i, j - 1) + 1     (insertion)
2606#     D(i - 1, j) + 1     (deletion)
2607#
# The computed distance will be in D(len(s1), len(s2)).
2609#
2610# note: double brackets indicate 1-based indices below, e.g. s1[[1]]
2611
2612
2613def _one_based(l):  # makes an enumerated generator 1-based
2614    return ((i + 1, x) for i, x in l)
2615
2616
2617def _prev_curr(l):  # yields pairs of (x[i - 1], x[i]) for i in 1, 2, ...
2618    prev = None
2619    for curr in l:
2620        yield prev, curr
2621        prev = curr
2622
2623
2624def _levenshtein_d0(s2):  # compute D(0, ...)
2625    return list(range(len(s2) + 1))  # see (1), (3)
2626
2627
def _levenshtein_di(c1, s2, i, d_prev, sameQ, cost):
    """Generate row D(i, ...) of the Levenshtein matrix, one cell at a time.

    c1 is the i-th element of the first sequence (1-based), d_prev is the
    finished row D(i - 1, ...), sameQ compares two elements and cost is the
    price of a substitution.
    """
    left = i  # D(i, j - 1); begins as D(i, 0) = i, see (2)
    yield left

    for j, c2 in enumerate(s2, 1):  # c2 = s2[[j]]
        step = 0 if sameQ(c1, c2) else cost

        # see (4)
        diagonal = d_prev[j - 1] + step  # D(i - 1, j - 1) + step; substitution
        insertion = left + 1  # D(i, j - 1) + 1
        deletion = d_prev[j] + 1  # D(i - 1, j) + 1

        left = min(diagonal, insertion, deletion)
        yield left
2645
2646
def _levenshtein(s1, s2, sameQ: Callable[..., bool]):
    """Return the Levenshtein distance between s1 and s2.

    Only one matrix row is kept at a time; the answer is the last cell of
    the final row.
    """
    row = _levenshtein_d0(s2)
    for i, c1 in _one_based(enumerate(s1)):  # c1 = s1[[i]]
        row = [*_levenshtein_di(c1, s2, i, row, sameQ, 1)]
    return row[-1]
2652
2653
def _damerau_levenshtein(s1, s2, sameQ: Callable[..., bool]):
    """Return the distance between s1 and s2 with adjacent transpositions allowed.

    Rows are produced as in _levenshtein, with one extra rule:

    if i > 1 and j > 1 and a[i] == b[j - 1] and a[i - 1] == b[j] then
        D(i, j) = minimum(D(i, j), D(i - 2, j - 2) + transposition_cost)
    """

    def row(d_prev_prev, d_prev, i, prev_c1, c1, cost):
        # d_prev_prev = D(i - 2, ...), d_prev = D(i - 1, ...),
        # prev_c1 = s1[[i - 1]], c1 = s1[[i]]; yields the cells of D(i, ...).
        plain_row = _levenshtein_di(c1, s2, i, d_prev, sameQ, cost)
        for j, cell in enumerate(plain_row):
            # transposition: s1[[i]] = s2[[j - 1]] and s1[[i - 1]] = s2[[j]]
            if (
                i > 1
                and j > 1
                and sameQ(c1, s2[j - 2])
                and sameQ(prev_c1, s2[j - 1])
            ):
                cell = min(cell, d_prev_prev[j - 2] + cost)
            yield cell

    two_back = None
    one_back = _levenshtein_d0(s2)
    for i, (prev_c1, c1) in _one_based(enumerate(_prev_curr(s1))):
        # The right-hand side is evaluated in full before either name is
        # rebound, so the row is computed from the previous two rows.
        two_back, one_back = one_back, list(
            row(two_back, one_back, i, prev_c1, c1, 1)
        )

    return one_back[-1]
2679
2680
2681def _levenshtein_like_or_border_cases(s1, s2, sameQ: Callable[..., bool], compute):
2682    if len(s1) == len(s2) and all(sameQ(c1, c2) for c1, c2 in zip(s1, s2)):
2683        return 0
2684
2685    if len(s1) < len(s2):
2686        s1, s2 = s2, s1
2687
2688    if len(s2) == 0:
2689        return len(s1)
2690
2691    return compute(s1, s2, sameQ)
2692
2693
class EditDistance(_StringDistance):
    """
    <dl>
    <dt>'EditDistance[$a$, $b$]'
        <dd>returns the Levenshtein distance of $a$ and $b$, which is defined as the minimum number of
        insertions, deletions and substitutions on the constituents of $a$ and $b$ needed to transform
        one into the other.
    </dl>

    >> EditDistance["kitten", "kitchen"]
     = 2

    >> EditDistance["abc", "ac"]
     = 1

    >> EditDistance["abc", "acb"]
     = 2

    >> EditDistance["azbc", "abxyc"]
     = 3

    The IgnoreCase option makes EditDistance ignore the case of letters:
    >> EditDistance["time", "Thyme"]
     = 3

    >> EditDistance["time", "Thyme", IgnoreCase -> True]
     = 2

    EditDistance also works on lists:
    >> EditDistance[{1, E, 2, Pi}, {1, E, Pi, 2}]
     = 2
    """

    def _distance(self, s1, s2, sameQ: Callable[..., bool]):
        # Plain Levenshtein metric; trivial inputs are answered by the shared
        # border-case wrapper before the real computation runs.
        metric = _levenshtein
        return _levenshtein_like_or_border_cases(s1, s2, sameQ, metric)
2729
2730
class DamerauLevenshteinDistance(_StringDistance):
    """
    <dl>
    <dt>'DamerauLevenshteinDistance[$a$, $b$]'
        <dd>returns the Damerau-Levenshtein distance of $a$ and $b$, which is defined as the minimum number of
        transpositions, insertions, deletions and substitutions needed to transform one into the other.
        In contrast to EditDistance, DamerauLevenshteinDistance counts transposition of adjacent items (e.g.
        "ab" into "ba") as one operation of change.
    </dl>

    >> DamerauLevenshteinDistance["kitten", "kitchen"]
     = 2

    >> DamerauLevenshteinDistance["abc", "ac"]
     = 1

    >> DamerauLevenshteinDistance["abc", "acb"]
     = 1

    >> DamerauLevenshteinDistance["azbc", "abxyc"]
     = 3

    The IgnoreCase option makes DamerauLevenshteinDistance ignore the case of letters:
    >> DamerauLevenshteinDistance["time", "Thyme"]
     = 3

    >> DamerauLevenshteinDistance["time", "Thyme", IgnoreCase -> True]
     = 2

    DamerauLevenshteinDistance also works on lists:
    >> DamerauLevenshteinDistance[{1, E, 2, Pi}, {1, E, Pi, 2}]
     = 1
    """

    def _distance(self, s1, s2, sameQ: Callable[..., bool]):
        # Levenshtein metric extended with adjacent transpositions; trivial
        # inputs are answered by the shared border-case wrapper first.
        metric = _damerau_levenshtein
        return _levenshtein_like_or_border_cases(s1, s2, sameQ, metric)
2767
2768
class RemoveDiacritics(Builtin):
    """
    <dl>
    <dt>'RemoveDiacritics[$s$]'
        <dd>returns a version of $s$ with all diacritics removed.
    </dl>

    >> RemoveDiacritics["en prononçant pêcher et pécher"]
     = en prononcant pecher et pecher

    >> RemoveDiacritics["piñata"]
     = pinata
    """

    def apply(self, s, evaluation):
        "RemoveDiacritics[s_String]"
        # Decompose so that every accent becomes a separate combining mark,
        # drop those marks, then recompose what is left.  The previous
        # ASCII round-trip also deleted every non-ASCII base character
        # (Greek or Cyrillic letters, "ø", "ß", ...), not just the diacritics.
        decomposed = unicodedata.normalize("NFKD", s.get_string_value())
        without_marks = "".join(
            char for char in decomposed if not unicodedata.combining(char)
        )
        return String(unicodedata.normalize("NFKC", without_marks))
2790
2791
class Transliterate(Builtin):
    """
    <dl>
    <dt>'Transliterate[$s$]'
        <dd>transliterates a text in some script into an ASCII string.
    </dl>

    # The following examples were taken from
    # https://en.wikipedia.org/wiki/Iliad,
    # https://en.wikipedia.org/wiki/Russian_language, and
    # https://en.wikipedia.org/wiki/Hiragana

    >> Transliterate["μήτηρ γάρ τέ μέ φησι θεὰ Θέτις ἀργυρόπεζα"]
     = meter gar te me phesi thea Thetis arguropeza

    >> Transliterate["Алекса́ндр Пу́шкин"]
     = Aleksandr Pushkin

    >> Transliterate["つかう"]
     = tsukau
    """

    requires = ("unidecode",)

    def apply(self, s, evaluation):
        "Transliterate[s_String]"
        # Imported at call time; the package is listed in `requires` above.
        from unidecode import unidecode

        source_text = s.get_string_value()
        ascii_text = unidecode(source_text)
        return String(ascii_text)
2821
2822
class StringTrim(Builtin):
    """
    <dl>
    <dt>'StringTrim[$s$]'
        <dd>returns a version of $s$ with whitespace removed from start and end.
    <dt>'StringTrim[$s$, $patt$]'
        <dd>returns a version of $s$ with substrings matching $patt$ removed from start and end.
    </dl>

    >> StringJoin["a", StringTrim["  \\tb\\n "], "c"]
     = abc

    >> StringTrim["ababaxababyaabab", RegularExpression["(ab)+"]]
     = axababya
    """

    def apply(self, s, evaluation):
        "StringTrim[s_String]"
        # Only space, tab and newline are stripped.
        return String(s.get_string_value().strip(" \t\n"))

    def apply_pattern(self, s, patt, expression, evaluation):
        "StringTrim[s_String, patt_]"
        text = s.get_string_value()
        if not text:
            return s

        py_patt = to_regex(patt, evaluation)
        if py_patt is None:
            return evaluation.message("StringExpression", "invld", patt, expression)

        # Leading match: re.match anchors the *whole* pattern at the start of
        # the text.  Prefixing r"\A" to the raw pattern text anchored only
        # the first branch of a top-level alternation such as "a|b".
        m = re.match(py_patt, text)
        left = m.end(0) if m else 0

        # Trailing match: wrap the pattern in a non-capturing group so that
        # the end anchor applies to every alternative, not just the last one.
        m = re.search(r"(?:" + py_patt + r")\Z", text)
        right = m.start(0) if m else len(text)

        # If the two matches overlap (left > right) the slice is empty.
        return String(text[left:right])
2868
2869
class StringInsert(Builtin):
    """
    <dl>
      <dt>'StringInsert["$string$", "$snew$", $n$]'
      <dd>yields a string with $snew$ inserted starting at position $n$ in $string$.

      <dt>'StringInsert["$string$", "$snew$", -$n$]'
      <dd>inserts a at position $n$ from the end of "$string$".

      <dt>'StringInsert["$string$", "$snew$", {$n_1$, $n_2$, ...}]'
      <dd>inserts a copy of $snew$ at each position $n_i$ in $string$;
        the $n_i$ are taken before any insertion is done.

      <dt>'StringInsert[{$s_1$, $s_2$, ...}, "$snew$", $n$]'
      <dd>gives the list of resutls for each of the $s_i$.
    </dl>

    >> StringInsert["noting", "h", 4]
     = nothing

    #> StringInsert["abcdefghijklm", "X", 15]
     : Cannot insert at position 15 in abcdefghijklm.
     = StringInsert[abcdefghijklm, X, 15]

    #> StringInsert[abcdefghijklm, "X", 4]
     : String or list of strings expected at position 1 in StringInsert[abcdefghijklm, X, 4].
     = StringInsert[abcdefghijklm, X, 4]

    #> StringInsert["abcdefghijklm", X, 4]
     : String expected at position 2 in StringInsert[abcdefghijklm, X, 4].
     = StringInsert[abcdefghijklm, X, 4]

    #> StringInsert["abcdefghijklm", "X", a]
     : Position specification a in StringInsert[abcdefghijklm, X, a] is not a machine-sized integer or a list of machine-sized integers.
     = StringInsert[abcdefghijklm, X, a]

    #> StringInsert["abcdefghijklm", "X", 0]
     : Cannot insert at position 0 in abcdefghijklm.
     =  StringInsert[abcdefghijklm, X, 0]

    >> StringInsert["note", "d", -1]
     = noted

    >> StringInsert["here", "t", -5]
     = there

    #> StringInsert["abcdefghijklm", "X", -15]
     : Cannot insert at position -15 in abcdefghijklm.
     = StringInsert[abcdefghijklm, X, -15]

    >> StringInsert["adac", "he", {1, 5}]
     = headache

    #> StringInsert["abcdefghijklm", "X", {1, -1, 14, -14}]
     = XXabcdefghijklmXX

    #> StringInsert["abcdefghijklm", "X", {1, 0}]
     : Cannot insert at position 0 in abcdefghijklm.
     = StringInsert[abcdefghijklm, X, {1, 0}]

    #> StringInsert["", "X", {1}]
     = X

    #> StringInsert["", "X", {1, -1}]
     = XX

    #> StringInsert["", "", {1}]
     = #<--#

    #> StringInsert["", "X", {1, 2}]
     : Cannot insert at position 2 in .
     = StringInsert[, X, {1, 2}]

    #> StringInsert["abcdefghijklm", "", {1, 2, 3, 4 ,5, -6}]
     = abcdefghijklm

    #> StringInsert["abcdefghijklm", "X", {}]
     = abcdefghijklm

    >> StringInsert[{"something", "sometimes"}, " ", 5]
     = {some thing, some times}

    #> StringInsert[{"abcdefghijklm", "Mathics"}, "X", 13]
     : Cannot insert at position 13 in Mathics.
     = {abcdefghijklXm, StringInsert[Mathics, X, 13]}

    #> StringInsert[{"", ""}, "", {1, 1, 1, 1}]
     = {, }

    #> StringInsert[{"abcdefghijklm", "Mathics"}, "X", {0, 2}]
     : Cannot insert at position 0 in abcdefghijklm.
     : Cannot insert at position 0 in Mathics.
     = {StringInsert[abcdefghijklm, X, {0, 2}], StringInsert[Mathics, X, {0, 2}]}

    #> StringInsert[{"abcdefghijklm", Mathics}, "X", {1, 2}]
     : String or list of strings expected at position 1 in StringInsert[{abcdefghijklm, Mathics}, X, {1, 2}].
     = StringInsert[{abcdefghijklm, Mathics}, X, {1, 2}]

    #> StringInsert[{"", "Mathics"}, "X", {1, 1, -1}]
     = {XXX, XXMathicsX}

    >> StringInsert["1234567890123456", ".", Range[-16, -4, 3]]
     = 1.234.567.890.123.456"""

    messages = {
        "strse": "String or list of strings expected at position `1` in `2`.",
        "string": "String expected at position `1` in `2`.",
        "ins": "Cannot insert at position `1` in `2`.",
        "psl": "Position specification `1` in `2` is not a machine-sized integer or a list of machine-sized integers.",
    }

    # Insert the Python string ``add`` into the Python string ``str`` at every
    # (1-based, possibly negative) position listed in ``lpos``.
    #
    # Returns the resulting Python string.  If any position is out of range,
    # an "ins" message is issued and the formatted, unevaluated
    # StringInsert[...] call is returned instead of a modified string.
    def _insert(self, str, add, lpos, evaluation):
        text = str  # local alias; the parameter name hides the builtin
        last_slot = len(text) + 1  # one past the final character is valid

        # Reject the whole request as soon as one position is out of range.
        for pos in lpos:
            if not 1 <= abs(pos) <= last_slot:
                evaluation.message("StringInsert", "ins", Integer(pos), String(text))
                shown_pos = lpos[0] if len(lpos) == 1 else lpos
                return evaluation.format_output(
                    Expression("StringInsert", text, add, shown_pos)
                )

        # Turn every position into a 0-based cut offset in the original text
        # (negative positions count from the end) and process them in order.
        cuts = sorted(p - 1 if p > 0 else last_slot + p for p in lpos)

        pieces = []
        start = 0
        for stop in cuts:
            pieces.append(text[start:stop])
            pieces.append(add)
            start = stop
        pieces.append(text[start:])

        return "".join(pieces)

    # Validate the three arguments, emitting the matching message on failure,
    # then insert into a single string or into each string of a list.
    def apply(self, strsource, strnew, pos, evaluation):
        "StringInsert[strsource_, strnew_, pos_]"

        exp = Expression("StringInsert", strsource, strnew, pos)

        py_strnew = strnew.get_string_value()
        if py_strnew is None:
            return evaluation.message("StringInsert", "string", Integer(2), exp)

        # A position spec is either one integer or a list of integers;
        # an empty list means there is nothing to insert.
        if pos.has_form("List", None):
            candidates = pos.get_leaves()
            if not candidates:
                return strsource
        else:
            candidates = [pos]

        positions = []
        for candidate in candidates:
            value = candidate.get_int_value()
            if value is None:
                return evaluation.message("StringInsert", "psl", pos, exp)
            positions.append(value)

        # Single string source.
        if not strsource.has_form("List", None):
            single = strsource.get_string_value()
            if single is None:
                return evaluation.message("StringInsert", "strse", Integer1, exp)
            return String(self._insert(single, py_strnew, positions, evaluation))

        # List of string sources: every element has to be a string.
        sources = [sub.get_string_value() for sub in strsource.leaves]
        if any(sub is None for sub in sources):
            return evaluation.message("StringInsert", "strse", Integer1, exp)

        inserted = [
            String(self._insert(source, py_strnew, positions, evaluation))
            for source in sources
        ]
        return Expression("List", *inserted)
3051
3052
def _pattern_search(name, string, patt, evaluation, options, matched):
    """
    Shared implementation of StringContainsQ and StringFreeQ.

    ``patt`` is one string pattern or a list of them; ``string`` is one
    string or a list of strings.  For each string the answer is True when
    "some pattern is found in it" agrees with ``matched``, and False
    otherwise.  A list of strings yields a list of answers.

    An invalid pattern element produces the StringExpression "invld"
    message, a non-string subject produces the "strse" message of the
    builtin called ``name``; in both cases the result of
    ``evaluation.message`` is returned.
    """
    # Translate every pattern first; a single bad one aborts the search.
    candidates = patt.get_leaves() if patt.has_form("List", None) else [patt]
    regexes = []
    for candidate in candidates:
        regex = to_regex(candidate, evaluation)
        if regex is None:
            return evaluation.message("StringExpression", "invld", candidate, patt)
        regexes.append(regex)

    flags = re.MULTILINE
    if options["System`IgnoreCase"] == SymbolTrue:
        flags |= re.IGNORECASE

    def verdict(text):
        # True exactly when the search outcome agrees with what was asked for.
        found = any(re.search(regex, text, flags=flags) for regex in regexes)
        return SymbolTrue if found == bool(matched) else SymbolFalse

    # Check the subject(s) and run the search; fall through on bad input.
    if string.has_form("List", None):
        texts = [leaf.get_string_value() for leaf in string.leaves]
        if all(text is not None for text in texts):
            return Expression(SymbolList, *[verdict(text) for text in texts])
    else:
        text = string.get_string_value()
        if text is not None:
            return verdict(text)

    return evaluation.message(name, "strse", Integer1, Expression(name, string, patt))
3090
3091
class StringContainsQ(Builtin):
    """
    <dl>
    <dt>'StringContainsQ["$string$", $patt$]'
        <dd>returns True if any part of $string$ matches $patt$, and returns False otherwise.
    <dt>'StringContainsQ[{"s1", "s2", ...}, patt]'
        <dd>returns the list of results for each element of string list.
    <dt>'StringContainsQ[patt]'
        <dd>represents an operator form of StringContainsQ that can be applied to an expression.
    </dl>

    >> StringContainsQ["mathics", "m" ~~ __ ~~ "s"]
     = True

    >> StringContainsQ["mathics", "a" ~~ __ ~~ "m"]
     = False

    #> StringContainsQ["Hello", "o"]
     = True

    #> StringContainsQ["a"]["abcd"]
     = True

    #> StringContainsQ["Mathics", "ma", IgnoreCase -> False]
     = False

    >> StringContainsQ["Mathics", "MA" , IgnoreCase -> True]
     = True

    #> StringContainsQ["", "Empty String"]
     = False

    #> StringContainsQ["", ___]
     = True

    #> StringContainsQ["Empty Pattern", ""]
     = True

    #> StringContainsQ[notastring, "n"]
     : String or list of strings expected at position 1 in StringContainsQ[notastring, n].
     = StringContainsQ[notastring, n]

    #> StringContainsQ["Welcome", notapattern]
     : Element notapattern is not a valid string or pattern element in notapattern.
     = StringContainsQ[Welcome, notapattern]

    >> StringContainsQ[{"g", "a", "laxy", "universe", "sun"}, "u"]
     = {False, False, False, True, True}

    #> StringContainsQ[{}, "list of string is empty"]
     = {}

    >> StringContainsQ["e" ~~ ___ ~~ "u"] /@ {"The Sun", "Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"}
     = {True, True, True, False, False, False, False, False, True}

    ## special cases, Mathematica allows list of patterns
    #> StringContainsQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}]
     = {False, False, True, True, False}

    #> StringContainsQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}, IgnoreCase -> True]
     = {False, False, True, True, True}

    #> StringContainsQ[{"A", "Galaxy", "Far", "Far", "Away"}, {}]
     = {False, False, False, False, False}

    #> StringContainsQ[{"A", Galaxy, "Far", "Far", Away}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}]
     : String or list of strings expected at position 1 in StringContainsQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}].
     = StringContainsQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}]

    #> StringContainsQ[{"A", "Galaxy", "Far", "Far", "Away"}, {F ~~ __ ~~ "r", aw ~~ ___}]
     : Element F ~~ __ ~~ r is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}.
     = StringContainsQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}]
    ## Mathematica can detemine correct invalid element in the pattern, it reports error:
    ## Element F is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}.
    """

    options = {
        "IgnoreCase": "False",
    }

    # Operator form: StringContainsQ[patt] applied to expr.
    rules = {
        "StringContainsQ[patt_][expr_]": "StringContainsQ[expr, patt]",
    }

    messages = {
        "strse": "String or list of strings expected at position `1` in `2`.",
    }

    # Delegates to the shared search helper, asking for True on a match.
    def apply(self, string, patt, evaluation, options):
        "StringContainsQ[string_, patt_, OptionsPattern[%(name)s]]"
        builtin_name = type(self).__name__
        return _pattern_search(builtin_name, string, patt, evaluation, options, True)
3185
3186
class StringFreeQ(Builtin):
    """
    <dl>
    <dt>'StringFreeQ["$string$", $patt$]'
        <dd>returns True if no substring in $string$ matches the string expression $patt$, and returns False otherwise.
    <dt>'StringFreeQ[{"s1", "s2", ...}, patt]'
        <dd>returns the list of results for each element of string list.
    <dt>'StringFreeQ["string", {p1, p2, ...}]'
        <dd>returns True if no substring matches any of the $pi$.
    <dt>'StringFreeQ[patt]'
        <dd>represents an operator form of StringFreeQ that can be applied to an expression.
    </dl>

    >> StringFreeQ["mathics", "m" ~~ __ ~~ "s"]
     = False

    >> StringFreeQ["mathics", "a" ~~ __ ~~ "m"]
     = True

    #> StringFreeQ["Hello", "o"]
     = False

    #> StringFreeQ["a"]["abcd"]
     = False

    #> StringFreeQ["Mathics", "ma", IgnoreCase -> False]
     = True

    >> StringFreeQ["Mathics", "MA" , IgnoreCase -> True]
     = False

    #> StringFreeQ["", "Empty String"]
     = True

    #> StringFreeQ["", ___]
     = False

    #> StringFreeQ["Empty Pattern", ""]
     = False

    #> StringFreeQ[notastring, "n"]
     : String or list of strings expected at position 1 in StringFreeQ[notastring, n].
     = StringFreeQ[notastring, n]

    #> StringFreeQ["Welcome", notapattern]
     : Element notapattern is not a valid string or pattern element in notapattern.
     = StringFreeQ[Welcome, notapattern]

    >> StringFreeQ[{"g", "a", "laxy", "universe", "sun"}, "u"]
     = {True, True, True, False, False}

    #> StringFreeQ[{}, "list of string is empty"]
     = {}

    >> StringFreeQ["e" ~~ ___ ~~ "u"] /@ {"The Sun", "Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"}
     = {False, False, False, True, True, True, True, True, False}

    #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}]
     = {True, True, False, False, True}

    >> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}, IgnoreCase -> True]
     = {True, True, False, False, False}

    #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {}]
     = {True, True, True, True, True}

    #> StringFreeQ[{"A", Galaxy, "Far", "Far", Away}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}]
     : String or list of strings expected at position 1 in StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}].
     = StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}]

    #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {F ~~ __ ~~ "r", aw ~~ ___}]
     : Element F ~~ __ ~~ r is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}.
     = StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}]
    ## Mathematica can detemine correct invalid element in the pattern, it reports error:
    ## Element F is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}.
    """

    options = {
        "IgnoreCase": "False",
    }

    # Operator form: StringFreeQ[patt] applied to expr.
    rules = {
        "StringFreeQ[patt_][expr_]": "StringFreeQ[expr, patt]",
    }

    messages = {
        "strse": "String or list of strings expected at position `1` in `2`.",
    }

    # Delegates to the shared search helper, asking for True when nothing matches.
    def apply(self, string, patt, evaluation, options):
        "StringFreeQ[string_, patt_, OptionsPattern[%(name)s]]"
        builtin_name = type(self).__name__
        return _pattern_search(builtin_name, string, patt, evaluation, options, False)
3281
3282
class StringRiffle(Builtin):
    """
    <dl>
    <dt>'StringRiffle[{s1, s2, s3, ...}]'
      <dd>returns a new string by concatenating all the $si$, with spaces inserted between them.
    <dt>'StringRiffle[list, sep]'
      <dd>inserts the separator $sep$ between all elements in $list$.
    <dt>'StringRiffle[list, {"left", "sep", "right"}]'
      <dd>use $left$ and $right$ as delimiters after concatenation.

    ## These 2 forms are not currently implemented
    ## <dt>'StringRiffle[{{s11, s12, ...}, {s21, s22, ...}, ...}]'
    ##   <dd>returns a new string by concatenating the $sij$, and inserting spaces at the lowest level and newlines at the higher level.
    ## <dt>'StringRiffle[list, sep1, sep2, ...]'
    ##   <dd>inserts separator $sepi$ between elements of list at level i.
    </dl>

    >> StringRiffle[{"a", "b", "c", "d", "e"}]
     = a b c d e

    #> StringRiffle[{a, b, c, "d", e, "f"}]
     = a b c d e f

    ## 1st is not a list
    #> StringRiffle["abcdef"]
     : List expected at position 1 in StringRiffle[abcdef].
     : StringRiffle called with 1 argument; 2 or more arguments are expected.
     = StringRiffle[abcdef]

    #> StringRiffle[{"", "", ""}] // FullForm
     = "  "

    ## This form is not supported
    #> StringRiffle[{{"a", "b"}, {"c", "d"}}]
     : Sublist form in position 1 is is not implemented yet.
     = StringRiffle[{{a, b}, {c, d}}]

    >> StringRiffle[{"a", "b", "c", "d", "e"}, ", "]
     = a, b, c, d, e

    #> StringRiffle[{"a", "b", "c", "d", "e"}, sep]
     : String expected at position 2 in StringRiffle[{a, b, c, d, e}, sep].
     = StringRiffle[{a, b, c, d, e}, sep]

    >> StringRiffle[{"a", "b", "c", "d", "e"}, {"(", " ", ")"}]
     = (a b c d e)

    ## The delimiters are kept even when there is nothing to join
    #> StringRiffle[{}, {"(", " ", ")"}]
     = ()

    #> StringRiffle[{"a", "b", "c", "d", "e"}, {" ", ")"}]
     : String expected at position 2 in StringRiffle[{a, b, c, d, e}, { , )}].
     = StringRiffle[{a, b, c, d, e}, { , )}]
    #> StringRiffle[{"a", "b", "c", "d", "e"}, {left, " ", "."}]
     : String expected at position 2 in StringRiffle[{a, b, c, d, e}, {left,  , .}].
     = StringRiffle[{a, b, c, d, e}, {left,  , .}]

    ## This form is not supported
    #> StringRiffle[{"a", "b", "c"}, "+", "-"]
    ## Mathematica result: a+b+c, but we are not support multiple separators
     :  Multiple separators form is not implemented yet.
     = StringRiffle[{a, b, c}, +, -]
    """

    attributes = ("ReadProtected",)

    messages = {
        "list": "List expected at position `1` in `2`.",
        "argmu": "StringRiffle called with 1 argument; 2 or more arguments are expected.",
        "argm": "StringRiffle called with 0 arguments; 2 or more arguments are expected.",
        "string": "String expected at position `1` in `2`.",
        "sublist": "Sublist form in position 1 is is not implemented yet.",
        "mulsep": "Multiple separators form is not implemented yet.",
    }

    # Join the output-form text of every element of ``liststr``.
    #
    # ``seps`` holds zero or one separator specification: a string, or a
    # three-string list {left, sep, right}.  With no specification a single
    # space is used.  Invalid arguments produce a message and leave the
    # expression unevaluated (the result of ``evaluation.message`` is
    # returned).
    def apply(self, liststr, seps, evaluation):
        "StringRiffle[liststr_, seps___]"
        separators = seps.get_sequence()
        # Expression shown in messages; omit the empty sequence when no
        # separator was given.
        exp = (
            Expression("StringRiffle", liststr, seps)
            if separators
            else Expression("StringRiffle", liststr)
        )

        # Validate separators
        if len(separators) > 1:
            return evaluation.message("StringRiffle", "mulsep")
        elif len(separators) == 1:
            if separators[0].has_form("List", None):
                if len(separators[0].leaves) != 3 or any(
                    not isinstance(s, String) for s in separators[0].leaves
                ):
                    return evaluation.message("StringRiffle", "string", Integer(2), exp)
            elif not isinstance(separators[0], String):
                return evaluation.message("StringRiffle", "string", Integer(2), exp)

        # Validate list of string
        if not liststr.has_form("List", None):
            evaluation.message("StringRiffle", "list", Integer1, exp)
            return evaluation.message("StringRiffle", "argmu", exp)
        elif any(leaf.has_form("List", None) for leaf in liststr.leaves):
            return evaluation.message("StringRiffle", "sublist")

        # Determine the delimiters and the separation token
        left, sep, right = "", " ", ""
        if separators:
            if separators[0].has_form("List", None):
                # Already validated above: exactly three String leaves.
                left, sep, right = (leaf.value for leaf in separators[0].leaves)
            else:
                sep = separators[0].get_string_value()

        # Non-string elements are rendered the way they would be printed.
        texts = [
            leaf.format(evaluation, "System`OutputForm").boxes_to_text(
                evaluation=evaluation
            )
            for leaf in liststr.leaves
        ]

        # Both delimiters are always emitted, so an empty list yields
        # left + right instead of silently dropping the right delimiter.
        return String(left + sep.join(texts) + right)
3409