1# -*- coding: utf-8 -*- 2 3""" 4Strings and Characters 5""" 6 7import io 8import re 9import sys 10from sys import version_info 11import unicodedata 12from binascii import hexlify, unhexlify 13from heapq import heappush, heappop 14from typing import Any, Callable, List 15 16from mathics.version import __version__ # noqa used in loading to check consistency. 17from mathics.builtin.base import ( 18 BinaryOperator, 19 Builtin, 20 Test, 21 Predefined, 22 PrefixOperator, 23) 24from mathics.core.expression import ( 25 Expression, 26 Symbol, 27 SymbolFailed, 28 SymbolFalse, 29 SymbolTrue, 30 SymbolList, 31 String, 32 Integer, 33 Integer0, 34 Integer1, 35 from_python, 36 string_list, 37) 38from mathics.core.parser import MathicsFileLineFeeder, parse 39from mathics.builtin.lists import python_seq, convert_seq 40from mathics.settings import SYSTEM_CHARACTER_ENCODING 41from mathics_scanner import TranslateError 42 43_regex_longest = { 44 "+": "+", 45 "*": "*", 46} 47 48_regex_shortest = { 49 "+": "+?", 50 "*": "*?", 51} 52 53 54alphabet_descriptions = { 55 "English": { 56 "Lowercase": "abcdefghijklmnopqrstuvwxyz", 57 "Uppercase": "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 58 }, 59 "Spanish": { 60 "Lowercase": "abcdefghijklmnñopqrstuvwxyz", 61 "Uppercase": "ABCDEFGHIJKLMNÑOPQRSTUVWXYZ", 62 }, 63 "Greek": { 64 "Lowercase": "αβγδεζηθικλμνξοπρστυφχψω", 65 "Uppercase": "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", 66 }, 67 "Cyrillic": { 68 "Lowercase": "абвгґдђѓеёєжзѕиіїйјклљмнњопрстћќуўфхцчџшщъыьэюя", 69 "Uppercase": "АБВГҐДЂЃЕЁЄЖЗЅИІЇЙЈКЛЉМНЊОПРСТЋЌУЎФХЦЧЏШЩЪЫЬЭЮЯ", 70 }, 71} 72 73alphabet_alias={ 74 "English": "English", 75 "French": "English", 76 "German": "English", 77 "Spanish": "Spanish", 78 "Greek": "Greek", 79 "Cyrillic": "Cyrillic", 80 "Russian": "Cyrillic", 81} 82 83 84def _encode_pname(name): 85 return "n" + hexlify(name.encode("utf8")).decode("utf8") 86 87 88def _decode_pname(name): 89 return unhexlify(name[1:]).decode("utf8") 90 91 92def _evaluate_match(s, m, evaluation): 93 replace = dict( 94 (_decode_pname(name), String(value)) for name, value in m.groupdict().items() 95 ) 96 return s.replace_vars(replace, in_scoping=False).evaluate(evaluation) 97 98 99def _parallel_match(text, rules, flags, limit): 100 heap = [] 101 102 def push(i, iter, form): 103 m = None 104 try: 105 m = next(iter) 106 except StopIteration: 107 pass 108 if m is not None: 109 heappush(heap, (m.start(), i, m, form, iter)) 110 111 for i, (patt, form) in enumerate(rules): 112 push(i, re.finditer(patt, text, flags=flags), form) 113 114 k = 0 115 n = 0 116 117 while heap: 118 start, i, match, form, iter = heappop(heap) 119 120 if start >= k: 121 yield match, form 122 123 n += 1 124 if n >= limit > 0: 125 break 126 127 k = match.end() 128 129 push(i, iter, form) 130 131 132def to_regex( 133 expr, evaluation, q=_regex_longest, groups=None, abbreviated_patterns=False 134): 135 if expr is None: 136 return None 137 138 if groups is None: 139 groups = {} 140 141 def recurse(x, quantifiers=q): 142 return to_regex(x, evaluation, q=quantifiers, groups=groups) 143 144 if isinstance(expr, String): 145 result = expr.get_string_value() 146 if abbreviated_patterns: 147 pieces = [] 148 i, j = 0, 0 149 while j < len(result): 150 c = result[j] 151 if c == "\\" and j + 1 < len(result): 152 pieces.append(re.escape(result[i:j])) 153 pieces.append(re.escape(result[j + 1])) 154 j += 2 155 i = j 156 elif c == "*": 157 pieces.append(re.escape(result[i:j])) 158 pieces.append("(.*)") 159 j += 1 160 i = j 161 elif c == "@": 162 pieces.append(re.escape(result[i:j])) 163 # one or more characters, excluding uppercase letters 164 pieces.append("([^A-Z]+)") 165 j += 1 166 i = j 167 else: 168 j += 1 169 pieces.append(re.escape(result[i:j])) 170 result = "".join(pieces) 171 else: 172 result = re.escape(result) 173 return result 174 if expr.has_form("RegularExpression", 1): 175 regex = expr.leaves[0].get_string_value() 176 if regex is None: 177 return regex 178 try: 179 re.compile(regex) 180 # Don't return the compiled regex because it may need to composed 181 # further e.g. StringExpression["abc", RegularExpression[regex2]]. 182 return regex 183 except re.error: 184 return None # invalid regex 185 186 if isinstance(expr, Symbol): 187 return { 188 "System`NumberString": r"[-|+]?(\d+(\.\d*)?|\.\d+)?", 189 "System`Whitespace": r"(?u)\s+", 190 "System`DigitCharacter": r"\d", 191 "System`WhitespaceCharacter": r"(?u)\s", 192 "System`WordCharacter": r"(?u)[^\W_]", 193 "System`StartOfLine": r"^", 194 "System`EndOfLine": r"$", 195 "System`StartOfString": r"\A", 196 "System`EndOfString": r"\Z", 197 "System`WordBoundary": r"\b", 198 "System`LetterCharacter": r"(?u)[^\W_0-9]", 199 "System`HexidecimalCharacter": r"[0-9a-fA-F]", 200 }.get(expr.get_name()) 201 202 if expr.has_form("CharacterRange", 2): 203 (start, stop) = (leaf.get_string_value() for leaf in expr.leaves) 204 if all(x is not None and len(x) == 1 for x in (start, stop)): 205 return "[{0}-{1}]".format(re.escape(start), re.escape(stop)) 206 207 if expr.has_form("Blank", 0): 208 return r"(.|\n)" 209 if expr.has_form("BlankSequence", 0): 210 return r"(.|\n)" + q["+"] 211 if expr.has_form("BlankNullSequence", 0): 212 return r"(.|\n)" + q["*"] 213 if expr.has_form("Except", 1, 2): 214 if len(expr.leaves) == 1: 215 leaves = [expr.leaves[0], Expression("Blank")] 216 else: 217 leaves = [expr.leaves[0], expr.leaves[1]] 218 leaves = [recurse(leaf) for leaf in leaves] 219 if all(leaf is not None for leaf in leaves): 220 return "(?!{0}){1}".format(*leaves) 221 if expr.has_form("Characters", 1): 222 leaf = expr.leaves[0].get_string_value() 223 if leaf is not None: 224 return "[{0}]".format(re.escape(leaf)) 225 if expr.has_form("StringExpression", None): 226 leaves = [recurse(leaf) for leaf in expr.leaves] 227 if None in leaves: 228 return None 229 return "".join(leaves) 230 if expr.has_form("Repeated", 1): 231 leaf = recurse(expr.leaves[0]) 232 if leaf is not None: 233 return "({0})".format(leaf) + q["+"] 234 if expr.has_form("RepeatedNull", 1): 235 leaf = recurse(expr.leaves[0]) 236 if leaf is not None: 237 return "({0})".format(leaf) + q["*"] 238 if expr.has_form("Alternatives", None): 239 leaves = [recurse(leaf) for leaf in expr.leaves] 240 if all(leaf is not None for leaf in leaves): 241 return "|".join(leaves) 242 if expr.has_form("Shortest", 1): 243 return recurse(expr.leaves[0], quantifiers=_regex_shortest) 244 if expr.has_form("Longest", 1): 245 return recurse(expr.leaves[0], quantifiers=_regex_longest) 246 if expr.has_form("Pattern", 2) and isinstance(expr.leaves[0], Symbol): 247 name = expr.leaves[0].get_name() 248 patt = groups.get(name, None) 249 if patt is not None: 250 if expr.leaves[1].has_form("Blank", 0): 251 pass # ok, no warnings 252 elif not expr.leaves[1].sameQ(patt): 253 evaluation.message( 254 "StringExpression", "cond", expr.leaves[0], expr, expr.leaves[0] 255 ) 256 return "(?P=%s)" % _encode_pname(name) 257 else: 258 groups[name] = expr.leaves[1] 259 return "(?P<%s>%s)" % (_encode_pname(name), recurse(expr.leaves[1])) 260 261 return None 262 263 264def anchor_pattern(patt): 265 """ 266 anchors a regex in order to force matching against an entire string. 267 """ 268 if not patt.endswith(r"\Z"): 269 patt = patt + r"\Z" 270 if not patt.startswith(r"\A"): 271 patt = r"\A" + patt 272 return patt 273 274 275def mathics_split(patt, string, flags): 276 """ 277 Python's re.split includes the text of groups if they are capturing. 278 279 Furthermore, you can't split on empty matches. Trying to do this returns 280 the original string for Python < 3.5, raises a ValueError for 281 Python >= 3.5, <= X and works as expected for Python >= X, where 'X' is 282 some future version of Python (> 3.6). 283 284 For these reasons we implement our own split. 285 """ 286 # (start, end) indices of splits 287 indices = list((m.start(), m.end()) for m in re.finditer(patt, string, flags)) 288 289 # (start, end) indices of stuff to keep 290 indices = [(None, 0)] + indices + [(len(string), None)] 291 indices = [(indices[i][1], indices[i + 1][0]) for i in range(len(indices) - 1)] 292 293 # slice up the string 294 return [string[start:stop] for start, stop in indices] 295 296 297if version_info >= (3, 0): 298 299 def pack_bytes(codes): 300 return bytes(codes) 301 302 def unpack_bytes(codes): 303 return [int(code) for code in codes] 304 305 306else: 307 from struct import pack, unpack 308 309 def pack_bytes(codes): 310 return pack("B" * len(codes), *codes) 311 312 def unpack_bytes(codes): 313 return unpack("B" * len(codes), codes) 314 315 316class SystemCharacterEncoding(Predefined): 317 """ 318 <dl> 319 <dt>$SystemCharacterEncoding 320 321 </dl> 322 """ 323 324 name = "$SystemCharacterEncoding" 325 326 rules = { 327 "$SystemCharacterEncoding": '"' + SYSTEM_CHARACTER_ENCODING + '"', 328 } 329 330 331class CharacterEncoding(Predefined): 332 """ 333 <dl> 334 <dt>'CharacterEncoding' 335 <dd>specifies the default character encoding to use if no other encoding is 336 specified. 337 </dl> 338 """ 339 340 name = "$CharacterEncoding" 341 value = '"UTF-8"' 342 343 rules = { 344 "$CharacterEncoding": value, 345 } 346 347 348_encodings = { 349 # see https://docs.python.org/2/library/codecs.html#standard-encodings 350 "ASCII": "ascii", 351 "CP949": "cp949", 352 "CP950": "cp950", 353 "EUC-JP": "euc_jp", 354 "IBM-850": "cp850", 355 "ISOLatin1": "iso8859_1", 356 "ISOLatin2": "iso8859_2", 357 "ISOLatin3": "iso8859_3", 358 "ISOLatin4": "iso8859_4", 359 "ISOLatinCyrillic": "iso8859_5", 360 "ISO8859-1": "iso8859_1", 361 "ISO8859-2": "iso8859_2", 362 "ISO8859-3": "iso8859_3", 363 "ISO8859-4": "iso8859_4", 364 "ISO8859-5": "iso8859_5", 365 "ISO8859-6": "iso8859_6", 366 "ISO8859-7": "iso8859_7", 367 "ISO8859-8": "iso8859_8", 368 "ISO8859-9": "iso8859_9", 369 "ISO8859-10": "iso8859_10", 370 "ISO8859-13": "iso8859_13", 371 "ISO8859-14": "iso8859_14", 372 "ISO8859-15": "iso8859_15", 373 "ISO8859-16": "iso8859_16", 374 "koi8-r": "koi8_r", 375 "MacintoshCyrillic": "mac_cyrillic", 376 "MacintoshGreek": "mac_greek", 377 "MacintoshIcelandic": "mac_iceland", 378 "MacintoshRoman": "mac_roman", 379 "MacintoshTurkish": "mac_turkish", 380 "ShiftJIS": "shift_jis", 381 "Unicode": "utf_16", 382 "UTF-8": "utf_8", 383 "UTF8": "utf_8", 384 "WindowsANSI": "cp1252", 385 "WindowsBaltic": "cp1257", 386 "WindowsCyrillic": "cp1251", 387 "WindowsEastEurope": "cp1250", 388 "WindowsGreek": "cp1253", 389 "WindowsTurkish": "cp1254", 390} 391 392 393def to_python_encoding(encoding): 394 return _encodings.get(encoding) 395 396 397class CharacterEncodings(Predefined): 398 name = "$CharacterEncodings" 399 value = "{%s}" % ",".join(map(lambda s: '"%s"' % s, _encodings.keys())) 400 401 rules = { 402 "$CharacterEncodings": value, 403 } 404 405 406class StringExpression(BinaryOperator): 407 """ 408 <dl> 409 <dt>'StringExpression[s_1, s_2, ...]' 410 <dd>represents a sequence of strings and symbolic string objects $s_i$. 411 </dl> 412 413 >> "a" ~~ "b" // FullForm 414 = "ab" 415 416 #> "a" ~~ "b" ~~ "c" // FullForm 417 = "abc" 418 419 #> a ~~ b 420 = a ~~ b 421 """ 422 423 operator = "~~" 424 precedence = 135 425 attributes = ("Flat", "OneIdentity", "Protected") 426 427 messages = { 428 "invld": "Element `1` is not a valid string or pattern element in `2`.", 429 "cond": "Ignored restriction given for `1` in `2` as it does not match previous occurences of `1`.", 430 } 431 432 def apply(self, args, evaluation): 433 "StringExpression[args__String]" 434 args = args.get_sequence() 435 args = [arg.get_string_value() for arg in args] 436 if None in args: 437 return 438 return String("".join(args)) 439 440 441class RegularExpression(Builtin): 442 r""" 443 <dl> 444 <dt>'RegularExpression["regex"]' 445 <dd>represents the regex specified by the string $"regex"$. 446 </dl> 447 448 >> StringSplit["1.23, 4.56 7.89", RegularExpression["(\\s|,)+"]] 449 = {1.23, 4.56, 7.89} 450 451 #> RegularExpression["[abc]"] 452 = RegularExpression[[abc]] 453 454 ## Mathematica doesn't seem to verify the correctness of regex 455 #> StringSplit["ab23c", RegularExpression["[0-9]++"]] 456 : Element RegularExpression[[0-9]++] is not a valid string or pattern element in RegularExpression[[0-9]++]. 457 = StringSplit[ab23c, RegularExpression[[0-9]++]] 458 459 #> StringSplit["ab23c", RegularExpression[2]] 460 : Element RegularExpression[2] is not a valid string or pattern element in RegularExpression[2]. 461 = StringSplit[ab23c, RegularExpression[2]] 462 """ 463 464 465class NumberString(Builtin): 466 """ 467 <dl> 468 <dt>'NumberString' 469 <dd>represents the characters in a number. 470 </dl> 471 472 >> StringMatchQ["1234", NumberString] 473 = True 474 475 >> StringMatchQ["1234.5", NumberString] 476 = True 477 478 >> StringMatchQ["1.2`20", NumberString] 479 = False 480 481 #> StringMatchQ[".12", NumberString] 482 = True 483 #> StringMatchQ["12.", NumberString] 484 = True 485 #> StringMatchQ["12.31.31", NumberString] 486 = False 487 #> StringMatchQ[".", NumberString] 488 = False 489 #> StringMatchQ["-1.23", NumberString] 490 = True 491 #> StringMatchQ["+12.3", NumberString] 492 = True 493 #> StringMatchQ["+.2", NumberString] 494 = True 495 #> StringMatchQ["1.2e4", NumberString] 496 = False 497 """ 498 499 500class DigitCharacter(Builtin): 501 """ 502 <dl> 503 <dt>'DigitCharacter' 504 <dd>represents the digits 0-9. 505 </dl> 506 507 >> StringMatchQ["1", DigitCharacter] 508 = True 509 >> StringMatchQ["a", DigitCharacter] 510 = False 511 >> StringMatchQ["12", DigitCharacter] 512 = False 513 514 >> StringMatchQ["123245", DigitCharacter..] 515 = True 516 517 #> StringMatchQ["123245a6", DigitCharacter..] 518 = False 519 """ 520 521 522class Whitespace(Builtin): 523 r""" 524 <dl> 525 <dt>'Whitespace' 526 <dd>represents a sequence of whitespace characters. 527 </dl> 528 529 >> StringMatchQ["\r \n", Whitespace] 530 = True 531 532 >> StringSplit["a \n b \r\n c d", Whitespace] 533 = {a, b, c, d} 534 535 >> StringReplace[" this has leading and trailing whitespace \n ", (StartOfString ~~ Whitespace) | (Whitespace ~~ EndOfString) -> ""] <> " removed" // FullForm 536 = "this has leading and trailing whitespace removed" 537 """ 538 539 540class WhitespaceCharacter(Builtin): 541 r""" 542 <dl> 543 <dt>'WhitespaceCharacter' 544 <dd>represents a single whitespace character. 545 </dl> 546 547 >> StringMatchQ["\n", WhitespaceCharacter] 548 = True 549 550 >> StringSplit["a\nb\r\nc\rd", WhitespaceCharacter] 551 = {a, b, c, d} 552 553 For sequences of whitespace characters use 'Whitespace': 554 >> StringMatchQ[" \n", WhitespaceCharacter] 555 = False 556 >> StringMatchQ[" \n", Whitespace] 557 = True 558 """ 559 560 561class WordCharacter(Builtin): 562 r""" 563 <dl> 564 <dt>'WordCharacter' 565 <dd>represents a single letter or digit character. 566 </dl> 567 568 >> StringMatchQ[#, WordCharacter] &/@ {"1", "a", "A", ",", " "} 569 = {True, True, True, False, False} 570 571 Test whether a string is alphanumeric: 572 >> StringMatchQ["abc123DEF", WordCharacter..] 573 = True 574 >> StringMatchQ["$b;123", WordCharacter..] 575 = False 576 """ 577 578 579class StartOfString(Builtin): 580 r""" 581 <dl> 582 <dt>'StartOfString' 583 <dd>represents the start of a string. 584 </dl> 585 586 Test whether strings start with "a": 587 >> StringMatchQ[#, StartOfString ~~ "a" ~~ __] &/@ {"apple", "banana", "artichoke"} 588 = {True, False, True} 589 590 >> StringReplace["aba\nabb", StartOfString ~~ "a" -> "c"] 591 = cba 592 . abb 593 """ 594 595 596class EndOfString(Builtin): 597 r""" 598 <dl> 599 <dt>'EndOfString' 600 <dd>represents the end of a string. 601 </dl> 602 603 Test whether strings end with "e": 604 >> StringMatchQ[#, __ ~~ "e" ~~ EndOfString] &/@ {"apple", "banana", "artichoke"} 605 = {True, False, True} 606 607 >> StringReplace["aab\nabb", "b" ~~ EndOfString -> "c"] 608 = aab 609 . abc 610 """ 611 612 613class StartOfLine(Builtin): 614 r""" 615 <dl> 616 <dt>'StartOfString' 617 <dd>represents the start of a line in a string. 618 </dl> 619 620 >> StringReplace["aba\nbba\na\nab", StartOfLine ~~ "a" -> "c"] 621 = cba 622 . bba 623 . c 624 . cb 625 626 >> StringSplit["abc\ndef\nhij", StartOfLine] 627 = {abc 628 . , def 629 . , hij} 630 """ 631 632 633class EndOfLine(Builtin): 634 r""" 635 <dl> 636 <dt>'EndOfString' 637 <dd>represents the end of a line in a string. 638 </dl> 639 640 >> StringReplace["aba\nbba\na\nab", "a" ~~ EndOfLine -> "c"] 641 = abc 642 . bbc 643 . c 644 . ab 645 646 >> StringSplit["abc\ndef\nhij", EndOfLine] 647 = {abc, 648 . def, 649 . hij} 650 """ 651 652 653class WordBoundary(Builtin): 654 """ 655 <dl> 656 <dt>'WordBoundary' 657 <dd>represents the boundary between words. 658 </dl> 659 660 >> StringReplace["apple banana orange artichoke", "e" ~~ WordBoundary -> "E"] 661 = applE banana orangE artichokE 662 """ 663 664 665class LetterCharacter(Builtin): 666 """ 667 <dl> 668 <dt>'LetterCharacter' 669 <dd>represents letters. 670 </dl> 671 672 >> StringMatchQ[#, LetterCharacter] & /@ {"a", "1", "A", " ", "."} 673 = {True, False, True, False, False} 674 675 LetterCharacter also matches unicode characters. 676 >> StringMatchQ["\\[Lambda]", LetterCharacter] 677 = True 678 """ 679 680 681# FIXME: Generalize string.lower() and ord() 682def letter_number(chars: List[str], start_ord) -> List["Integer"]: 683 # Note caller has verified that everything isalpha() and 684 # each char has length 1. 685 return [Integer(ord(char.lower()) - start_ord) for char in chars] 686 687 688class Alphabet(Builtin): 689 """ 690 <dl> 691 <dt>'Alphabet'[] 692 <dd>gives the list of lowercase letters a-z in the English alphabet . 693 694 <dt>'Alphabet[$type$]' 695 <dd> gives the alphabet for the language or class $type$. 696 </dl> 697 698 >> Alphabet[] 699 = {a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z} 700 >> Alphabet["German"] 701 = {a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z} 702 703 """ 704 messages = { 705 "nalph": "The alphabet `` is not known or not available.", 706 } 707 708 rules = { 709 "Alphabet[]": """Alphabet["English"]""", 710 } 711 712 def apply(self, alpha, evaluation): 713 """Alphabet[alpha_String]""" 714 alphakey = alpha.get_string_value() 715 alphakey = alphabet_alias[alphakey] 716 if alphakey is None: 717 evaluation.message("Alphabet", "nalph", alpha) 718 return 719 alphabet = alphabet_descriptions.get(alphakey, None) 720 if alphabet is None: 721 evaluation.message("Alphabet", "nalph", alpha) 722 return 723 return Expression(SymbolList, *[String(c) for c in alphabet["Lowercase"]]) 724 725 726class LetterNumber(Builtin): 727 r""" 728 <dl> 729 <dt>'LetterNumber'[$c$] 730 <dd>returns the position of the character $c$ in the English alphabet. 731 732 <dt>'LetterNumber["string"]' 733 <dd>returns a list of the positions of characters in string. 734 <dt>'LetterNumber["string", $alpha$]' 735 <dd>returns a list of the positions of characters in string, regarding the alphabet $alpha$. 736 </dl> 737 738 >> LetterNumber["b"] 739 = 2 740 741 LetterNumber also works with uppercase characters 742 >> LetterNumber["B"] 743 = 2 744 745 >> LetterNumber["ss2!"] 746 = {19, 19, 0, 0} 747 748 Get positions of each of the letters in a string: 749 >> LetterNumber[Characters["Peccary"]] 750 = {16, 5, 3, 3, 1, 18, 25} 751 752 >> LetterNumber[{"P", "Pe", "P1", "eck"}] 753 = {16, {16, 5}, {16, 0}, {5, 3, 11}} 754 755 #> LetterNumber[4] 756 : The argument 4 is not a string. 757 = LetterNumber[4] 758 759 >> LetterNumber["\[Beta]", "Greek"] 760 = 2 761 762 """ 763 # FIXME: put the right unicode characters in a way that the 764 # following test works... 765 r""" 766 # #> LetterNumber["\[CapitalBeta]", "Greek"] 767 # = 2 768 769 """ 770 messages = { 771 "nalph": "The alphabet `` is not known or not available.", 772 "nas": ("The argument `1` is not a string."), 773 } 774 775 def apply_alpha_str(self, chars: List[Any], alpha: String, evaluation): 776 "LetterNumber[chars_, alpha_String]" 777 alphakey = alpha.get_string_value() 778 alphakey = alphabet_alias.get(alphakey, None) 779 if alphakey is None: 780 evaluation.message("LetterNumber", "nalph", alpha) 781 return 782 if alphakey == "English": 783 return self.apply(chars, evaluation) 784 alphabet = alphabet_descriptions.get(alphakey, None) 785 if alphabet is None: 786 evaluation.message("LetterNumber", "nalph", alpha) 787 return 788 # TODO: handle Uppercase 789 if isinstance(chars, String): 790 py_chars = chars.get_string_value() 791 if len(py_chars) == 1: 792 # FIXME generalize ord("a") 793 res = alphabet["Lowercase"].find(py_chars) + 1 794 if res == -1: 795 res = alphabet["Uppercase"].find(py_chars) + 1 796 return Integer(res) 797 else: 798 r = [] 799 for c in py_chars: 800 cp = alphabet["Lowercase"].find(c) + 1 801 if cp == -1: 802 cp = alphabet["Uppercase"].find(c) + 1 803 r.append(cp) 804 return Expression(SymbolList, *r) 805 elif chars.has_form("List", 1, None): 806 result = [] 807 for leaf in chars.leaves: 808 result.append(self.apply_alpha_str(leaf, alpha, evaluation)) 809 return Expression(SymbolList, *result) 810 else: 811 return evaluation.message(self.__class__.__name__, "nas", chars) 812 return None 813 814 def apply(self, chars: List[Any], evaluation): 815 "LetterNumber[chars_]" 816 817 start_ord = ord("a") - 1 818 if isinstance(chars, String): 819 py_chars = chars.get_string_value() 820 if len(py_chars) == 1: 821 # FIXME generalize ord("a") 822 return letter_number([py_chars[0]], start_ord)[0] 823 else: 824 r = [ 825 letter_number(c, start_ord)[0] if c.isalpha() else 0 826 for c in py_chars 827 ] 828 return Expression(SymbolList, *r) 829 elif chars.has_form("List", 1, None): 830 result = [] 831 for leaf in chars.leaves: 832 result.append(self.apply(leaf, evaluation)) 833 return Expression(SymbolList, *result) 834 else: 835 return evaluation.message(self.__class__.__name__, "nas", chars) 836 return None 837 838 839class HexidecimalCharacter(Builtin): 840 """ 841 <dl> 842 <dt>'HexidecimalCharacter' 843 <dd>represents the characters 0-9, a-f and A-F. 844 </dl> 845 846 >> StringMatchQ[#, HexidecimalCharacter] & /@ {"a", "1", "A", "x", "H", " ", "."} 847 = {True, True, True, False, False, False, False} 848 """ 849 850 851class DigitQ(Builtin): 852 """ 853 <dl> 854 <dt>'DigitQ[$string$]' 855 yields 'True' if all the characters in the $string$ are digits, and yields 'False' otherwise. 856 </dl> 857 858 >> DigitQ["9"] 859 = True 860 861 >> DigitQ["a"] 862 = False 863 864 >> DigitQ["01001101011000010111010001101000011010010110001101110011"] 865 = True 866 867 >> DigitQ["-123456789"] 868 = False 869 870 #> DigitQ[""] 871 = True 872 873 #> DigitQ["."] 874 = False 875 876 #> DigitQ[1==2] 877 = False 878 879 #> DigitQ[a=1] 880 = False 881 """ 882 883 rules = { 884 "DigitQ[string_]": ( 885 "If[StringQ[string], StringMatchQ[string, DigitCharacter...], False, False]" 886 ), 887 } 888 889 890class LetterQ(Builtin): 891 """ 892 <dl> 893 <dt>'LetterQ[$string$]' 894 yields 'True' if all the characters in the $string$ are letters, and yields 'False' otherwise. 895 </dl> 896 897 >> LetterQ["m"] 898 = True 899 900 >> LetterQ["9"] 901 = False 902 903 >> LetterQ["Mathics"] 904 = True 905 906 >> LetterQ["Welcome to Mathics"] 907 = False 908 909 #> LetterQ[""] 910 = True 911 912 #> LetterQ["\\[Alpha]\\[Beta]\\[Gamma]\\[Delta]\\[Epsilon]\\[Zeta]\\[Eta]\\[Theta]"] 913 = True 914 """ 915 916 rules = { 917 "LetterQ[string_]": ( 918 "If[StringQ[string], StringMatchQ[string, LetterCharacter...], False, False]" 919 ), 920 } 921 922 923class StringMatchQ(Builtin): 924 r""" 925 >> StringMatchQ["abc", "abc"] 926 = True 927 928 >> StringMatchQ["abc", "abd"] 929 = False 930 931 >> StringMatchQ["15a94xcZ6", (DigitCharacter | LetterCharacter)..] 932 = True 933 934 #> StringMatchQ["abc1", LetterCharacter] 935 = False 936 937 #> StringMatchQ["abc", "ABC"] 938 = False 939 #> StringMatchQ["abc", "ABC", IgnoreCase -> True] 940 = True 941 942 ## Words containing nonword characters 943 #> StringMatchQ[{"monkey", "don't", "AAA", "S&P"}, ___ ~~ Except[WordCharacter] ~~ ___] 944 = {False, True, False, True} 945 946 ## Try to match a literal number 947 #> StringMatchQ[1.5, NumberString] 948 : String or list of strings expected at position 1 in StringMatchQ[1.5, NumberString]. 949 = StringMatchQ[1.5, NumberString] 950 951 Use StringMatchQ as an operator 952 >> StringMatchQ[LetterCharacter]["a"] 953 = True 954 955 ## Abbreviated string patterns Issue #517 956 #> StringMatchQ["abcd", "abc*"] 957 = True 958 #> StringMatchQ["abc", "abc*"] 959 = True 960 #> StringMatchQ["abc\\", "abc\\"] 961 = True 962 #> StringMatchQ["abc*d", "abc\\*d"] 963 = True 964 #> StringMatchQ["abc*d", "abc\\**"] 965 = True 966 #> StringMatchQ["abcde", "a*f"] 967 = False 968 969 #> StringMatchQ["abcde", "a@e"] 970 = True 971 #> StringMatchQ["aBCDe", "a@e"] 972 = False 973 #> StringMatchQ["ae", "a@e"] 974 = False 975 """ 976 977 attributes = ("Listable",) 978 979 options = { 980 "IgnoreCase": "False", 981 "SpellingCorrections": "None", 982 } 983 984 messages = { 985 "strse": "String or list of strings expected at position `1` in `2`.", 986 } 987 988 rules = { 989 "StringMatchQ[patt_][expr_]": "StringMatchQ[expr, patt]", 990 } 991 992 def apply(self, string, patt, evaluation, options): 993 "StringMatchQ[string_, patt_, OptionsPattern[%(name)s]]" 994 py_string = string.get_string_value() 995 if py_string is None: 996 return evaluation.message( 997 "StringMatchQ", 998 "strse", 999 Integer1, 1000 Expression("StringMatchQ", string, patt), 1001 ) 1002 1003 re_patt = to_regex(patt, evaluation, abbreviated_patterns=True) 1004 if re_patt is None: 1005 return evaluation.message( 1006 "StringExpression", "invld", patt, Expression("StringExpression", patt) 1007 ) 1008 1009 re_patt = anchor_pattern(re_patt) 1010 1011 flags = re.MULTILINE 1012 if options["System`IgnoreCase"] == SymbolTrue: 1013 flags = flags | re.IGNORECASE 1014 1015 if re.match(re_patt, py_string, flags=flags) is None: 1016 return SymbolFalse 1017 else: 1018 return SymbolTrue 1019 1020 1021class StringJoin(BinaryOperator): 1022 """ 1023 <dl> 1024 <dt>'StringJoin["$s1$", "$s2$", ...]' 1025 <dd>returns the concatenation of the strings $s1$, $s2$, …. 1026 </dl> 1027 1028 >> StringJoin["a", "b", "c"] 1029 = abc 1030 >> "a" <> "b" <> "c" // InputForm 1031 = "abc" 1032 1033 'StringJoin' flattens lists out: 1034 >> StringJoin[{"a", "b"}] // InputForm 1035 = "ab" 1036 >> Print[StringJoin[{"Hello", " ", {"world"}}, "!"]] 1037 | Hello world! 1038 """ 1039 1040 operator = "<>" 1041 precedence = 600 1042 attributes = ("Flat", "OneIdentity") 1043 1044 def apply(self, items, evaluation): 1045 "StringJoin[items___]" 1046 1047 result = "" 1048 items = items.flatten(SymbolList) 1049 if items.get_head_name() == "System`List": 1050 items = items.leaves 1051 else: 1052 items = items.get_sequence() 1053 for item in items: 1054 if not isinstance(item, String): 1055 evaluation.message("StringJoin", "string") 1056 return 1057 result += item.value 1058 return String(result) 1059 1060 1061class StringSplit(Builtin): 1062 """ 1063 <dl> 1064 <dt>'StringSplit["$s$"]' 1065 <dd>splits the string $s$ at whitespace, discarding the 1066 whitespace and returning a list of strings. 1067 <dt>'StringSplit["$s$", "$d$"]' 1068 <dd>splits $s$ at the delimiter $d$. 1069 <dt>'StringSplit[$s$, {"$d1$", "$d2$", ...}]' 1070 <dd>splits $s$ using multiple delimiters. 1071 <dt>'StringSplit[{$s_1$, $s_2, ...}, {"$d1$", "$d2$", ...}]' 1072 <dd>returns a list with the result of applying the function to 1073 each element. 1074 </dl> 1075 1076 >> StringSplit["abc,123", ","] 1077 = {abc, 123} 1078 1079 >> StringSplit["abc 123"] 1080 = {abc, 123} 1081 1082 #> StringSplit[" abc 123 "] 1083 = {abc, 123} 1084 1085 >> StringSplit["abc,123.456", {",", "."}] 1086 = {abc, 123, 456} 1087 1088 >> StringSplit["a b c", RegularExpression[" +"]] 1089 = {a, b, c} 1090 1091 >> StringSplit[{"a b", "c d"}, RegularExpression[" +"]] 1092 = {{a, b}, {c, d}} 1093 1094 #> StringSplit["x", "x"] 1095 = {} 1096 1097 #> StringSplit[x] 1098 : String or list of strings expected at position 1 in StringSplit[x]. 1099 = StringSplit[x, Whitespace] 1100 1101 #> StringSplit["x", x] 1102 : Element x is not a valid string or pattern element in x. 1103 = StringSplit[x, x] 1104 1105 #> StringSplit["12312123", "12"..] 1106 = {3, 3} 1107 1108 #> StringSplit["abaBa", "b"] 1109 = {a, aBa} 1110 #> StringSplit["abaBa", "b", IgnoreCase -> True] 1111 = {a, a, a} 1112 """ 1113 1114 rules = { 1115 "StringSplit[s_]": "StringSplit[s, Whitespace]", 1116 } 1117 1118 options = { 1119 "IgnoreCase": "False", 1120 "MetaCharacters": "None", 1121 } 1122 1123 messages = { 1124 "strse": "String or list of strings expected at position `1` in `2`.", 1125 "pysplit": "As of Python 3.5 re.split does not handle empty pattern matches.", 1126 } 1127 1128 def apply(self, string, patt, evaluation, options): 1129 "StringSplit[string_, patt_, OptionsPattern[%(name)s]]" 1130 1131 if string.get_head_name() == "System`List": 1132 leaves = [self.apply(s, patt, evaluation, options) for s in string._leaves] 1133 return Expression(SymbolList, *leaves) 1134 1135 py_string = string.get_string_value() 1136 1137 if py_string is None: 1138 return evaluation.message( 1139 "StringSplit", "strse", Integer1, Expression("StringSplit", string) 1140 ) 1141 1142 if patt.has_form("List", None): 1143 patts = patt.get_leaves() 1144 else: 1145 patts = [patt] 1146 re_patts = [] 1147 for p in patts: 1148 py_p = to_regex(p, evaluation) 1149 if py_p is None: 1150 return evaluation.message("StringExpression", "invld", p, patt) 1151 re_patts.append(py_p) 1152 1153 flags = re.MULTILINE 1154 if options["System`IgnoreCase"] == SymbolTrue: 1155 flags = flags | re.IGNORECASE 1156 1157 result = [py_string] 1158 for re_patt in re_patts: 1159 result = [t for s in result for t in mathics_split(re_patt, s, flags=flags)] 1160 1161 return string_list(SymbolList, [String(x) for x in result if x != ""], evaluation) 1162 1163 1164class StringPosition(Builtin): 1165 """ 1166 <dl> 1167 <dt>'StringPosition["$string$", $patt$]' 1168 <dd>gives a list of starting and ending positions where $patt$ matches "$string$". 1169 <dt>'StringPosition["$string$", $patt$, $n$]' 1170 <dd>returns the first $n$ matches only. 1171 <dt>'StringPosition["$string$", {$patt1$, $patt2$, ...}, $n$]' 1172 <dd>matches multiple patterns. 1173 <dt>'StringPosition[{$s1$, $s2$, ...}, $patt$]' 1174 <dd>returns a list of matches for multiple strings. 1175 </dl> 1176 1177 >> StringPosition["123ABCxyABCzzzABCABC", "ABC"] 1178 = {{4, 6}, {9, 11}, {15, 17}, {18, 20}} 1179 1180 >> StringPosition["123ABCxyABCzzzABCABC", "ABC", 2] 1181 = {{4, 6}, {9, 11}} 1182 1183 'StringPosition' can be useful for searching through text. 1184 >> data = Import["ExampleData/EinsteinSzilLetter.txt"]; 1185 >> StringPosition[data, "uranium"] 1186 = {{299, 305}, {870, 876}, {1538, 1544}, {1671, 1677}, {2300, 2306}, {2784, 2790}, {3093, 3099}} 1187 1188 #> StringPosition["123ABCxyABCzzzABCABC", "ABC", -1] 1189 : Non-negative integer or Infinity expected at position 3 in StringPosition[123ABCxyABCzzzABCABC, ABC, -1]. 1190 = StringPosition[123ABCxyABCzzzABCABC, ABC, -1] 1191 1192 ## Overlaps 1193 #> StringPosition["1231221312112332", RegularExpression["[12]+"]] 1194 = {{1, 2}, {2, 2}, {4, 7}, {5, 7}, {6, 7}, {7, 7}, {9, 13}, {10, 13}, {11, 13}, {12, 13}, {13, 13}, {16, 16}} 1195 #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> False] 1196 = {{1, 2}, {4, 7}, {9, 13}, {16, 16}} 1197 #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> x] 1198 = {{1, 2}, {4, 7}, {9, 13}, {16, 16}} 1199 #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> All] 1200 : Overlaps -> All option is not currently implemented in Mathics. 1201 = {{1, 2}, {2, 2}, {4, 7}, {5, 7}, {6, 7}, {7, 7}, {9, 13}, {10, 13}, {11, 13}, {12, 13}, {13, 13}, {16, 16}} 1202 1203 #> StringPosition["21211121122", {"121", "11"}] 1204 = {{2, 4}, {4, 5}, {5, 6}, {6, 8}, {8, 9}} 1205 #> StringPosition["21211121122", {"121", "11"}, Overlaps -> False] 1206 = {{2, 4}, {5, 6}, {8, 9}} 1207 1208 #> StringPosition[{"abc", "abcda"}, "a"] 1209 = {{{1, 1}}, {{1, 1}, {5, 5}}} 1210 1211 #> StringPosition[{"abc"}, "a", Infinity] 1212 = {{{1, 1}}} 1213 1214 #> StringPosition["abc"]["123AabcDEabc"] 1215 = {{5, 7}, {10, 12}} 1216 """ 1217 1218 options = { 1219 "IgnoreCase": "False", 1220 "MetaCharacters": "None", 1221 "Overlaps": "True", 1222 } 1223 1224 messages = { 1225 "strse": "String or list of strings expected at position `1` in `2`.", 1226 "overall": "Overlaps -> All option is not currently implemented in Mathics.", 1227 "innf": "Non-negative integer or Infinity expected at position `2` in `1`.", 1228 } 1229 1230 rules = { 1231 "StringPosition[patt_][s_]": "StringPosition[s, patt]", 1232 } 1233 1234 def apply(self, string, patt, evaluation, options): 1235 "StringPosition[string_, patt_, OptionsPattern[StringPosition]]" 1236 1237 return self.apply_n( 1238 string, 1239 patt, 1240 Expression("DirectedInfinity", Integer1), 1241 evaluation, 1242 options, 1243 ) 1244 1245 def apply_n(self, string, patt, n, evaluation, options): 1246 "StringPosition[string_, patt_, n:(_Integer|DirectedInfinity[1]), OptionsPattern[StringPosition]]" 1247 1248 expr = Expression("StringPosition", string, patt, n) 1249 1250 # check n 1251 if n.has_form("DirectedInfinity", 1): 1252 py_n = float("inf") 1253 else: 1254 py_n = n.get_int_value() 1255 if py_n is None or py_n < 0: 1256 return evaluation.message("StringPosition", "innf", expr, Integer(3)) 1257 1258 # check options 1259 if options["System`Overlaps"] == SymbolTrue: 1260 overlap = True 1261 elif options["System`Overlaps"] == SymbolFalse: 1262 overlap = False 1263 elif options["System`Overlaps"] == Symbol("All"): 1264 # TODO 1265 evaluation.message("StringPosition", "overall") 1266 overlap = True 1267 else: 1268 overlap = False # unknown options are teated as False 1269 1270 # convert patterns 1271 if patt.has_form("List", None): 1272 patts = patt.get_leaves() 1273 else: 1274 patts = [patt] 1275 re_patts = [] 1276 for p in patts: 1277 py_p = to_regex(p, evaluation) 1278 if py_p is None: 1279 return evaluation.message("StringExpression", "invld", p, patt) 1280 re_patts.append(py_p) 1281 compiled_patts = [re.compile(re_patt) for re_patt in re_patts] 1282 1283 # string or list of strings 1284 if string.has_form("List", None): 1285 py_strings = [s.get_string_value() for s in string.leaves] 1286 if None in py_strings: 1287 return 1288 results = [ 1289 self.do_apply(py_string, compiled_patts, py_n, overlap) 1290 for py_string in py_strings 1291 ] 1292 return Expression(SymbolList, *results) 1293 else: 1294 py_string = string.get_string_value() 1295 if py_string is None: 1296 return 1297 return self.do_apply(py_string, compiled_patts, py_n, overlap) 1298 1299 @staticmethod 1300 def do_apply(py_string, compiled_patts, py_n, overlap): 1301 result = [] 1302 start = 0 1303 while start < len(py_string): 1304 found_match = False 1305 for compiled_patt in compiled_patts: 1306 m = compiled_patt.match(py_string, start) 1307 if m is None: 1308 continue 1309 found_match = True 1310 result.append([m.start() + 1, m.end()]) # 0 to 1 based indexing 1311 if len(result) == py_n: 1312 return from_python(result) 1313 if not overlap: 1314 start = m.end() 1315 if overlap or not found_match: 1316 start += 1 1317 return from_python(result) 1318 1319 1320class StringLength(Builtin): 1321 """ 1322 <dl> 1323 <dt>'StringLength["$string$"]' 1324 <dd>gives the length of $string$. 1325 </dl> 1326 1327 >> StringLength["abc"] 1328 = 3 1329 'StringLength' is listable: 1330 >> StringLength[{"a", "bc"}] 1331 = {1, 2} 1332 1333 >> StringLength[x] 1334 : String expected. 1335 = StringLength[x] 1336 """ 1337 1338 attributes = ("Listable",) 1339 1340 def apply(self, str, evaluation): 1341 "StringLength[str_]" 1342 1343 if not isinstance(str, String): 1344 evaluation.message("StringLength", "string") 1345 return 1346 return Integer(len(str.value)) 1347 1348 1349class _StringFind(Builtin): 1350 attributes = "Protected" 1351 1352 options = { 1353 "IgnoreCase": "False", 1354 "MetaCharacters": "None", 1355 } 1356 1357 messages = { 1358 "strse": "String or list of strings expected at position `1` in `2`.", 1359 "srep": "`1` is not a valid string replacement rule.", 1360 "innf": ( 1361 "Non-negative integer or Infinity expected at " "position `1` in `2`." 1362 ), 1363 } 1364 1365 def _find(py_stri, py_rules, py_n, flags): 1366 raise NotImplementedError() 1367 1368 def _apply(self, string, rule, n, evaluation, options, cases): 1369 if n.sameQ(Symbol("System`Private`Null")): 1370 expr = Expression(self.get_name(), string, rule) 1371 n = None 1372 else: 1373 expr = Expression(self.get_name(), string, rule, n) 1374 1375 # convert string 1376 if string.has_form("List", None): 1377 py_strings = [stri.get_string_value() for stri in string.leaves] 1378 if None in py_strings: 1379 return evaluation.message(self.get_name(), "strse", Integer1, expr) 1380 else: 1381 py_strings = string.get_string_value() 1382 if py_strings is None: 1383 return evaluation.message(self.get_name(), "strse", Integer1, expr) 1384 1385 # convert rule 1386 def convert_rule(r): 1387 if r.has_form("Rule", None) and len(r.leaves) == 2: 1388 py_s = to_regex(r.leaves[0], evaluation) 1389 if py_s is None: 1390 return evaluation.message( 1391 "StringExpression", "invld", r.leaves[0], r.leaves[0] 1392 ) 1393 py_sp = r.leaves[1] 1394 return py_s, py_sp 1395 elif cases: 1396 py_s = to_regex(r, evaluation) 1397 if py_s is None: 1398 return evaluation.message("StringExpression", "invld", r, r) 1399 return py_s, None 1400 1401 return evaluation.message(self.get_name(), "srep", r) 1402 1403 if rule.has_form("List", None): 1404 py_rules = [convert_rule(r) for r in rule.leaves] 1405 else: 1406 py_rules = [convert_rule(rule)] 1407 if None in py_rules: 1408 return None 1409 1410 # convert n 1411 if n is None: 1412 py_n = 0 1413 elif n == Expression("DirectedInfinity", Integer1): 1414 py_n = 0 1415 else: 1416 py_n = n.get_int_value() 1417 if py_n is None or py_n < 0: 1418 return evaluation.message(self.get_name(), "innf", Integer(3), expr) 1419 1420 # flags 1421 flags = re.MULTILINE 1422 if options["System`IgnoreCase"] == SymbolTrue: 1423 flags = flags | re.IGNORECASE 1424 1425 if isinstance(py_strings, list): 1426 return Expression( 1427 "List", 1428 *[ 1429 self._find(py_stri, py_rules, py_n, flags, evaluation) 1430 for py_stri in py_strings 1431 ] 1432 ) 1433 else: 1434 return self._find(py_strings, py_rules, py_n, flags, evaluation) 1435 1436 1437class StringReplace(_StringFind): 1438 """ 1439 <dl> 1440 <dt>'StringReplace["$string$", "$a$"->"$b$"]' 1441 <dd>replaces each occurrence of $old$ with $new$ in $string$. 1442 <dt>'StringReplace["$string$", {"$s1$"->"$sp1$", "$s2$"->"$sp2$"}]' 1443 <dd>performs multiple replacements of each $si$ by the 1444 corresponding $spi$ in $string$. 1445 <dt>'StringReplace["$string$", $srules$, $n$]' 1446 <dd>only performs the first $n$ replacements. 1447 <dt>'StringReplace[{"$string1$", "$string2$", ...}, $srules$]' 1448 <dd>performs the replacements specified by $srules$ on a list 1449 of strings. 1450 </dl> 1451 1452 StringReplace replaces all occurrences of one substring with another: 1453 >> StringReplace["xyxyxyyyxxxyyxy", "xy" -> "A"] 1454 = AAAyyxxAyA 1455 1456 Multiple replacements can be supplied: 1457 >> StringReplace["xyzwxyzwxxyzxyzw", {"xyz" -> "A", "w" -> "BCD"}] 1458 = ABCDABCDxAABCD 1459 1460 Only replace the first 2 occurences: 1461 >> StringReplace["xyxyxyyyxxxyyxy", "xy" -> "A", 2] 1462 = AAxyyyxxxyyxy 1463 1464 Also works for multiple rules: 1465 >> StringReplace["abba", {"a" -> "A", "b" -> "B"}, 2] 1466 = ABba 1467 1468 StringReplace acts on lists of strings too: 1469 >> StringReplace[{"xyxyxxy", "yxyxyxxxyyxy"}, "xy" -> "A"] 1470 = {AAxA, yAAxxAyA} 1471 1472 #> StringReplace["abcabc", "a" -> "b", Infinity] 1473 = bbcbbc 1474 #> StringReplace[x, "a" -> "b"] 1475 : String or list of strings expected at position 1 in StringReplace[x, a -> b]. 1476 = StringReplace[x, a -> b] 1477 #> StringReplace["xyzwxyzwaxyzxyzw", x] 1478 : x is not a valid string replacement rule. 1479 = StringReplace[xyzwxyzwaxyzxyzw, x] 1480 #> StringReplace["xyzwxyzwaxyzxyzw", x -> y] 1481 : Element x is not a valid string or pattern element in x. 1482 = StringReplace[xyzwxyzwaxyzxyzw, x -> y] 1483 #> StringReplace["abcabc", "a" -> "b", -1] 1484 : Non-negative integer or Infinity expected at position 3 in StringReplace[abcabc, a -> b, -1]. 1485 = StringReplace[abcabc, a -> b, -1] 1486 #> StringReplace["abc", "b" -> 4] 1487 : String expected. 1488 = a <> 4 <> c 1489 1490 #> StringReplace["01101100010", "01" .. -> "x"] 1491 = x1x100x0 1492 1493 #> StringReplace["abc abcb abdc", "ab" ~~ _ -> "X"] 1494 = X Xb Xc 1495 1496 #> StringReplace["abc abcd abcd", WordBoundary ~~ "abc" ~~ WordBoundary -> "XX"] 1497 = XX abcd abcd 1498 1499 #> StringReplace["abcd acbd", RegularExpression["[ab]"] -> "XX"] 1500 = XXXXcd XXcXXd 1501 1502 #> StringReplace["abcd acbd", RegularExpression["[ab]"] ~~ _ -> "YY"] 1503 = YYcd YYYY 1504 1505 #> StringReplace["abcdabcdaabcabcd", {"abc" -> "Y", "d" -> "XXX"}] 1506 = YXXXYXXXaYYXXX 1507 1508 1509 #> StringReplace[" Have a nice day. ", (StartOfString ~~ Whitespace) | (Whitespace ~~ EndOfString) -> ""] // FullForm 1510 = "Have a nice day." 1511 1512 #> StringReplace["xyXY", "xy" -> "01"] 1513 = 01XY 1514 #> StringReplace["xyXY", "xy" -> "01", IgnoreCase -> True] 1515 = 0101 1516 1517 StringReplace also can be used as an operator: 1518 >> StringReplace["y" -> "ies"]["city"] 1519 = cities 1520 """ 1521 1522 # TODO Special Characters 1523 """ 1524 #> StringReplace["product: A \\[CirclePlus] B" , "\\[CirclePlus]" -> "x"] 1525 = A x B 1526 """ 1527 1528 rules = { 1529 "StringReplace[rule_][string_]": "StringReplace[string, rule]", 1530 } 1531 1532 def _find(self, py_stri, py_rules, py_n, flags, evaluation): 1533 def cases(): 1534 k = 0 1535 for match, form in _parallel_match(py_stri, py_rules, flags, py_n): 1536 start, end = match.span() 1537 if start > k: 1538 yield String(py_stri[k:start]) 1539 yield _evaluate_match(form, match, evaluation) 1540 k = end 1541 if k < len(py_stri): 1542 yield String(py_stri[k:]) 1543 1544 return Expression("StringJoin", *list(cases())) 1545 1546 def apply(self, string, rule, n, evaluation, options): 1547 "%(name)s[string_, rule_, OptionsPattern[%(name)s], n_:System`Private`Null]" 1548 # this pattern is a slight hack to get around missing Shortest/Longest. 1549 return self._apply(string, rule, n, evaluation, options, False) 1550 1551 1552class StringReverse(Builtin): 1553 """ 1554 <dl> 1555 <dt>'StringReverse["$string$"]' 1556 <dd>reverses the order of the characters in "string". 1557 </dl> 1558 1559 >> StringReverse["live"] 1560 = evil 1561 """ 1562 1563 attributes = ("Listable", "Protected") 1564 1565 def apply(self, string, evaluation): 1566 "StringReverse[string_String]" 1567 return String(string.get_string_value()[::-1]) 1568 1569 1570class StringCases(_StringFind): 1571 """ 1572 <dl> 1573 <dt>'StringCases["$string$", $pattern$]' 1574 <dd>gives all occurences of $pattern$ in $string$. 1575 <dt>'StringReplace["$string$", $pattern$ -> $form$]' 1576 <dd>gives all instances of $form$ that stem from occurences of $pattern$ in $string$. 1577 <dt>'StringCases["$string$", {$pattern1$, $pattern2$, ...}]' 1578 <dd>gives all occurences of $pattern1$, $pattern2$, .... 1579 <dt>'StringReplace["$string$", $pattern$, $n$]' 1580 <dd>gives only the first $n$ occurences. 1581 <dt>'StringReplace[{"$string1$", "$string2$", ...}, $pattern$]' 1582 <dd>gives occurences in $string1$, $string2$, ... 1583 </dl> 1584 1585 >> StringCases["axbaxxb", "a" ~~ x_ ~~ "b"] 1586 = {axb} 1587 1588 >> StringCases["axbaxxb", "a" ~~ x__ ~~ "b"] 1589 = {axbaxxb} 1590 1591 >> StringCases["axbaxxb", Shortest["a" ~~ x__ ~~ "b"]] 1592 = {axb, axxb} 1593 1594 >> StringCases["-abc- def -uvw- xyz", Shortest["-" ~~ x__ ~~ "-"] -> x] 1595 = {abc, uvw} 1596 1597 >> StringCases["-öhi- -abc- -.-", "-" ~~ x : WordCharacter .. ~~ "-" -> x] 1598 = {öhi, abc} 1599 1600 >> StringCases["abc-abc xyz-uvw", Shortest[x : WordCharacter .. ~~ "-" ~~ x_] -> x] 1601 = {abc} 1602 1603 #> StringCases["abc-abc xyz-uvw", Shortest[x : WordCharacter .. ~~ "-" ~~ x : LetterCharacter] -> x] 1604 : Ignored restriction given for x in x : LetterCharacter as it does not match previous occurences of x. 1605 = {abc} 1606 1607 >> StringCases["abba", {"a" -> 10, "b" -> 20}, 2] 1608 = {10, 20} 1609 1610 >> StringCases["a#ä_123", WordCharacter] 1611 = {a, ä, 1, 2, 3} 1612 1613 >> StringCases["a#ä_123", LetterCharacter] 1614 = {a, ä} 1615 """ 1616 1617 rules = { 1618 "StringCases[rule_][string_]": "StringCases[string, rule]", 1619 } 1620 1621 def _find(self, py_stri, py_rules, py_n, flags, evaluation): 1622 def cases(): 1623 for match, form in _parallel_match(py_stri, py_rules, flags, py_n): 1624 if form is None: 1625 yield String(match.group(0)) 1626 else: 1627 yield _evaluate_match(form, match, evaluation) 1628 1629 return Expression(SymbolList, *list(cases())) 1630 1631 def apply(self, string, rule, n, evaluation, options): 1632 "%(name)s[string_, rule_, OptionsPattern[%(name)s], n_:System`Private`Null]" 1633 # this pattern is a slight hack to get around missing Shortest/Longest. 1634 return self._apply(string, rule, n, evaluation, options, True) 1635 1636 1637class StringRepeat(Builtin): 1638 """ 1639 <dl> 1640 <dt>'StringRepeat["$string$", $n$]' 1641 <dd>gives $string$ repeated $n$ times. 1642 <dt>'StringRepeat["$string$", $n$, $max$]' 1643 <dd>gives $string$ repeated $n$ times, but not more than $max$ characters. 1644 </dl> 1645 1646 >> StringRepeat["abc", 3] 1647 = abcabcabc 1648 1649 >> StringRepeat["abc", 10, 7] 1650 = abcabca 1651 1652 #> StringRepeat["x", 0] 1653 : A positive integer is expected at position 2 in StringRepeat[x, 0]. 1654 = StringRepeat[x, 0] 1655 """ 1656 1657 messages = { 1658 "intp": "A positive integer is expected at position `1` in `2`.", 1659 } 1660 1661 def apply(self, s, n, expression, evaluation): 1662 "StringRepeat[s_String, n_]" 1663 py_n = n.get_int_value() if isinstance(n, Integer) else 0 1664 if py_n < 1: 1665 evaluation.message("StringRepeat", "intp", 2, expression) 1666 else: 1667 return String(s.get_string_value() * py_n) 1668 1669 def apply_truncated(self, s, n, m, expression, evaluation): 1670 "StringRepeat[s_String, n_Integer, m_Integer]" 1671 py_n = n.get_int_value() if isinstance(n, Integer) else 0 1672 py_m = m.get_int_value() if isinstance(m, Integer) else 0 1673 1674 if py_n < 1: 1675 evaluation.message("StringRepeat", "intp", 2, expression) 1676 elif py_m < 1: 1677 evaluation.message("StringRepeat", "intp", 3, expression) 1678 else: 1679 py_s = s.get_string_value() 1680 py_n = min(1 + py_m // len(py_s), py_n) 1681 1682 return String((py_s * py_n)[:py_m]) 1683 1684 1685class Characters(Builtin): 1686 """ 1687 <dl> 1688 <dt>'Characters["$string$"]' 1689 <dd>returns a list of the characters in $string$. 1690 </dl> 1691 1692 >> Characters["abc"] 1693 = {a, b, c} 1694 1695 #> \\.78\\.79\\.7A 1696 = xyz 1697 1698 #> \\:0078\\:0079\\:007A 1699 = xyz 1700 1701 #> \\101\\102\\103\\061\\062\\063 1702 = ABC123 1703 1704 #> \\[Alpha]\\[Beta]\\[Gamma] 1705 = \u03B1\u03B2\u03B3 1706 """ 1707 1708 attributes = ("Listable",) 1709 1710 def apply(self, string, evaluation): 1711 "Characters[string_String]" 1712 1713 return Expression(SymbolList, *(String(c) for c in string.value)) 1714 1715 1716class CharacterRange(Builtin): 1717 """ 1718 <dl> 1719 <dt>'CharacterRange["$a$", "$b$"]' 1720 <dd>returns a list of the Unicode characters from $a$ to $b$ 1721 inclusive. 1722 </dl> 1723 1724 >> CharacterRange["a", "e"] 1725 = {a, b, c, d, e} 1726 >> CharacterRange["b", "a"] 1727 = {} 1728 """ 1729 1730 attributes = ("ReadProtected",) 1731 1732 messages = { 1733 "argtype": "Arguments `1` and `2` are not both strings of length 1.", 1734 } 1735 1736 def apply(self, start, stop, evaluation): 1737 "CharacterRange[start_String, stop_String]" 1738 1739 if len(start.value) != 1 or len(stop.value) != 1: 1740 evaluation.message("CharacterRange", "argtype", start, stop) 1741 return 1742 start = ord(start.value[0]) 1743 stop = ord(stop.value[0]) 1744 return Expression( 1745 "List", *[String(chr(code)) for code in range(start, stop + 1)] 1746 ) 1747 1748 1749class String_(Builtin): 1750 """ 1751 <dl> 1752 <dt>'String' 1753 <dd>is the head of strings. 1754 </dl> 1755 1756 >> Head["abc"] 1757 = String 1758 >> "abc" 1759 = abc 1760 1761 Use 'InputForm' to display quotes around strings: 1762 >> InputForm["abc"] 1763 = "abc" 1764 1765 'FullForm' also displays quotes: 1766 >> FullForm["abc" + 2] 1767 = Plus[2, "abc"] 1768 """ 1769 1770 name = "String" 1771 1772 1773class LowerCaseQ(Test): 1774 """ 1775 <dl> 1776 <dt>'LowerCaseQ[$s$]' 1777 <dd>returns True if $s$ consists wholly of lower case characters. 1778 </dl> 1779 1780 >> LowerCaseQ["abc"] 1781 = True 1782 1783 An empty string returns True. 1784 >> LowerCaseQ[""] 1785 = True 1786 """ 1787 1788 def test(self, s): 1789 return isinstance(s, String) and all(c.islower() for c in s.get_string_value()) 1790 1791 1792class ToLowerCase(Builtin): 1793 """ 1794 <dl> 1795 <dt>'ToLowerCase[$s$]' 1796 <dd>returns $s$ in all lower case. 1797 </dl> 1798 1799 >> ToLowerCase["New York"] 1800 = new york 1801 """ 1802 1803 attributes = ("Listable", "Protected") 1804 1805 def apply(self, s, evaluation): 1806 "ToLowerCase[s_String]" 1807 return String(s.get_string_value().lower()) 1808 1809 1810class UpperCaseQ(Test): 1811 """ 1812 <dl> 1813 <dt>'UpperCaseQ[$s$]' 1814 <dd>returns True if $s$ consists wholly of upper case characters. 1815 </dl> 1816 1817 >> UpperCaseQ["ABC"] 1818 = True 1819 1820 An empty string returns True. 1821 >> UpperCaseQ[""] 1822 = True 1823 """ 1824 1825 def test(self, s): 1826 return isinstance(s, String) and all(c.isupper() for c in s.get_string_value()) 1827 1828 1829class ToUpperCase(Builtin): 1830 """ 1831 <dl> 1832 <dt>'ToUpperCase[$s$]' 1833 <dd>returns $s$ in all upper case. 1834 </dl> 1835 1836 >> ToUpperCase["New York"] 1837 = NEW YORK 1838 """ 1839 1840 attributes = ("Listable", "Protected") 1841 1842 def apply(self, s, evaluation): 1843 "ToUpperCase[s_String]" 1844 return String(s.get_string_value().upper()) 1845 1846 1847class ToString(Builtin): 1848 """ 1849 <dl> 1850 <dt>'ToString[$expr$]' 1851 <dd>returns a string representation of $expr$. 1852 <dt>'ToString[$expr$, $form$]' 1853 <dd>returns a string representation of $expr$ in the form 1854 $form$. 1855 </dl> 1856 1857 >> ToString[2] 1858 = 2 1859 >> ToString[2] // InputForm 1860 = "2" 1861 >> ToString[a+b] 1862 = a + b 1863 >> "U" <> 2 1864 : String expected. 1865 = U <> 2 1866 >> "U" <> ToString[2] 1867 = U2 1868 >> ToString[Integrate[f[x],x], TeXForm] 1869 = \\int f\\left[x\\right] \\, dx 1870 1871 """ 1872 1873 options = { 1874 "CharacterEncoding": '"Unicode"', 1875 "FormatType": "OutputForm", 1876 "NumberMarks": "$NumberMarks", 1877 "PageHeight": "Infinity", 1878 "PageWidth": "Infinity", 1879 "TotalHeight": "Infinity", 1880 "TotalWidth": "Infinity", 1881 } 1882 1883 def apply_default(self, value, evaluation, options): 1884 "ToString[value_, OptionsPattern[ToString]]" 1885 return self.apply_form(value, Symbol("System`OutputForm"), evaluation, options) 1886 1887 def apply_form(self, value, form, evaluation, options): 1888 "ToString[value_, form_, OptionsPattern[ToString]]" 1889 encoding = options["System`CharacterEncoding"] 1890 text = value.format(evaluation, form.get_name(), encoding=encoding) 1891 text = text.boxes_to_text(evaluation=evaluation) 1892 return String(text) 1893 1894 1895class InterpretedBox(PrefixOperator): 1896 r""" 1897 <dl> 1898 <dt>'InterpretedBox[$box$]' 1899 <dd>is the ad hoc fullform for \! $box$. just 1900 for internal use... 1901 1902 >> \! \(2+2\) 1903 = 4 1904 </dl> 1905 """ 1906 1907 operator = "\\!" 1908 precedence = 670 1909 1910 def apply_dummy(self, boxes, evaluation): 1911 """InterpretedBox[boxes_]""" 1912 # TODO: the following is a very raw and dummy way to 1913 # handle these expressions. 1914 # In the first place, this should handle different kind 1915 # of boxes in different ways. 1916 reinput = boxes.boxes_to_text() 1917 return Expression("ToExpression", reinput).evaluate(evaluation) 1918 1919 1920class ToExpression(Builtin): 1921 r""" 1922 <dl> 1923 <dt>'ToExpression[$input$]' 1924 <dd>inteprets a given string as Mathics input. 1925 1926 <dt>'ToExpression[$input$, $form$]' 1927 <dd>reads the given input in the specified $form$. 1928 1929 <dt>'ToExpression[$input$, $form$, $h$]' 1930 <dd>applies the head $h$ to the expression before evaluating it. 1931 1932 </dl> 1933 1934 >> ToExpression["1 + 2"] 1935 = 3 1936 1937 >> ToExpression["{2, 3, 1}", InputForm, Max] 1938 = 3 1939 1940 >> ToExpression["2 3", InputForm] 1941 = 6 1942 1943 Note that newlines are like semicolons, not blanks. So so the return value is the second-line value. 1944 >> ToExpression["2\[NewLine]3"] 1945 = 3 1946 1947 #> ToExpression["log(x)", InputForm] 1948 = log x 1949 1950 #> ToExpression["1+"] 1951 : Incomplete expression; more input is needed (line 1 of "ToExpression['1+']"). 1952 = $Failed 1953 1954 #> ToExpression[] 1955 : ToExpression called with 0 arguments; between 1 and 3 arguments are expected. 1956 = ToExpression[] 1957 """ 1958 1959 # TODO: Other forms 1960 """ 1961 >> ToExpression["log(x)", TraditionalForm] 1962 = Log[x] 1963 >> ToExpression["log(x)", TraditionalForm] 1964 = Log[x] 1965 #> ToExpression["log(x)", StandardForm] 1966 = log x 1967 """ 1968 attributes = ("Listable", "Protected") 1969 1970 messages = { 1971 "argb": ( 1972 "`1` called with `2` arguments; " 1973 "between `3` and `4` arguments are expected." 1974 ), 1975 "interpfmt": ( 1976 "`1` is not a valid interpretation format. " 1977 "Valid interpretation formats include InputForm " 1978 "and any member of $BoxForms." 1979 ), 1980 "notstr": "The format type `1` is valid only for string input.", 1981 } 1982 1983 def apply(self, seq, evaluation): 1984 "ToExpression[seq__]" 1985 1986 # Organise Arguments 1987 py_seq = seq.get_sequence() 1988 if len(py_seq) == 1: 1989 (inp, form, head) = (py_seq[0], Symbol("InputForm"), None) 1990 elif len(py_seq) == 2: 1991 (inp, form, head) = (py_seq[0], py_seq[1], None) 1992 elif len(py_seq) == 3: 1993 (inp, form, head) = (py_seq[0], py_seq[1], py_seq[2]) 1994 else: 1995 assert len(py_seq) > 3 # 0 case handled by apply_empty 1996 evaluation.message( 1997 "ToExpression", 1998 "argb", 1999 "ToExpression", 2000 Integer(len(py_seq)), 2001 Integer1, 2002 Integer(3), 2003 ) 2004 return 2005 2006 # Apply the different forms 2007 if form == Symbol("InputForm"): 2008 if isinstance(inp, String): 2009 2010 # TODO: turn the below up into a function and call that. 2011 s = inp.get_string_value() 2012 short_s = s[:15] + "..." if len(s) > 16 else s 2013 with io.StringIO(s) as f: 2014 f.name = """ToExpression['%s']""" % short_s 2015 feeder = MathicsFileLineFeeder(f) 2016 while not feeder.empty(): 2017 try: 2018 query = parse(evaluation.definitions, feeder) 2019 except TranslateError: 2020 return SymbolFailed 2021 finally: 2022 feeder.send_messages(evaluation) 2023 if query is None: # blank line / comment 2024 continue 2025 result = query.evaluate(evaluation) 2026 2027 else: 2028 result = inp 2029 else: 2030 evaluation.message("ToExpression", "interpfmt", form) 2031 return 2032 2033 # Apply head if present 2034 if head is not None: 2035 result = Expression(head, result).evaluate(evaluation) 2036 2037 return result 2038 2039 def apply_empty(self, evaluation): 2040 "ToExpression[]" 2041 evaluation.message( 2042 "ToExpression", "argb", "ToExpression", Integer0, Integer1, Integer(3) 2043 ) 2044 return 2045 2046 2047class ToCharacterCode(Builtin): 2048 """ 2049 <dl> 2050 <dt>'ToCharacterCode["$string$"]' 2051 <dd>converts the string to a list of character codes (Unicode 2052 codepoints). 2053 <dt>'ToCharacterCode[{"$string1$", "$string2$", ...}]' 2054 <dd>converts a list of strings to character codes. 2055 </dl> 2056 2057 >> ToCharacterCode["abc"] 2058 = {97, 98, 99} 2059 >> FromCharacterCode[%] 2060 = abc 2061 2062 >> ToCharacterCode["\\[Alpha]\\[Beta]\\[Gamma]"] 2063 = {945, 946, 947} 2064 2065 >> ToCharacterCode["ä", "UTF8"] 2066 = {195, 164} 2067 2068 >> ToCharacterCode["ä", "ISO8859-1"] 2069 = {228} 2070 2071 >> ToCharacterCode[{"ab", "c"}] 2072 = {{97, 98}, {99}} 2073 2074 #> ToCharacterCode[{"ab"}] 2075 = {{97, 98}} 2076 2077 #> ToCharacterCode[{{"ab"}}] 2078 : String or list of strings expected at position 1 in ToCharacterCode[{{ab}}]. 2079 = ToCharacterCode[{{ab}}] 2080 2081 >> ToCharacterCode[{"ab", x}] 2082 : String or list of strings expected at position 1 in ToCharacterCode[{ab, x}]. 2083 = ToCharacterCode[{ab, x}] 2084 2085 >> ListPlot[ToCharacterCode["plot this string"], Filling -> Axis] 2086 = -Graphics- 2087 2088 #> ToCharacterCode[x] 2089 : String or list of strings expected at position 1 in ToCharacterCode[x]. 2090 = ToCharacterCode[x] 2091 2092 #> ToCharacterCode[""] 2093 = {} 2094 """ 2095 2096 messages = { 2097 "strse": "String or list of strings expected at position `1` in `2`.", 2098 } 2099 2100 def _encode(self, string, encoding, evaluation): 2101 exp = Expression("ToCharacterCode", string) 2102 2103 if string.has_form("List", None): 2104 string = [substring.get_string_value() for substring in string.leaves] 2105 if any(substring is None for substring in string): 2106 evaluation.message("ToCharacterCode", "strse", Integer1, exp) 2107 return None 2108 else: 2109 string = string.get_string_value() 2110 if string is None: 2111 evaluation.message("ToCharacterCode", "strse", Integer1, exp) 2112 return None 2113 2114 if encoding == "Unicode": 2115 2116 def convert(s): 2117 return Expression(SymbolList, *[Integer(ord(code)) for code in s]) 2118 2119 else: 2120 py_encoding = to_python_encoding(encoding) 2121 if py_encoding is None: 2122 evaluation.message("General", "charcode", encoding) 2123 return 2124 2125 def convert(s): 2126 return Expression( 2127 "List", *[Integer(x) for x in unpack_bytes(s.encode(py_encoding))] 2128 ) 2129 2130 if isinstance(string, list): 2131 return Expression(SymbolList, *[convert(substring) for substring in string]) 2132 elif isinstance(string, str): 2133 return convert(string) 2134 2135 def apply_default(self, string, evaluation): 2136 "ToCharacterCode[string_]" 2137 return self._encode(string, "Unicode", evaluation) 2138 2139 def apply(self, string, encoding, evaluation): 2140 "ToCharacterCode[string_, encoding_String]" 2141 return self._encode(string, encoding.get_string_value(), evaluation) 2142 2143 2144class _InvalidCodepointError(ValueError): 2145 pass 2146 2147 2148class FromCharacterCode(Builtin): 2149 """ 2150 <dl> 2151 <dt>'FromCharacterCode[$n$]' 2152 <dd>returns the character corresponding to Unicode codepoint $n$. 2153 <dt>'FromCharacterCode[{$n1$, $n2$, ...}]' 2154 <dd>returns a string with characters corresponding to $n_i$. 2155 <dt>'FromCharacterCode[{{$n11$, $n12$, ...}, {$n21$, $n22$, ...}, ...}]' 2156 <dd>returns a list of strings. 2157 </dl> 2158 2159 >> FromCharacterCode[100] 2160 = d 2161 2162 >> FromCharacterCode[228, "ISO8859-1"] 2163 = ä 2164 2165 >> FromCharacterCode[{100, 101, 102}] 2166 = def 2167 >> ToCharacterCode[%] 2168 = {100, 101, 102} 2169 2170 >> FromCharacterCode[{{97, 98, 99}, {100, 101, 102}}] 2171 = {abc, def} 2172 2173 >> ToCharacterCode["abc 123"] // FromCharacterCode 2174 = abc 123 2175 2176 #> #1 == ToCharacterCode[FromCharacterCode[#1]] & [RandomInteger[{0, 65535}, 100]] 2177 = True 2178 2179 #> FromCharacterCode[{}] // InputForm 2180 = "" 2181 2182 #> FromCharacterCode[65536] 2183 : A character code, which should be a non-negative integer less than 65536, is expected at position 1 in {65536}. 2184 = FromCharacterCode[65536] 2185 #> FromCharacterCode[-1] 2186 : Non-negative machine-sized integer expected at position 1 in FromCharacterCode[-1]. 2187 = FromCharacterCode[-1] 2188 #> FromCharacterCode[444444444444444444444444444444444444] 2189 : Non-negative machine-sized integer expected at position 1 in FromCharacterCode[444444444444444444444444444444444444]. 2190 = FromCharacterCode[444444444444444444444444444444444444] 2191 2192 #> FromCharacterCode[{100, 101, -1}] 2193 : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, -1}. 2194 = FromCharacterCode[{100, 101, -1}] 2195 #> FromCharacterCode[{100, 101, 65536}] 2196 : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, 65536}. 2197 = FromCharacterCode[{100, 101, 65536}] 2198 #> FromCharacterCode[{100, 101, x}] 2199 : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, x}. 2200 = FromCharacterCode[{100, 101, x}] 2201 #> FromCharacterCode[{100, {101}}] 2202 : A character code, which should be a non-negative integer less than 65536, is expected at position 2 in {100, {101}}. 2203 = FromCharacterCode[{100, {101}}] 2204 2205 #> FromCharacterCode[{{97, 98, 99}, {100, 101, x}}] 2206 : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, x}. 2207 = FromCharacterCode[{{97, 98, 99}, {100, 101, x}}] 2208 #> FromCharacterCode[{{97, 98, x}, {100, 101, x}}] 2209 : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {97, 98, x}. 2210 = FromCharacterCode[{{97, 98, x}, {100, 101, x}}] 2211 """ 2212 2213 messages = { 2214 "notunicode": ( 2215 "A character code, which should be a non-negative integer less " 2216 "than 65536, is expected at position `2` in `1`." 2217 ), 2218 "intnm": ( 2219 "Non-negative machine-sized integer expected at " "position `2` in `1`." 2220 ), 2221 "utf8": "The given codes could not be decoded as utf-8.", 2222 } 2223 2224 def _decode(self, n, encoding, evaluation): 2225 exp = Expression("FromCharacterCode", n) 2226 2227 py_encoding = to_python_encoding(encoding) 2228 if py_encoding is None: 2229 evaluation.message("General", "charcode", encoding) 2230 return 2231 2232 def convert_codepoint_list(l): 2233 if encoding == "Unicode": 2234 s = "" 2235 for i, ni in enumerate(l): 2236 pyni = ni.get_int_value() 2237 if not (pyni is not None and 0 <= pyni <= 0xFFFF): 2238 evaluation.message( 2239 "FromCharacterCode", 2240 "notunicode", 2241 Expression(SymbolList, *l), 2242 Integer(i + 1), 2243 ) 2244 raise _InvalidCodepointError 2245 s += chr(pyni) 2246 return s 2247 else: 2248 codes = [x.get_int_value() & 0xFF for x in l] 2249 return pack_bytes(codes).decode(py_encoding) 2250 2251 try: 2252 if n.has_form("List", None): 2253 if not n.get_leaves(): 2254 return String("") 2255 # Mathematica accepts FromCharacterCode[{{100}, 101}], 2256 # so to match this, just check the first leaf to see 2257 # if we're dealing with nested lists. 2258 elif n.get_leaves()[0].has_form("List", None): 2259 list_of_strings = [] 2260 for leaf in n.get_leaves(): 2261 if leaf.has_form("List", None): 2262 stringi = convert_codepoint_list(leaf.get_leaves()) 2263 else: 2264 stringi = convert_codepoint_list([leaf]) 2265 list_of_strings.append(String(stringi)) 2266 return Expression(SymbolList, *list_of_strings) 2267 else: 2268 return String(convert_codepoint_list(n.get_leaves())) 2269 else: 2270 pyn = n.get_int_value() 2271 if not (isinstance(pyn, int) and pyn > 0 and pyn < sys.maxsize): 2272 return evaluation.message( 2273 "FromCharacterCode", "intnm", exp, Integer1 2274 ) 2275 return String(convert_codepoint_list([n])) 2276 except _InvalidCodepointError: 2277 return 2278 except UnicodeDecodeError: 2279 evaluation.message(self.get_name(), "utf8") 2280 return 2281 2282 assert False, "can't get here" 2283 2284 def apply_default(self, n, evaluation): 2285 "FromCharacterCode[n_]" 2286 return self._decode(n, "Unicode", evaluation) 2287 2288 def apply(self, n, encoding, evaluation): 2289 "FromCharacterCode[n_, encoding_String]" 2290 return self._decode(n, encoding.get_string_value(), evaluation) 2291 2292 2293class StringQ(Test): 2294 """ 2295 <dl> 2296 <dt>'StringQ[$expr$]' 2297 <dd>returns 'True' if $expr$ is a 'String', or 'False' otherwise. 2298 </dl> 2299 2300 >> StringQ["abc"] 2301 = True 2302 >> StringQ[1.5] 2303 = False 2304 >> Select[{"12", 1, 3, 5, "yz", x, y}, StringQ] 2305 = {12, yz} 2306 """ 2307 2308 def test(self, expr): 2309 return isinstance(expr, String) 2310 2311 2312class StringTake(Builtin): 2313 """ 2314 <dl> 2315 <dt>'StringTake["$string$", $n$]' 2316 <dd>gives the first $n$ characters in $string$. 2317 2318 <dt>'StringTake["$string$", -$n$]' 2319 <dd>gives the last $n$ characters in $string$. 2320 2321 <dt>'StringTake["$string$", {$n$}]' 2322 <dd>gives the $n$th character in $string$. 2323 2324 <dt>'StringTake["$string$", {$m$, $n$}]' 2325 <dd>gives characters $m$ through $n$ in $string$. 2326 2327 <dt>'StringTake["$string$", {$m$, $n$, $s$}]' 2328 <dd>gives characters $m$ through $n$ in steps of $s$. 2329 2330 <dt>'StringTake[{$s1$, $s2$, ...} $spec$}]' 2331 <dd>gives the list of results for each of the $si$. 2332 </dl> 2333 2334 >> StringTake["abcde", 2] 2335 = ab 2336 >> StringTake["abcde", 0] 2337 = #<--# 2338 >> StringTake["abcde", -2] 2339 = de 2340 >> StringTake["abcde", {2}] 2341 = b 2342 >> StringTake["abcd", {2,3}] 2343 = bc 2344 >> StringTake["abcdefgh", {1, 5, 2}] 2345 = ace 2346 2347 Take the last 2 characters from several strings: 2348 >> StringTake[{"abcdef", "stuv", "xyzw"}, -2] 2349 = {ef, uv, zw} 2350 2351 StringTake also supports standard sequence specifications 2352 >> StringTake["abcdef", All] 2353 = abcdef 2354 2355 #> StringTake["abcd", 0] // InputForm 2356 = "" 2357 #> StringTake["abcd", {3, 2}] // InputForm 2358 = "" 2359 #> StringTake["", {1, 0}] // InputForm 2360 = "" 2361 2362 #> StringTake["abc", {0, 0}] 2363 : Cannot take positions 0 through 0 in "abc". 2364 = StringTake[abc, {0, 0}] 2365 2366 #> StringTake[{2, 4},2] 2367 : String or list of strings expected at position 1. 2368 = StringTake[{2, 4}, 2] 2369 2370 #> StringTake["kkkl",Graphics[{}]] 2371 : Integer or a list of sequence specifications expected at position 2. 2372 = StringTake[kkkl, -Graphics-] 2373 """ 2374 2375 messages = { 2376 "strse": "String or list of strings expected at position 1.", 2377 # FIXME: mseqs should be: Sequence specification (+n, -n, {+n}, {-n}, {m, n}, or {m, n, s}) or a list 2378 # of sequence specifications expected at position 2 in 2379 "mseqs": "Integer or a list of sequence specifications expected at position 2.", 2380 "take": 'Cannot take positions `1` through `2` in "`3`".', 2381 } 2382 2383 def apply(self, string, seqspec, evaluation): 2384 "StringTake[string_String, seqspec_]" 2385 result = string.get_string_value() 2386 if result is None: 2387 return evaluation.message("StringTake", "strse") 2388 2389 if isinstance(seqspec, Integer): 2390 pos = seqspec.get_int_value() 2391 if pos >= 0: 2392 seq = (1, pos, 1) 2393 else: 2394 seq = (pos, None, 1) 2395 else: 2396 seq = convert_seq(seqspec) 2397 2398 if seq is None: 2399 return evaluation.message("StringTake", "mseqs") 2400 2401 start, stop, step = seq 2402 py_slice = python_seq(start, stop, step, len(result)) 2403 2404 if py_slice is None: 2405 return evaluation.message("StringTake", "take", start, stop, string) 2406 2407 return String(result[py_slice]) 2408 2409 def apply_strings(self, strings, spec, evaluation): 2410 "StringTake[strings__, spec_]" 2411 result_list = [] 2412 for string in strings.leaves: 2413 result = self.apply(string, spec, evaluation) 2414 if result is None: 2415 return None 2416 result_list.append(result) 2417 return Expression("List", *result_list) 2418 2419 2420 2421class StringDrop(Builtin): 2422 """ 2423 <dl> 2424 <dt>'StringDrop["$string$", $n$]' 2425 <dd>gives $string$ with the first $n$ characters dropped. 2426 <dt>'StringDrop["$string$", -$n$]' 2427 <dd>gives $string$ with the last $n$ characters dropped. 2428 <dt>'StringDrop["$string$", {$n$}]' 2429 <dd>gives $string$ with the $n$th character dropped. 2430 <dt>'StringDrop["$string$", {$m$, $n$}]' 2431 <dd>gives $string$ with the characters $m$ through $n$ dropped. 2432 </dl> 2433 2434 >> StringDrop["abcde", 2] 2435 = cde 2436 >> StringDrop["abcde", -2] 2437 = abc 2438 >> StringDrop["abcde", {2}] 2439 = acde 2440 >> StringDrop["abcde", {2,3}] 2441 = ade 2442 >> StringDrop["abcd",{3,2}] 2443 = abcd 2444 >> StringDrop["abcd",0] 2445 = abcd 2446 """ 2447 2448 messages = { 2449 "strse": "String expected at position 1.", 2450 "mseqs": "Integer or list of two Integers are expected at position 2.", 2451 "drop": 'Cannot drop positions `1` through `2` in "`3`".', 2452 } 2453 2454 def apply_with_n(self, string, n, evaluation): 2455 "StringDrop[string_,n_Integer]" 2456 if not isinstance(string, String): 2457 return evaluation.message("StringDrop", "strse") 2458 if isinstance(n, Integer): 2459 pos = n.value 2460 if pos > len(string.get_string_value()): 2461 return evaluation.message("StringDrop", "drop", 1, pos, string) 2462 if pos < -len(string.get_string_value()): 2463 return evaluation.message("StringDrop", "drop", pos, -1, string) 2464 if pos > 0: 2465 return String(string.get_string_value()[pos:]) 2466 if pos < 0: 2467 return String(string.get_string_value()[:(pos)]) 2468 if pos == 0: 2469 return string 2470 return evaluation.message("StringDrop", "mseqs") 2471 2472 def apply_with_ni_nf(self, string, ni, nf, evaluation): 2473 "StringDrop[string_,{ni_Integer,nf_Integer}]" 2474 if not isinstance(string, String): 2475 return evaluation.message("StringDrop", "strse", string) 2476 2477 if ni.value == 0 or nf.value == 0: 2478 return evaluation.message("StringDrop", "drop", ni, nf) 2479 fullstring = string.get_string_value() 2480 lenfullstring = len(fullstring) 2481 posi = ni.value 2482 if posi < 0: 2483 posi = lenfullstring + posi + 1 2484 posf = nf.value 2485 if posf < 0: 2486 posf = lenfullstring + posf + 1 2487 if posf > lenfullstring or posi > lenfullstring or posf <= 0 or posi <= 0: 2488 # positions out or range 2489 return evaluation.message("StringDrop", "drop", ni, nf, fullstring) 2490 if posf < posi: 2491 return string # this is what actually mma does 2492 return String(fullstring[: (posi - 1)] + fullstring[posf:]) 2493 2494 def apply_with_ni(self, string, ni, evaluation): 2495 "StringDrop[string_,{ni_Integer}]" 2496 if not isinstance(string, String): 2497 return evaluation.message("StringDrop", "strse", string) 2498 if ni.value == 0: 2499 return evaluation.message("StringDrop", "drop", ni, ni) 2500 fullstring = string.get_string_value() 2501 lenfullstring = len(fullstring) 2502 posi = ni.value 2503 if posi < 0: 2504 posi = lenfullstring + posi + 1 2505 if posi > lenfullstring or posi <= 0: 2506 return evaluation.message("StringDrop", "drop", ni, ni, fullstring) 2507 return String(fullstring[: (posi - 1)] + fullstring[posi:]) 2508 2509 def apply(self, string, something, evaluation): 2510 "StringDrop[string_,something___]" 2511 if not isinstance(string, String): 2512 return evaluation.message("StringDrop", "strse") 2513 return evaluation.message("StringDrop", "mseqs") 2514 2515 2516class HammingDistance(Builtin): 2517 """ 2518 <dl> 2519 <dt>'HammingDistance[$u$, $v$]' 2520 <dd>returns the Hamming distance between $u$ and $v$, i.e. the number of different elements. 2521 $u$ and $v$ may be lists or strings. 2522 </dl> 2523 2524 >> HammingDistance[{1, 0, 1, 0}, {1, 0, 0, 1}] 2525 = 2 2526 2527 >> HammingDistance["time", "dime"] 2528 = 1 2529 2530 >> HammingDistance["TIME", "dime", IgnoreCase -> True] 2531 = 1 2532 """ 2533 2534 messages = { 2535 "idim": "`1` and `2` must be of same length.", 2536 } 2537 2538 options = { 2539 "IgnoreCase": "False", 2540 } 2541 2542 @staticmethod 2543 def _compute(u, v, sameQ, evaluation): 2544 if len(u) != len(v): 2545 evaluation.message("HammingDistance", "idim", u, v) 2546 return None 2547 else: 2548 return Integer(sum(0 if sameQ(x, y) else 1 for x, y in zip(u, v))) 2549 2550 def apply_list(self, u, v, evaluation): 2551 "HammingDistance[u_List, v_List]" 2552 return HammingDistance._compute( 2553 u.leaves, v.leaves, lambda x, y: x.sameQ(y), evaluation 2554 ) 2555 2556 def apply_string(self, u, v, evaluation, options): 2557 "HammingDistance[u_String, v_String, OptionsPattern[HammingDistance]]" 2558 ignore_case = self.get_option(options, "IgnoreCase", evaluation) 2559 py_u = u.get_string_value() 2560 py_v = v.get_string_value() 2561 if ignore_case and ignore_case.is_true(): 2562 py_u = py_u.lower() 2563 py_v = py_v.lower() 2564 return HammingDistance._compute(py_u, py_v, lambda x, y: x == y, evaluation) 2565 2566 2567class _StringDistance(Builtin): 2568 options = {"IgnoreCase": "False"} 2569 2570 def apply(self, a, b, evaluation, options): 2571 "%(name)s[a_, b_, OptionsPattern[%(name)s]]" 2572 if isinstance(a, String) and isinstance(b, String): 2573 py_a = a.get_string_value() 2574 py_b = b.get_string_value() 2575 if options["System`IgnoreCase"] == SymbolTrue: 2576 if hasattr(str, "casefold"): 2577 2578 def normalize(c): 2579 return unicodedata.normalize("NFKD", c.casefold()) 2580 2581 py_a = [normalize(c) for c in py_a] 2582 py_b = [normalize(c) for c in py_b] 2583 else: # python2, PyPy 2584 py_a = py_a.lower() 2585 py_b = py_b.lower() 2586 return Integer(self._distance(py_a, py_b, lambda u, v: u == v)) 2587 elif a.get_head_name() == "System`List" and b.get_head_name() == "System`List": 2588 return Integer(self._distance(a.leaves, b.leaves, lambda u, v: u.sameQ(v))) 2589 else: 2590 return Expression("EditDistance", a, b) 2591 2592 2593# Levenshtein's algorithm is defined by the following construction: 2594# (adapted from https://de.wikipedia.org/wiki/Levenshtein-Distanz) 2595# 2596# given two strings s1, s2, we build a matrix D sized (len(s1) + 1, 2597# len(s2) + 1) and fill it using the following rules: 2598# 2599# (1) D(0, 0) = 0 2600# (2) D(i, 0) = i, 1 <= i <= len(s1) 2601# (3) D(0, j) = j, 1 <= j <= len(s2) 2602# (4) D(i, j) = minimum of 2603# D(i - 1, j - 1) + 0 if s1(j) = s2(j) 2604# D(i - 1, j - 1) + 1 (substitution) 2605# D(i, j - 1) + 1 (insertion) 2606# D(i - 1, j) + 1 (deletion) 2607# 2608# The computed distance will be in D(len(s1) + 1, len(s2) + 1). 2609# 2610# note: double brackets indicate 1-based indices below, e.g. s1[[1]] 2611 2612 2613def _one_based(l): # makes an enumerated generator 1-based 2614 return ((i + 1, x) for i, x in l) 2615 2616 2617def _prev_curr(l): # yields pairs of (x[i - 1], x[i]) for i in 1, 2, ... 2618 prev = None 2619 for curr in l: 2620 yield prev, curr 2621 prev = curr 2622 2623 2624def _levenshtein_d0(s2): # compute D(0, ...) 2625 return list(range(len(s2) + 1)) # see (1), (3) 2626 2627 2628def _levenshtein_di(c1, s2, i, d_prev, sameQ, cost): # compute one new row 2629 # given c1 = s1[i], s2, i, d_prev = D(i - 1, ...), compute D(i, ...) 2630 2631 yield i # start with D(i, 0) = i, see (2) 2632 d_curr_prev_j = i # d_curr_prev_j stores D(i, j - 1) 2633 2634 for j, c2 in _one_based(enumerate(s2)): # c2 = s2[[j]] 2635 cond = 0 if sameQ(c1, c2) else cost 2636 2637 d_curr_j = min( # see (4) 2638 d_prev[j - 1] + cond, # D(i - 1, j - 1) + cond; substitution 2639 d_curr_prev_j + 1, # D(i, j - 1) + 1; insertion 2640 d_prev[j] + 1, 2641 ) # D(i - 1, j) + 1; deletion 2642 2643 yield d_curr_j 2644 d_curr_prev_j = d_curr_j 2645 2646 2647def _levenshtein(s1, s2, sameQ: Callable[..., bool]): 2648 d_prev = _levenshtein_d0(s2) 2649 for i, c1 in _one_based(enumerate(s1)): # c1 = s1[[i]] 2650 d_prev = list(_levenshtein_di(c1, s2, i, d_prev, sameQ, 1)) 2651 return d_prev[-1] 2652 2653 2654def _damerau_levenshtein(s1, s2, sameQ: Callable[..., bool]): 2655 # _damerau_levenshtein works like _levenshtein, except for one additional 2656 # rule covering transposition: 2657 # 2658 # if i > 1 and j > 1 and a[i] == b[j - 1] and a[i - 1] == b[j] then 2659 # D(i, j) = minimum(D(i, j), D(i - 2, j - 2) + transposition_cost) 2660 2661 def row(d_prev_prev, d_prev, i, prev_c1, c1, cost): 2662 # given c1 = s1[i], d_prev_prev = D(i - 2), d_prev = D(i - 1), 2663 # prev_c1 = s1[[i - 1]], c1 = s1[[i]], compute D(i, ...) 2664 for j, d_curr_j in enumerate(_levenshtein_di(c1, s2, i, d_prev, sameQ, cost)): 2665 if i > 1 and j > 1: 2666 if sameQ(c1, s2[j - 2]) and sameQ(prev_c1, s2[j - 1]): # transposition? 2667 # i.e. if s1[[i]] = s2[[j-1]] and s1[[i-1]] = s2[[j]] 2668 d_curr_j = min(d_curr_j, d_prev_prev[j - 2] + cost) 2669 yield d_curr_j 2670 2671 d_prev_prev = None 2672 d_prev = _levenshtein_d0(s2) 2673 for i, (prev_c1, c1) in _one_based(enumerate(_prev_curr(s1))): 2674 d_curr = list(row(d_prev_prev, d_prev, i, prev_c1, c1, 1)) 2675 d_prev_prev = d_prev 2676 d_prev = d_curr 2677 2678 return d_prev[-1] 2679 2680 2681def _levenshtein_like_or_border_cases(s1, s2, sameQ: Callable[..., bool], compute): 2682 if len(s1) == len(s2) and all(sameQ(c1, c2) for c1, c2 in zip(s1, s2)): 2683 return 0 2684 2685 if len(s1) < len(s2): 2686 s1, s2 = s2, s1 2687 2688 if len(s2) == 0: 2689 return len(s1) 2690 2691 return compute(s1, s2, sameQ) 2692 2693 2694class EditDistance(_StringDistance): 2695 """ 2696 <dl> 2697 <dt>'EditDistance[$a$, $b$]' 2698 <dd>returns the Levenshtein distance of $a$ and $b$, which is defined as the minimum number of 2699 insertions, deletions and substitutions on the constituents of $a$ and $b$ needed to transform 2700 one into the other. 2701 </dl> 2702 2703 >> EditDistance["kitten", "kitchen"] 2704 = 2 2705 2706 >> EditDistance["abc", "ac"] 2707 = 1 2708 2709 >> EditDistance["abc", "acb"] 2710 = 2 2711 2712 >> EditDistance["azbc", "abxyc"] 2713 = 3 2714 2715 The IgnoreCase option makes EditDistance ignore the case of letters: 2716 >> EditDistance["time", "Thyme"] 2717 = 3 2718 2719 >> EditDistance["time", "Thyme", IgnoreCase -> True] 2720 = 2 2721 2722 EditDistance also works on lists: 2723 >> EditDistance[{1, E, 2, Pi}, {1, E, Pi, 2}] 2724 = 2 2725 """ 2726 2727 def _distance(self, s1, s2, sameQ: Callable[..., bool]): 2728 return _levenshtein_like_or_border_cases(s1, s2, sameQ, _levenshtein) 2729 2730 2731class DamerauLevenshteinDistance(_StringDistance): 2732 """ 2733 <dl> 2734 <dt>'DamerauLevenshteinDistance[$a$, $b$]' 2735 <dd>returns the Damerau-Levenshtein distance of $a$ and $b$, which is defined as the minimum number of 2736 transpositions, insertions, deletions and substitutions needed to transform one into the other. 2737 In contrast to EditDistance, DamerauLevenshteinDistance counts transposition of adjacent items (e.g. 2738 "ab" into "ba") as one operation of change. 2739 </dl> 2740 2741 >> DamerauLevenshteinDistance["kitten", "kitchen"] 2742 = 2 2743 2744 >> DamerauLevenshteinDistance["abc", "ac"] 2745 = 1 2746 2747 >> DamerauLevenshteinDistance["abc", "acb"] 2748 = 1 2749 2750 >> DamerauLevenshteinDistance["azbc", "abxyc"] 2751 = 3 2752 2753 The IgnoreCase option makes DamerauLevenshteinDistance ignore the case of letters: 2754 >> DamerauLevenshteinDistance["time", "Thyme"] 2755 = 3 2756 2757 >> DamerauLevenshteinDistance["time", "Thyme", IgnoreCase -> True] 2758 = 2 2759 2760 DamerauLevenshteinDistance also works on lists: 2761 >> DamerauLevenshteinDistance[{1, E, 2, Pi}, {1, E, Pi, 2}] 2762 = 1 2763 """ 2764 2765 def _distance(self, s1, s2, sameQ: Callable[..., bool]): 2766 return _levenshtein_like_or_border_cases(s1, s2, sameQ, _damerau_levenshtein) 2767 2768 2769class RemoveDiacritics(Builtin): 2770 """ 2771 <dl> 2772 <dt>'RemoveDiacritics[$s$]' 2773 <dd>returns a version of $s$ with all diacritics removed. 2774 </dl> 2775 2776 >> RemoveDiacritics["en prononçant pêcher et pécher"] 2777 = en prononcant pecher et pecher 2778 2779 >> RemoveDiacritics["piñata"] 2780 = pinata 2781 """ 2782 2783 def apply(self, s, evaluation): 2784 "RemoveDiacritics[s_String]" 2785 return String( 2786 unicodedata.normalize("NFKD", s.get_string_value()) 2787 .encode("ascii", "ignore") 2788 .decode("ascii") 2789 ) 2790 2791 2792class Transliterate(Builtin): 2793 """ 2794 <dl> 2795 <dt>'Transliterate[$s$]' 2796 <dd>transliterates a text in some script into an ASCII string. 2797 </dl> 2798 2799 # The following examples were taken from 2800 # https://en.wikipedia.org/wiki/Iliad, 2801 # https://en.wikipedia.org/wiki/Russian_language, and 2802 # https://en.wikipedia.org/wiki/Hiragana 2803 2804 >> Transliterate["μήτηρ γάρ τέ μέ φησι θεὰ Θέτις ἀργυρόπεζα"] 2805 = meter gar te me phesi thea Thetis arguropeza 2806 2807 >> Transliterate["Алекса́ндр Пу́шкин"] 2808 = Aleksandr Pushkin 2809 2810 >> Transliterate["つかう"] 2811 = tsukau 2812 """ 2813 2814 requires = ("unidecode",) 2815 2816 def apply(self, s, evaluation): 2817 "Transliterate[s_String]" 2818 from unidecode import unidecode 2819 2820 return String(unidecode(s.get_string_value())) 2821 2822 2823class StringTrim(Builtin): 2824 """ 2825 <dl> 2826 <dt>'StringTrim[$s$]' 2827 <dd>returns a version of $s$ with whitespace removed from start and end. 2828 </dl> 2829 2830 >> StringJoin["a", StringTrim[" \\tb\\n "], "c"] 2831 = abc 2832 2833 >> StringTrim["ababaxababyaabab", RegularExpression["(ab)+"]] 2834 = axababya 2835 """ 2836 2837 def apply(self, s, evaluation): 2838 "StringTrim[s_String]" 2839 return String(s.get_string_value().strip(" \t\n")) 2840 2841 def apply_pattern(self, s, patt, expression, evaluation): 2842 "StringTrim[s_String, patt_]" 2843 text = s.get_string_value() 2844 if not text: 2845 return s 2846 2847 py_patt = to_regex(patt, evaluation) 2848 if py_patt is None: 2849 return evaluation.message("StringExpression", "invld", patt, expression) 2850 2851 if not py_patt.startswith(r"\A"): 2852 left_patt = r"\A" + py_patt 2853 else: 2854 left_patt = py_patt 2855 2856 if not py_patt.endswith(r"\Z"): 2857 right_patt = py_patt + r"\Z" 2858 else: 2859 right_patt = py_patt 2860 2861 m = re.search(left_patt, text) 2862 left = m.end(0) if m else 0 2863 2864 m = re.search(right_patt, text) 2865 right = m.start(0) if m else len(text) 2866 2867 return String(text[left:right]) 2868 2869 2870class StringInsert(Builtin): 2871 """ 2872 <dl> 2873 <dt>'StringInsert["$string$", "$snew$", $n$]' 2874 <dd>yields a string with $snew$ inserted starting at position $n$ in $string$. 2875 2876 <dt>'StringInsert["$string$", "$snew$", -$n$]' 2877 <dd>inserts a at position $n$ from the end of "$string$". 2878 2879 <dt>'StringInsert["$string$", "$snew$", {$n_1$, $n_2$, ...}]' 2880 <dd>inserts a copy of $snew$ at each position $n_i$ in $string$; 2881 the $n_i$ are taken before any insertion is done. 2882 2883 <dt>'StringInsert[{$s_1$, $s_2$, ...}, "$snew$", $n$]' 2884 <dd>gives the list of resutls for each of the $s_i$. 2885 </dl> 2886 2887 >> StringInsert["noting", "h", 4] 2888 = nothing 2889 2890 #> StringInsert["abcdefghijklm", "X", 15] 2891 : Cannot insert at position 15 in abcdefghijklm. 2892 = StringInsert[abcdefghijklm, X, 15] 2893 2894 #> StringInsert[abcdefghijklm, "X", 4] 2895 : String or list of strings expected at position 1 in StringInsert[abcdefghijklm, X, 4]. 2896 = StringInsert[abcdefghijklm, X, 4] 2897 2898 #> StringInsert["abcdefghijklm", X, 4] 2899 : String expected at position 2 in StringInsert[abcdefghijklm, X, 4]. 2900 = StringInsert[abcdefghijklm, X, 4] 2901 2902 #> StringInsert["abcdefghijklm", "X", a] 2903 : Position specification a in StringInsert[abcdefghijklm, X, a] is not a machine-sized integer or a list of machine-sized integers. 2904 = StringInsert[abcdefghijklm, X, a] 2905 2906 #> StringInsert["abcdefghijklm", "X", 0] 2907 : Cannot insert at position 0 in abcdefghijklm. 2908 = StringInsert[abcdefghijklm, X, 0] 2909 2910 >> StringInsert["note", "d", -1] 2911 = noted 2912 2913 >> StringInsert["here", "t", -5] 2914 = there 2915 2916 #> StringInsert["abcdefghijklm", "X", -15] 2917 : Cannot insert at position -15 in abcdefghijklm. 2918 = StringInsert[abcdefghijklm, X, -15] 2919 2920 >> StringInsert["adac", "he", {1, 5}] 2921 = headache 2922 2923 #> StringInsert["abcdefghijklm", "X", {1, -1, 14, -14}] 2924 = XXabcdefghijklmXX 2925 2926 #> StringInsert["abcdefghijklm", "X", {1, 0}] 2927 : Cannot insert at position 0 in abcdefghijklm. 2928 = StringInsert[abcdefghijklm, X, {1, 0}] 2929 2930 #> StringInsert["", "X", {1}] 2931 = X 2932 2933 #> StringInsert["", "X", {1, -1}] 2934 = XX 2935 2936 #> StringInsert["", "", {1}] 2937 = #<--# 2938 2939 #> StringInsert["", "X", {1, 2}] 2940 : Cannot insert at position 2 in . 2941 = StringInsert[, X, {1, 2}] 2942 2943 #> StringInsert["abcdefghijklm", "", {1, 2, 3, 4 ,5, -6}] 2944 = abcdefghijklm 2945 2946 #> StringInsert["abcdefghijklm", "X", {}] 2947 = abcdefghijklm 2948 2949 >> StringInsert[{"something", "sometimes"}, " ", 5] 2950 = {some thing, some times} 2951 2952 #> StringInsert[{"abcdefghijklm", "Mathics"}, "X", 13] 2953 : Cannot insert at position 13 in Mathics. 2954 = {abcdefghijklXm, StringInsert[Mathics, X, 13]} 2955 2956 #> StringInsert[{"", ""}, "", {1, 1, 1, 1}] 2957 = {, } 2958 2959 #> StringInsert[{"abcdefghijklm", "Mathics"}, "X", {0, 2}] 2960 : Cannot insert at position 0 in abcdefghijklm. 2961 : Cannot insert at position 0 in Mathics. 2962 = {StringInsert[abcdefghijklm, X, {0, 2}], StringInsert[Mathics, X, {0, 2}]} 2963 2964 #> StringInsert[{"abcdefghijklm", Mathics}, "X", {1, 2}] 2965 : String or list of strings expected at position 1 in StringInsert[{abcdefghijklm, Mathics}, X, {1, 2}]. 2966 = StringInsert[{abcdefghijklm, Mathics}, X, {1, 2}] 2967 2968 #> StringInsert[{"", "Mathics"}, "X", {1, 1, -1}] 2969 = {XXX, XXMathicsX} 2970 2971 >> StringInsert["1234567890123456", ".", Range[-16, -4, 3]] 2972 = 1.234.567.890.123.456""" 2973 2974 messages = { 2975 "strse": "String or list of strings expected at position `1` in `2`.", 2976 "string": "String expected at position `1` in `2`.", 2977 "ins": "Cannot insert at position `1` in `2`.", 2978 "psl": "Position specification `1` in `2` is not a machine-sized integer or a list of machine-sized integers.", 2979 } 2980 2981 def _insert(self, str, add, lpos, evaluation): 2982 for pos in lpos: 2983 if abs(pos) < 1 or abs(pos) > len(str) + 1: 2984 evaluation.message("StringInsert", "ins", Integer(pos), String(str)) 2985 return evaluation.format_output( 2986 Expression( 2987 "StringInsert", str, add, lpos[0] if len(lpos) == 1 else lpos 2988 ) 2989 ) 2990 2991 # Create new list of position which are rearranged 2992 pos_limit = len(str) + 2 2993 listpos = [p if p > 0 else pos_limit + p for p in lpos] 2994 listpos.sort() 2995 2996 result = "" 2997 start = 0 2998 for pos in listpos: 2999 stop = pos - 1 3000 result += str[start:stop] + add 3001 start = stop 3002 else: 3003 result += str[start : len(str)] 3004 3005 return result 3006 3007 def apply(self, strsource, strnew, pos, evaluation): 3008 "StringInsert[strsource_, strnew_, pos_]" 3009 3010 exp = Expression("StringInsert", strsource, strnew, pos) 3011 3012 py_strnew = strnew.get_string_value() 3013 if py_strnew is None: 3014 return evaluation.message("StringInsert", "string", Integer(2), exp) 3015 3016 # Check and create list of position 3017 listpos = [] 3018 if pos.has_form("List", None): 3019 leaves = pos.get_leaves() 3020 if not leaves: 3021 return strsource 3022 else: 3023 for i, posi in enumerate(leaves): 3024 py_posi = posi.get_int_value() 3025 if py_posi is None: 3026 return evaluation.message("StringInsert", "psl", pos, exp) 3027 listpos.append(py_posi) 3028 else: 3029 py_pos = pos.get_int_value() 3030 if py_pos is None: 3031 return evaluation.message("StringInsert", "psl", pos, exp) 3032 listpos.append(py_pos) 3033 3034 # Check and perform the insertion 3035 if strsource.has_form("List", None): 3036 py_strsource = [sub.get_string_value() for sub in strsource.leaves] 3037 if any(sub is None for sub in py_strsource): 3038 return evaluation.message("StringInsert", "strse", Integer1, exp) 3039 return Expression( 3040 "List", 3041 *[ 3042 String(self._insert(s, py_strnew, listpos, evaluation)) 3043 for s in py_strsource 3044 ] 3045 ) 3046 else: 3047 py_strsource = strsource.get_string_value() 3048 if py_strsource is None: 3049 return evaluation.message("StringInsert", "strse", Integer1, exp) 3050 return String(self._insert(py_strsource, py_strnew, listpos, evaluation)) 3051 3052 3053def _pattern_search(name, string, patt, evaluation, options, matched): 3054 # Get the pattern list and check validity for each 3055 if patt.has_form("List", None): 3056 patts = patt.get_leaves() 3057 else: 3058 patts = [patt] 3059 re_patts = [] 3060 for p in patts: 3061 py_p = to_regex(p, evaluation) 3062 if py_p is None: 3063 return evaluation.message("StringExpression", "invld", p, patt) 3064 re_patts.append(py_p) 3065 3066 flags = re.MULTILINE 3067 if options["System`IgnoreCase"] == SymbolTrue: 3068 flags = flags | re.IGNORECASE 3069 3070 def _search(patts, str, flags, matched): 3071 if any(re.search(p, str, flags=flags) for p in patts): 3072 return SymbolTrue if matched else SymbolFalse 3073 return SymbolFalse if matched else SymbolTrue 3074 3075 # Check string validity and perform regex searchhing 3076 if string.has_form("List", None): 3077 py_s = [s.get_string_value() for s in string.leaves] 3078 if any(s is None for s in py_s): 3079 return evaluation.message( 3080 name, "strse", Integer1, Expression(name, string, patt) 3081 ) 3082 return Expression(SymbolList, *[_search(re_patts, s, flags, matched) for s in py_s]) 3083 else: 3084 py_s = string.get_string_value() 3085 if py_s is None: 3086 return evaluation.message( 3087 name, "strse", Integer1, Expression(name, string, patt) 3088 ) 3089 return _search(re_patts, py_s, flags, matched) 3090 3091 3092class StringContainsQ(Builtin): 3093 """ 3094 <dl> 3095 <dt>'StringContainsQ["$string$", $patt$]' 3096 <dd>returns True if any part of $string$ matches $patt$, and returns False otherwise. 3097 <dt>'StringContainsQ[{"s1", "s2", ...}, patt]' 3098 <dd>returns the list of results for each element of string list. 3099 <dt>'StringContainsQ[patt]' 3100 <dd>represents an operator form of StringContainsQ that can be applied to an expression. 3101 </dl> 3102 3103 >> StringContainsQ["mathics", "m" ~~ __ ~~ "s"] 3104 = True 3105 3106 >> StringContainsQ["mathics", "a" ~~ __ ~~ "m"] 3107 = False 3108 3109 #> StringContainsQ["Hello", "o"] 3110 = True 3111 3112 #> StringContainsQ["a"]["abcd"] 3113 = True 3114 3115 #> StringContainsQ["Mathics", "ma", IgnoreCase -> False] 3116 = False 3117 3118 >> StringContainsQ["Mathics", "MA" , IgnoreCase -> True] 3119 = True 3120 3121 #> StringContainsQ["", "Empty String"] 3122 = False 3123 3124 #> StringContainsQ["", ___] 3125 = True 3126 3127 #> StringContainsQ["Empty Pattern", ""] 3128 = True 3129 3130 #> StringContainsQ[notastring, "n"] 3131 : String or list of strings expected at position 1 in StringContainsQ[notastring, n]. 3132 = StringContainsQ[notastring, n] 3133 3134 #> StringContainsQ["Welcome", notapattern] 3135 : Element notapattern is not a valid string or pattern element in notapattern. 3136 = StringContainsQ[Welcome, notapattern] 3137 3138 >> StringContainsQ[{"g", "a", "laxy", "universe", "sun"}, "u"] 3139 = {False, False, False, True, True} 3140 3141 #> StringContainsQ[{}, "list of string is empty"] 3142 = {} 3143 3144 >> StringContainsQ["e" ~~ ___ ~~ "u"] /@ {"The Sun", "Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"} 3145 = {True, True, True, False, False, False, False, False, True} 3146 3147 ## special cases, Mathematica allows list of patterns 3148 #> StringContainsQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}] 3149 = {False, False, True, True, False} 3150 3151 #> StringContainsQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}, IgnoreCase -> True] 3152 = {False, False, True, True, True} 3153 3154 #> StringContainsQ[{"A", "Galaxy", "Far", "Far", "Away"}, {}] 3155 = {False, False, False, False, False} 3156 3157 #> StringContainsQ[{"A", Galaxy, "Far", "Far", Away}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}] 3158 : String or list of strings expected at position 1 in StringContainsQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}]. 3159 = StringContainsQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}] 3160 3161 #> StringContainsQ[{"A", "Galaxy", "Far", "Far", "Away"}, {F ~~ __ ~~ "r", aw ~~ ___}] 3162 : Element F ~~ __ ~~ r is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}. 3163 = StringContainsQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}] 3164 ## Mathematica can detemine correct invalid element in the pattern, it reports error: 3165 ## Element F is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}. 3166 """ 3167 3168 options = { 3169 "IgnoreCase": "False", 3170 } 3171 3172 rules = { 3173 "StringContainsQ[patt_][expr_]": "StringContainsQ[expr, patt]", 3174 } 3175 3176 messages = { 3177 "strse": "String or list of strings expected at position `1` in `2`.", 3178 } 3179 3180 def apply(self, string, patt, evaluation, options): 3181 "StringContainsQ[string_, patt_, OptionsPattern[%(name)s]]" 3182 return _pattern_search( 3183 self.__class__.__name__, string, patt, evaluation, options, True 3184 ) 3185 3186 3187class StringFreeQ(Builtin): 3188 """ 3189 <dl> 3190 <dt>'StringFreeQ["$string$", $patt$]' 3191 <dd>returns True if no substring in $string$ matches the string expression $patt$, and returns False otherwise. 3192 <dt>'StringFreeQ[{"s1", "s2", ...}, patt]' 3193 <dd>returns the list of results for each element of string list. 3194 <dt>'StringFreeQ["string", {p1, p2, ...}]' 3195 <dd>returns True if no substring matches any of the $pi$. 3196 <dt>'StringFreeQ[patt]' 3197 <dd>represents an operator form of StringFreeQ that can be applied to an expression. 3198 </dl> 3199 3200 >> StringFreeQ["mathics", "m" ~~ __ ~~ "s"] 3201 = False 3202 3203 >> StringFreeQ["mathics", "a" ~~ __ ~~ "m"] 3204 = True 3205 3206 #> StringFreeQ["Hello", "o"] 3207 = False 3208 3209 #> StringFreeQ["a"]["abcd"] 3210 = False 3211 3212 #> StringFreeQ["Mathics", "ma", IgnoreCase -> False] 3213 = True 3214 3215 >> StringFreeQ["Mathics", "MA" , IgnoreCase -> True] 3216 = False 3217 3218 #> StringFreeQ["", "Empty String"] 3219 = True 3220 3221 #> StringFreeQ["", ___] 3222 = False 3223 3224 #> StringFreeQ["Empty Pattern", ""] 3225 = False 3226 3227 #> StringFreeQ[notastring, "n"] 3228 : String or list of strings expected at position 1 in StringFreeQ[notastring, n]. 3229 = StringFreeQ[notastring, n] 3230 3231 #> StringFreeQ["Welcome", notapattern] 3232 : Element notapattern is not a valid string or pattern element in notapattern. 3233 = StringFreeQ[Welcome, notapattern] 3234 3235 >> StringFreeQ[{"g", "a", "laxy", "universe", "sun"}, "u"] 3236 = {True, True, True, False, False} 3237 3238 #> StringFreeQ[{}, "list of string is empty"] 3239 = {} 3240 3241 >> StringFreeQ["e" ~~ ___ ~~ "u"] /@ {"The Sun", "Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"} 3242 = {False, False, False, True, True, True, True, True, False} 3243 3244 #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}] 3245 = {True, True, False, False, True} 3246 3247 >> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}, IgnoreCase -> True] 3248 = {True, True, False, False, False} 3249 3250 #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {}] 3251 = {True, True, True, True, True} 3252 3253 #> StringFreeQ[{"A", Galaxy, "Far", "Far", Away}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}] 3254 : String or list of strings expected at position 1 in StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}]. 3255 = StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}] 3256 3257 #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {F ~~ __ ~~ "r", aw ~~ ___}] 3258 : Element F ~~ __ ~~ r is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}. 3259 = StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}] 3260 ## Mathematica can detemine correct invalid element in the pattern, it reports error: 3261 ## Element F is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}. 3262 """ 3263 3264 options = { 3265 "IgnoreCase": "False", 3266 } 3267 3268 rules = { 3269 "StringFreeQ[patt_][expr_]": "StringFreeQ[expr, patt]", 3270 } 3271 3272 messages = { 3273 "strse": "String or list of strings expected at position `1` in `2`.", 3274 } 3275 3276 def apply(self, string, patt, evaluation, options): 3277 "StringFreeQ[string_, patt_, OptionsPattern[%(name)s]]" 3278 return _pattern_search( 3279 self.__class__.__name__, string, patt, evaluation, options, False 3280 ) 3281 3282 3283class StringRiffle(Builtin): 3284 """ 3285 <dl> 3286 <dt>'StringRiffle[{s1, s2, s3, ...}]' 3287 <dd>returns a new string by concatenating all the $si$, with spaces inserted between them. 3288 <dt>'StringRiffle[list, sep]' 3289 <dd>inserts the separator $sep$ between all elements in $list$. 3290 <dt>'StringRiffle[list, {"left", "sep", "right"}]' 3291 <dd>use $left$ and $right$ as delimiters after concatenation. 3292 3293 ## These 2 forms are not currently implemented 3294 ## <dt>'StringRiffle[{{s11, s12, ...}, {s21, s22, ...}, ...}]' 3295 ## <dd>returns a new string by concatenating the $sij$, and inserting spaces at the lowest level and newlines at the higher level. 3296 ## <dt>'StringRiffle[list, sep1, sep2, ...]' 3297 ## <dd>inserts separator $sepi$ between elements of list at level i. 3298 </dl> 3299 3300 >> StringRiffle[{"a", "b", "c", "d", "e"}] 3301 = a b c d e 3302 3303 #> StringRiffle[{a, b, c, "d", e, "f"}] 3304 = a b c d e f 3305 3306 ## 1st is not a list 3307 #> StringRiffle["abcdef"] 3308 : List expected at position 1 in StringRiffle[abcdef]. 3309 : StringRiffle called with 1 argument; 2 or more arguments are expected. 3310 = StringRiffle[abcdef] 3311 3312 #> StringRiffle[{"", "", ""}] // FullForm 3313 = " " 3314 3315 ## This form is not supported 3316 #> StringRiffle[{{"a", "b"}, {"c", "d"}}] 3317 : Sublist form in position 1 is is not implemented yet. 3318 = StringRiffle[{{a, b}, {c, d}}] 3319 3320 >> StringRiffle[{"a", "b", "c", "d", "e"}, ", "] 3321 = a, b, c, d, e 3322 3323 #> StringRiffle[{"a", "b", "c", "d", "e"}, sep] 3324 : String expected at position 2 in StringRiffle[{a, b, c, d, e}, sep]. 3325 = StringRiffle[{a, b, c, d, e}, sep] 3326 3327 >> StringRiffle[{"a", "b", "c", "d", "e"}, {"(", " ", ")"}] 3328 = (a b c d e) 3329 3330 #> StringRiffle[{"a", "b", "c", "d", "e"}, {" ", ")"}] 3331 : String expected at position 2 in StringRiffle[{a, b, c, d, e}, { , )}]. 3332 = StringRiffle[{a, b, c, d, e}, { , )}] 3333 #> StringRiffle[{"a", "b", "c", "d", "e"}, {left, " ", "."}] 3334 : String expected at position 2 in StringRiffle[{a, b, c, d, e}, {left, , .}]. 3335 = StringRiffle[{a, b, c, d, e}, {left, , .}] 3336 3337 ## This form is not supported 3338 #> StringRiffle[{"a", "b", "c"}, "+", "-"] 3339 ## Mathematica result: a+b+c, but we are not support multiple separators 3340 : Multiple separators form is not implemented yet. 3341 = StringRiffle[{a, b, c}, +, -] 3342 """ 3343 3344 attributes = ("ReadProtected",) 3345 3346 messages = { 3347 "list": "List expected at position `1` in `2`.", 3348 "argmu": "StringRiffle called with 1 argument; 2 or more arguments are expected.", 3349 "argm": "StringRiffle called with 0 arguments; 2 or more arguments are expected.", 3350 "string": "String expected at position `1` in `2`.", 3351 "sublist": "Sublist form in position 1 is is not implemented yet.", 3352 "mulsep": "Multiple separators form is not implemented yet.", 3353 } 3354 3355 def apply(self, liststr, seps, evaluation): 3356 "StringRiffle[liststr_, seps___]" 3357 separators = seps.get_sequence() 3358 exp = ( 3359 Expression("StringRiffle", liststr, seps) 3360 if separators 3361 else Expression("StringRiffle", liststr) 3362 ) 3363 3364 # Validate separators 3365 if len(separators) > 1: 3366 return evaluation.message("StringRiffle", "mulsep") 3367 elif len(separators) == 1: 3368 if separators[0].has_form("List", None): 3369 if len(separators[0].leaves) != 3 or any( 3370 not isinstance(s, String) for s in separators[0].leaves 3371 ): 3372 return evaluation.message("StringRiffle", "string", Integer(2), exp) 3373 elif not isinstance(separators[0], String): 3374 return evaluation.message("StringRiffle", "string", Integer(2), exp) 3375 3376 # Validate list of string 3377 if not liststr.has_form("List", None): 3378 evaluation.message("StringRiffle", "list", Integer1, exp) 3379 return evaluation.message("StringRiffle", "argmu", exp) 3380 elif any(leaf.has_form("List", None) for leaf in liststr.leaves): 3381 return evaluation.message("StringRiffle", "sublist") 3382 3383 # Determine the separation token 3384 left, right = "", "" 3385 if len(separators) == 0: 3386 sep = " " 3387 else: 3388 if separators[0].has_form("List", None): 3389 left = separators[0].leaves[0].value 3390 sep = separators[0].leaves[1].value 3391 right = separators[0].leaves[2].value 3392 else: 3393 sep = separators[0].get_string_value() 3394 3395 # Getting all together 3396 result = left 3397 for i in range(len(liststr.leaves)): 3398 text = ( 3399 liststr.leaves[i] 3400 .format(evaluation, "System`OutputForm") 3401 .boxes_to_text(evaluation=evaluation) 3402 ) 3403 if i == len(liststr.leaves) - 1: 3404 result += text + right 3405 else: 3406 result += text + sep 3407 3408 return String(result) 3409