# -*- coding: utf-8 -*-
# cython: language_level=3, py2_import=True
#
#   Cython Scanner - Lexical Definitions
#

from __future__ import absolute_import, unicode_literals

raw_prefixes = "rR"
bytes_prefixes = "bB"
string_prefixes = "fFuU" + bytes_prefixes
char_prefixes = "cC"
any_string_prefix = raw_prefixes + string_prefixes + char_prefixes
IDENT = 'IDENT'


def make_lexicon():
    """Assemble and return the Plex ``Lexicon`` holding the scanner's rules.

    The Plex primitives are imported inside the function body.  The rule
    list covers identifiers, numeric literals, operators, brackets, line
    ends, string openers, comments and blanks, followed by one ``State``
    for indentation handling ('INDENT') and one for each string quoting
    style ('SQ_STRING', 'DQ_STRING', 'TSQ_STRING', 'TDQ_STRING').

    NOTE(review): the order of the rules is presumably significant to Plex
    when several patterns match the same text, so it is kept unchanged.
    """
    from ..Plex import (
        Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof,
        TEXT, IGNORE, Method, State, Lexicon, Range)

    # ---- single-character classes ------------------------------------
    leading_digit = Any("123456789")
    dec_digit = Any("0123456789")
    bin_digit = Any("01")
    oct_digit = Any("01234567")
    hex_digit = Any("0123456789ABCDEFabcdef")

    # ---- identifiers -------------------------------------------------
    # The sets of valid unicode identifier characters are slow to compute
    # at runtime and need Python 3, so they are taken from the generated
    # tables at the bottom of this file.
    ident_start = Any(unicode_start_ch_any) | Range(unicode_start_ch_range)
    ident_part = (
        ident_start
        | Any(unicode_continuation_ch_any)
        | Range(unicode_continuation_ch_range)
    )
    identifier = ident_start + Rep(ident_part)

    # ---- numbers -----------------------------------------------------
    def digit_groups(d):
        # One or more `d`, then any number of "_"-introduced further runs.
        first_run = Rep1(d)
        more_runs = Rep(Str("_") + Rep1(d))
        return first_run + more_runs

    def digits_after(prefix, d):
        # `prefix`, an optional single "_", then grouped digits.
        return prefix + Opt(Str("_")) + digit_groups(d)

    dec_number = digit_groups(dec_digit)
    period = Str(".")
    exp_part = Any("Ee") + Opt(Any("+-")) + dec_number
    fraction = (dec_number + period + Opt(dec_number)) | (period + dec_number)

    radix_body = (
        digits_after(Any("Xx"), hex_digit)
        | digits_after(Any("Oo"), oct_digit)
        | digits_after(Any("Bb"), bin_digit)
    )
    int_body = (
        # decimal literals with underscores must not start with '0'
        digits_after(leading_digit, dec_digit)
        | (Str("0") + radix_body)
        # 0_0_0_0... is allowed as a decimal literal
        | digit_groups(Str('0'))
        # FIXME: remove these Py2 style decimal/octal literals (PY_VERSION_HEX < 3)
        | Rep1(dec_digit)
    )

    unsigned_mark = Opt(Any("Uu"))
    long_mark = Opt(Any("Ll"))
    int_suffix = (
        (unsigned_mark + long_mark + long_mark)
        | (long_mark + long_mark + unsigned_mark)
    )

    int_token = int_body + int_suffix
    float_token = (fraction + Opt(exp_part)) | (dec_number + exp_part)
    imag_token = (int_body | float_token) + Any("jJ")

    # ---- strings -----------------------------------------------------
    # invalid combinations of prefixes are caught in p_string_literal
    prefix_run = Rep(Any(string_prefixes + raw_prefixes))
    quote_mark = Str("'") | Str('"') | Str("'''") | Str('"""')
    string_opener = Opt(prefix_run | Any(char_prefixes)) + quote_mark

    oct_pair = oct_digit + oct_digit
    oct_triple = oct_digit + oct_digit + oct_digit
    hex_pair = hex_digit + hex_digit
    hex_quad = hex_pair + hex_pair
    escape = Str("\\") + (
        oct_pair
        | oct_triple
        | Str('N{') + Rep(AnyBut('}')) + Str('}')
        | Str('u') + hex_quad
        | Str('x') + hex_pair
        | Str('U') + hex_quad + hex_quad
        | AnyChar
    )

    # ---- operators and brackets --------------------------------------
    opening = Any("([{")
    closing = Any(")]}")
    single_op = Any(":,;+-*/|&<>=.%`~^?!@")
    multi_op = Str(
        "==", "<>", "!=", "<=", ">=", "<<", ">>", "**", "//",
        "+=", "-=", "*=", "/=", "%=", "|=", "^=", "&=",
        "<<=", ">>=", "**=", "//=", "->", "@=", "&&", "||")

    # ---- layout ------------------------------------------------------
    blanks = Rep1(Any(" \t\f"))
    continued_line = Str("\\\n")
    line_end = Eol + Opt(Str("\n"))
    comment_text = Str("#") + Rep(AnyBut("\n"))
    indent_run = Bol + Rep(Any(" \t"))

    # ---- per-state rule lists ----------------------------------------
    indent_rules = [
        (comment_text + line_end, Method('commentline')),
        (Opt(blanks) + Opt(comment_text) + line_end, IGNORE),
        (indent_run, Method('indentation_action')),
        (Eof, Method('eof_action')),
    ]

    single_quoted_rules = [
        (escape, 'ESCAPE'),
        (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
        (Str('"'), 'CHARS'),
        (Str("\n"), Method('unclosed_string_action')),
        (Str("'"), Method('end_string_action')),
        (Eof, 'EOF'),
    ]

    double_quoted_rules = [
        (escape, 'ESCAPE'),
        (Rep1(AnyBut('"\n\\')), 'CHARS'),
        (Str("'"), 'CHARS'),
        (Str("\n"), Method('unclosed_string_action')),
        (Str('"'), Method('end_string_action')),
        (Eof, 'EOF'),
    ]

    triple_single_quoted_rules = [
        (escape, 'ESCAPE'),
        (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
        (Any("'\""), 'CHARS'),
        (Str("\n"), 'NEWLINE'),
        (Str("'''"), Method('end_string_action')),
        (Eof, 'EOF'),
    ]

    triple_double_quoted_rules = [
        (escape, 'ESCAPE'),
        (Rep1(AnyBut('"\'\n\\')), 'CHARS'),
        (Any("'\""), 'CHARS'),
        (Str("\n"), 'NEWLINE'),
        (Str('"""'), Method('end_string_action')),
        (Eof, 'EOF'),
    ]

    # ---- full specification, in the original order -------------------
    rules = [
        (identifier, Method('normalize_ident')),
        (int_token, Method('strip_underscores', symbol='INT')),
        (float_token, Method('strip_underscores', symbol='FLOAT')),
        (imag_token, Method('strip_underscores', symbol='IMAG')),
        (single_op | multi_op, TEXT),

        (opening, Method('open_bracket_action')),
        (closing, Method('close_bracket_action')),
        (line_end, Method('newline_action')),

        (string_opener, Method('begin_string_action')),

        (comment_text, IGNORE),
        (blanks, IGNORE),
        (continued_line, IGNORE),

        State('INDENT', indent_rules),
        State('SQ_STRING', single_quoted_rules),
        State('DQ_STRING', double_quoted_rules),
        State('TSQ_STRING', triple_single_quoted_rules),
        State('TDQ_STRING', triple_double_quoted_rules),

        (Eof, Method('eof_action')),
    ]

    # FIXME: Plex 1.9 needs different args here from Plex 1.1.4, so the
    # debug_flags / debug_file arguments are currently not passed.
    return Lexicon(rules)


# BEGIN GENERATED CODE
# generated with:
# cpython 3.10.0a0 (heads/master:2b0e654f91, May 29 2020, 16:17:52)
#
# The *_any strings are handed to Any() and the *_range strings to Range()
# in make_lexicon() above.
# NOTE(review): the *_range strings are presumably consecutive (low, high)
# character pairs - confirm against Plex's Range() before touching them.
# NOTE(review): do not hand-edit; regenerate instead.  Some entries in this
# copy (e.g. the second ASCII "AZaz" and the "ffst" run near the end of
# unicode_start_ch_range) look like normalised forms of compatibility
# characters - verify against freshly generated output.

unicode_start_ch_any = (
    u"_ªµºˬˮͿΆΌՙەۿܐޱߺࠚࠤࠨऽॐলঽৎৼਫ਼ઽૐૹଽୱஃஜௐఽಀಽೞഽൎලาຄລາຽໆༀဿၡႎჇჍቘዀៗៜᢪᪧᳺὙ"
    u"ὛὝιⁱⁿℂℇℕℤΩℨⅎⴧⴭⵯꣻꧏꩺꪱꫀꫂיִמּﹱﹳﹷﹹﹻﹽ"
    u""
)
unicode_start_ch_range = (
    u"AZazÀÖØöøˁˆˑˠˤͰʹͶͷͻͽΈΊΎΡΣϵϷҁҊԯԱՖՠֈאתׯײؠيٮٯٱۓۥۦۮۯۺۼܒܯݍޥߊߪߴߵࠀࠕ"
    u"ࡀࡘࡠࡪࢠࢴࢶࣇऄहक़ॡॱঀঅঌএঐওনপরশহড়ঢ়য়ৡৰৱਅਊਏਐਓਨਪਰਲਲ਼ਵਸ਼ਸਹਖ਼ੜੲੴઅઍએઑઓનપરલળવહ"
    u"ૠૡଅଌଏଐଓନପରଲଳଵହଡ଼ଢ଼ୟୡஅஊஎஐஒகஙசஞடணதநபமஹఅఌఎఐఒనపహౘౚౠౡಅಌಎಐಒನಪಳವಹೠೡೱೲ"
    u"ഄഌഎഐഒഺൔൖൟൡൺൿඅඖකනඳරවෆกะเๆກຂຆຊຌຣວະເໄໜໟཀཇཉཬྈྌကဪၐၕၚၝၥၦၮၰၵႁႠჅაჺჼቈ"
    u"ቊቍቐቖቚቝበኈኊኍነኰኲኵኸኾዂዅወዖዘጐጒጕጘፚᎀᎏᎠᏵᏸᏽᐁᙬᙯᙿᚁᚚᚠᛪᛮᛸᜀᜌᜎᜑᜠᜱᝀᝑᝠᝬᝮᝰកឳᠠᡸᢀᢨ"
    u"ᢰᣵᤀᤞᥐᥭᥰᥴᦀᦫᦰᧉᨀᨖᨠᩔᬅᬳᭅᭋᮃᮠᮮᮯᮺᯥᰀᰣᱍᱏᱚᱽᲀᲈᲐᲺᲽᲿᳩᳬᳮᳳᳵᳶᴀᶿḀἕἘἝἠὅὈὍὐὗὟώᾀᾴ"
    u"ᾶᾼῂῄῆῌῐΐῖΊῠῬῲῴῶῼₐₜℊℓ℘ℝKℹℼℿⅅⅉⅠↈⰀⰮⰰⱞⱠⳤⳫⳮⳲⳳⴀⴥⴰⵧⶀⶖⶠⶦⶨⶮⶰⶶⶸⶾⷀⷆⷈⷎⷐⷖ"
    u"ⷘⷞ々〇〡〩〱〵〸〼ぁゖゝゟァヺーヿㄅㄯㄱㆎㆠㆿㇰㇿ㐀䶿一鿼ꀀꒌꓐꓽꔀꘌꘐꘟꘪꘫꙀꙮꙿꚝꚠꛯꜗꜟꜢꞈꞋꞿꟂꟊꟵꠁꠃꠅꠇꠊ"
    u"ꠌꠢꡀꡳꢂꢳꣲꣷꣽꣾꤊꤥꤰꥆꥠꥼꦄꦲꧠꧤꧦꧯꧺꧾꨀꨨꩀꩂꩄꩋꩠꩶꩾꪯꪵꪶꪹꪽꫛꫝꫠꫪꫲꫴꬁꬆꬉꬎꬑꬖꬠꬦꬨꬮꬰꭚꭜꭩꭰꯢ"
    u"가힣ힰퟆퟋퟻ豈舘並龎ffstﬓﬗײַﬨשׁזּטּלּנּסּףּפּצּﮱﯓﱝﱤﴽﵐﶏﶒﷇﷰﷹﹿﻼAZazヲンᅠ하ᅦᅧᅬᅭᅲᅳᅵ"
    u""
    u""
    u""
    u""
    u""
    u""
    u""
)
unicode_continuation_ch_any = (
    u"··়ׇֿٰܑ߽ৗ਼৾ੑੵ઼଼ஂௗ಼ൗ්ූัັ༹༵༷࿆᳭ᢩ៝᳴⁔⵿⃡꙯ꠂ꠆ꠋ꠬ꧥꩃﬞꪰ꫁_"
    u""
)
unicode_continuation_ch_range = (
    u"09ֽׁׂًؚ֑ׅ̀ͯ҃҇ׄؐ٩۪ۭۖۜ۟ۤۧۨ۰۹ܰ݊ަް߀߉࡙࡛࣓ࣣ߫߳ࠖ࠙ࠛࠣࠥࠧࠩ࠭࣡ःऺ़ाॏ॑ॗॢॣ०९ঁঃ"
    u"াৄেৈো্ৢৣ০৯ਁਃਾੂੇੈੋ੍੦ੱઁઃાૅેૉો્ૢૣ૦૯ૺ૿ଁଃାୄେୈୋ୍୕ୗୢୣ୦୯ாூெைொ்௦௯ఀఄాౄ"
    u"ెైొ్ౕౖౢౣ౦౯ಁಃಾೄೆೈೊ್ೕೖೢೣ೦೯ഀഃ഻഼ാൄെൈൊ്ൢൣ൦൯ඁඃාුෘෟ෦෯ෲෳำฺ็๎๐๙ຳຼ່ໍ໐໙"
    u"༘༙༠༩༾༿྄ཱ྆྇ྍྗྙྼါှ၀၉ၖၙၞၠၢၤၧၭၱၴႂႍႏႝ፝፟፩፱ᜒ᜔ᜲ᜴ᝒᝓᝲᝳ឴៓០៩᠋᠍᠐᠙ᤠᤫᤰ᤻᥆᥏᧐᧚"
    u"ᨗᨛᩕᩞ᩠᩿᩼᪉᪐᪙᪽ᪿᫀ᪰ᬀᬄ᬴᭄᭐᭙᭫᭳ᮀᮂᮡᮭ᮰᮹᯦᯳ᰤ᰷᱀᱉᱐᱙᳔᳨᳐᳒᳷᷹᷿᳹᷀᷻‿⁀⃥゙゚〪〯⃐⃜⃰⳯⳱ⷠⷿ"
    u"꘠꘩ꙴ꙽ꚞꚟ꛰꛱ꠣꠧꢀꢁꢴꣅ꣐꣙꣠꣱ꣿ꤉ꤦ꤭ꥇ꥓ꦀꦃ꦳꧀꧐꧙꧰꧹ꨩꨶꩌꩍ꩐꩙ꩻꩽꪴꪲꪷꪸꪾ꪿ꫫꫯꫵ꫶ꯣꯪ꯬꯭꯰꯹︀️︠︯"
    u"︳︴﹍﹏09゙゚"
    u""
    u""
    u""
)

# END GENERATED CODE