"""Regular-expression fragments for tokenizing Python source text.

Every public constant is a pattern *string* (not a compiled regex).  Most of
them wrap their alternatives in a named group via ``matches_any`` so that
several fragments can be combined into one pattern and told apart by group
name afterwards.
"""

import builtins
import keyword


def matches_any(name, alternates):
    """Return a named-group pattern matching any one of *alternates*.

    Args:
        name: Name of the resulting ``(?P<name>...)`` group.
        alternates: Iterable of regex source strings; they are joined
            with ``|`` in the given order.  Pass a list even for a single
            alternative -- a bare string would be split into characters.

    Returns:
        The pattern source as a string.
    """
    return "(?P<%s>%s)" % (name, "|".join(alternates))


KEYWORD = r"\b" + matches_any("keyword", keyword.kwlist) + r"\b"

# Public names of the ``builtins`` module that are not keywords, in ``dir()``
# order, followed by two names that are not really built-ins but are special
# nevertheless.
_keyword_names = frozenset(keyword.kwlist)
_builtinlist = [
    name
    for name in dir(builtins)
    if not name.startswith("_") and name not in _keyword_names
] + ["self", "cls"]


# TODO: move builtin handling to global-local
# The leading group consumes one preceding character (anything except
# ``.``, a quote, a backslash or ``#``) or anchors at the start, so the name
# itself must be read from the "builtin" group rather than the whole match.
BUILTIN = r"([^.'\"\\#]\b|^)" + matches_any("builtin", _builtinlist) + r"\b"

# NOTE(review): in the last alternative the ``\b`` precedes both branches of
# the inner group, so the ``\.\d`` branch can only match when the dot directly
# follows a word character.  Left unchanged on purpose -- confirm intent.
NUMBER = matches_any(
    "number",
    [
        r"\b0[bB][_0-1]+",
        r"\b0[oO][_0-7]+",
        r"\b0[xX][_0-9a-fA-F]+",
        r"\b(\d[_\d]*(\.[_\d]*)?|\.\d[_\d]*)([eE][+-]?[_\d]+)?j?",
    ],
)
# TODO: would it make regex too slow? VARIABLE = matches_any("VARIABLE", [...])

# A name directly preceded by "." and directly followed by "(".
METHOD_CALL = matches_any("method_call", [r"(?<=\.)([\w_]+)(?=\()"])
# A name followed by "(" that starts the text or follows a character other
# than ".", "_" or a word character.
FUNCTION_CALL = matches_any(
    "function_call", [r"(?:(?<=^)|(?<=[^._\w]))([\w_]+)(?=\()"]
)

COMMENT = matches_any("comment", [r"#[^\n]*"])
MAGIC_COMMAND = matches_any("magic", [r"^%[^\n]*"])  # used only in shell

# Optional string-literal prefix.
# NOTE(review): the ``\b`` binds only to the first alternative (``r``); the
# remaining prefixes match without a word-boundary check.  Left unchanged on
# purpose -- confirm intent.
STRINGPREFIX = r"(\br|u|ur|R|U|UR|Ur|uR|b|B|br|Br|bR|BR|rb|rB|Rb|RB|f|F|fr|Fr|fR|FR|rf|rF|Rf|RF)?"

# Single-line strings: *_OPEN has no closing quote (it may swallow one
# trailing newline), *_CLOSED ends with the matching quote.
SQSTRING_OPEN = STRINGPREFIX + r"'[^'\\\n]*(\\.[^'\\\n]*)*\n?"
SQSTRING_CLOSED = STRINGPREFIX + r"'[^'\\\n]*(\\.[^'\\\n]*)*'"

DQSTRING_OPEN = STRINGPREFIX + r'"[^"\\\n]*(\\.[^"\\\n]*)*\n?'
DQSTRING_CLOSED = STRINGPREFIX + r'"[^"\\\n]*(\\.[^"\\\n]*)*"'

# Triple-quoted strings; the closing delimiter is optional, so an
# unterminated literal is matched as well.
SQ3STRING = STRINGPREFIX + r"'''[^'\\]*((\\.|'(?!''))[^'\\]*)*(''')?"
DQ3STRING = STRINGPREFIX + r'"""[^"\\]*((\\.|"(?!""))[^"\\]*)*(""")?'

SQ3DELIMITER = STRINGPREFIX + "'''"
DQ3DELIMITER = STRINGPREFIX + '"""'
# A "#" comment whose text contains a triple-quote delimiter.
COMMENT_WITH_Q3DELIMITER = matches_any(
    "q3comment", [r"#[^\n]*('''|\"\"\")[^\n]*"]
)

STRING_OPEN = matches_any("open_string", [SQSTRING_OPEN, DQSTRING_OPEN])
STRING_CLOSED = matches_any("string", [SQSTRING_CLOSED, DQSTRING_CLOSED])
STRING3_DELIMITER = matches_any("DELIMITER3", [SQ3DELIMITER, DQ3DELIMITER])
STRING3 = matches_any("string3", [DQ3STRING, SQ3STRING])

# Wrapped in a list: a bare string would be iterated character by character.
TAB = matches_any("tab", ["\t"])