"""Regexps to match html elements.

The module builds its patterns bottom-up from small string fragments
(attribute name, attribute value, tag, comment, ...) and exposes:

* ``HTML_TAG_RE`` -- compiled pattern matching, at the start of the input,
  an open tag, close tag, comment, processing instruction, declaration or
  CDATA section.
* ``HTML_OPEN_CLOSE_TAG_STR`` / ``HTML_OPEN_CLOSE_TAG_RE`` -- the same idea
  restricted to open and close tags only, as a pattern string and as a
  compiled pattern.
"""

import re

# Attribute name: a letter, "_" or ":" followed by letters, digits,
# ":", ".", "_" or "-".
attr_name = r"[a-zA-Z_:][a-zA-Z0-9:._-]*"

# Unquoted value: one or more characters excluding quotes, "=", "<", ">",
# backtick and everything in the range \x00-\x20.
unquoted = r"""[^"'=<>`\x00-\x20]+"""
# Quoted values: anything up to the next matching quote character.
single_quoted = r"'[^']*'"
double_quoted = r'"[^"]*"'

# Any of the three value forms, as a non-capturing alternation.
attr_value = "(?:" + "|".join((unquoted, single_quoted, double_quoted)) + ")"

# One attribute: mandatory leading whitespace, a name, and an optional
# "= value" part with optional whitespace around the equals sign.
attribute = r"(?:\s+" + attr_name + r"(?:\s*=\s*" + attr_value + ")?)"

# Open tag: "<", a letter, then letters/digits/hyphens, zero or more
# attributes, optional whitespace, an optional "/" and the closing ">".
open_tag = r"<[A-Za-z][A-Za-z0-9\-]*" + attribute + r"*\s*\/?>"

# Close tag: "</", a tag name, optional whitespace, ">".
close_tag = r"<\/[A-Za-z][A-Za-z0-9\-]*\s*>"
# Comment: either the empty comment "<!---->" or "<!--" ... "-->" whose
# text does not start with ">" or "->" and contains no "--".
comment = r"<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"
# Processing instruction: "<?" ... "?>", matched non-greedily.
processing = r"<[?][\s\S]*?[?]>"
# Declaration: "<!", uppercase letters, whitespace, then anything up to ">".
declaration = r"<![A-Z]+\s+[^>]*>"
# CDATA section: "<![CDATA[" ... "]]>", matched non-greedily.
cdata = r"<!\[CDATA\[[\s\S]*?\]\]>"

# All six constructs, anchored at the start of the string by "^".
HTML_TAG_RE = re.compile(
    "^(?:"
    + "|".join((open_tag, close_tag, comment, processing, declaration, cdata))
    + ")"
)
# Open/close tags only, also anchored at the start of the string.
HTML_OPEN_CLOSE_TAG_STR = "^(?:" + open_tag + "|" + close_tag + ")"
HTML_OPEN_CLOSE_TAG_RE = re.compile(HTML_OPEN_CLOSE_TAG_STR)