1"""Regexps to match html elements
2"""
3
4import re
5
# --- Attribute pieces -------------------------------------------------------

# Name: starts with a letter, "_" or ":", then any run of letters, digits,
# ":", ".", "_" or "-".
attr_name = "[a-zA-Z_:][a-zA-Z0-9:._-]*"

# Value spellings. An unquoted value is a non-empty run that excludes quotes,
# "=", "<", ">", the backtick and every character in the \x00-\x20 range.
unquoted = "[^\"'=<>`\\x00-\\x20]+"
single_quoted = "'[^']*'"
double_quoted = '"[^"]*"'

# Any one of the three value spellings, wrapped in a non-capturing group.
attr_value = "".join(
    ("(?:", "|".join((unquoted, single_quoted, double_quoted)), ")")
)

# Mandatory leading whitespace, the name, then an optional "= value" part.
attribute = "".join(("(?:\\s+", attr_name, "(?:\\s*=\\s*", attr_value, ")?)"))

# --- Tag-like constructs ----------------------------------------------------

# "<name", zero or more attributes, optional whitespace, optional "/", ">".
open_tag = "".join(("<[A-Za-z][A-Za-z0-9\\-]*", attribute, "*\\s*\\/?>"))

# "</name", optional whitespace, ">".
close_tag = "<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>"

# Either the empty comment "<!---->", or "<!--" + text + "-->" where the text
# does not begin with ">" or "->", contains no "--" and does not end in "-".
comment = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"

# "<?" ... "?>", shortest match, newlines included.
processing = "<[?][\\s\\S]*?[?]>"

# "<!" + uppercase letters + whitespace + anything up to the next ">".
declaration = "<![A-Z]+\\s+[^>]*>"

# "<![CDATA[" ... "]]>", shortest match, newlines included.
cdata = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>"

# --- Compiled, start-anchored patterns --------------------------------------

# Matches any of the six constructs above at the beginning of the input.
HTML_TAG_RE = re.compile(
    "".join(
        (
            "^(?:",
            "|".join(
                (open_tag, close_tag, comment, processing, declaration, cdata)
            ),
            ")",
        )
    )
)

# Restricted variant: only opening and closing tags, again anchored at the
# beginning. The pattern source is kept as a string next to the compiled form.
HTML_OPEN_CLOSE_TAG_STR = "".join(("^(?:", "|".join((open_tag, close_tag)), ")"))
HTML_OPEN_CLOSE_TAG_RE = re.compile(HTML_OPEN_CLOSE_TAG_STR)
41