1#!/usr/bin/env python
2
3from __future__ import print_function, absolute_import
4
5import sys, os, os.path, re, codecs
6
# Relative paths of the two files whose generated sections this script rewrites.
_LXML_SOURCE_DIR = os.path.join("src", "lxml")
BUILD_SOURCE_FILE = os.path.join(_LXML_SOURCE_DIR, "xmlerror.pxi")
BUILD_DEF_FILE = os.path.join(_LXML_SOURCE_DIR, "includes", "xmlerror.pxd")
9
# Print the usage text when the documentation directory argument is missing
# or when help was requested explicitly.
_help_requested = (
    len(sys.argv) > 1 and sys.argv[1].lower() in ('-h', '--help'))
if len(sys.argv) < 2 or _help_requested:
    print("This script generates the constants in file %s" % BUILD_SOURCE_FILE)
    print("Call as")
    print(sys.argv[0], "/path/to/libxml2-doc-dir")
    # Asking for help is a successful run (status 0); calling the script
    # without its required argument is a usage error (status 1).
    sys.exit(0 if _help_requested else 1)
15
# The enum definitions are read from the 'html' subdirectory of the
# directory given on the command line.
HTML_DIR = os.path.join(sys.argv[1], 'html')
# Fail early: os.stat() raises an error if the directory cannot be found.
os.stat(HTML_DIR)

# Put 'src' at the front of the module search path before importing lxml,
# so that it is searched first.
sys.path[:0] = ['src']
from lxml import etree
21
# One row per generated enum, listed in output order:
#   (libxml2 enum name, Python variable name, prefix stripped from the
#    constant names written to the .pxi file)
# Commented-out rows are enums that are currently not generated.
_ENUM_TABLE = (
    ('xmlErrorLevel',       '__ERROR_LEVELS',          'XML_ERR_'),
    ('xmlErrorDomain',      '__ERROR_DOMAINS',         'XML_FROM_'),
    ('xmlParserErrors',     '__PARSER_ERROR_TYPES',    'XML_'),
#    ('xmlXPathError',       '__XPATH_ERROR_TYPES',     ''),
#    ('xmlSchemaValidError', '__XMLSCHEMA_ERROR_TYPES', 'XML_'),
    ('xmlRelaxNGValidErr',  '__RELAXNG_ERROR_TYPES',   'XML_'),
    )

# map enum name to (Python variable name, constant name prefix)
ENUM_MAP = dict(
    (_row[0], (_row[1], _row[2])) for _row in _ENUM_TABLE)

# enum names in the order in which their sections are generated
ENUM_ORDER = tuple(_row[0] for _row in _ENUM_TABLE)
39
# Comment block written in front of each generated section; it names the
# script (by the basename of sys.argv[0]) that produced the section.
COMMENT = "\n# This section is generated by the script '%s'.\n\n" % (
    os.path.basename(sys.argv[0]),)
44
def split(lines):
    """Split the lines of a source file around its generated section.

    The generated section is delimited by two comment lines (starting with
    '#') that contain "BEGIN: GENERATED CONSTANTS" and
    "END: GENERATED CONSTANTS".  The lines between them are dropped.

    lines -- iterable of text lines (e.g. an open file)

    Returns a tuple (pre, post) of lists:
    pre  -- all lines up to and including the BEGIN marker, then ''
    post -- '', the END marker line, all lines following it, then ''

    Raises ValueError if either marker line is missing.  Without this check,
    a missing marker would make the caller duplicate or drop file content.
    """
    lines = iter(lines)
    pre = []
    for line in lines:
        pre.append(line)
        if line.startswith('#') and "BEGIN: GENERATED CONSTANTS" in line:
            break
    else:
        raise ValueError(
            "no 'BEGIN: GENERATED CONSTANTS' marker line found")
    pre.append('')
    # skip the old generated section, up to the END marker
    for line in lines:
        if line.startswith('#') and "END: GENERATED CONSTANTS" in line:
            break
    else:
        raise ValueError(
            "no 'END: GENERATED CONSTANTS' marker line found")
    post = ['', line]
    post.extend(lines)
    post.append('')
    return pre, post
60
def regenerate_file(filename, result):
    """Replace the generated section of the file *filename* in place.

    The file is read as UTF-8 and divided by split() into the part before
    and the part after the generated section.  It is then rewritten as:
    leading part, COMMENT, the entries of *result* joined by newlines,
    trailing part.

    filename -- path of the source file to rewrite
    result   -- list of text lines (without line endings) that make up the
                new generated section
    """
    # Read the source file completely before it is truncated for writing.
    # The 'with' blocks close the file even if split() or a write fails.
    with codecs.open(filename, 'r', encoding="utf-8") as f:
        pre, post = split(f)

    # write the source file back with the new generated section
    with codecs.open(filename, 'w', encoding="utf-8") as f:
        f.write(''.join(pre))
        f.write(COMMENT)
        f.write('\n'.join(result))
        f.write(''.join(post))
74
# Namespace prefix mapping used by the XPath expressions below.
_XHTML_NAMESPACES = {'html' : 'http://www.w3.org/1999/xhtml'}

# XPath evaluator returning the string value of the node it is applied to.
collect_text = etree.XPath("string()")

# XPath evaluator selecting the <pre class="programlisting"> elements whose
# text contains 'Enum'.
find_enums = etree.XPath(
    "//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]",
    namespaces=_XHTML_NAMESPACES)
79
def parse_enums(html_dir, html_filename, enum_dict):
    """Collect the enums named in ENUM_MAP from one HTML documentation page.

    Every element selected by find_enums() whose text starts with
    "enum <name> {" and whose name is a key of ENUM_MAP is processed.  For
    each child element, the child's text is taken as the constant name and
    the text following the child is expected to look like "= <number>",
    optionally followed by ": <description>".

    html_dir      -- directory that contains the HTML page
    html_filename -- file name of the HTML page inside html_dir
    enum_dict     -- dict that is updated in place; maps the enum name to a
                     list of (name, int value, description or None) tuples

    An enum is left out of enum_dict (and a message is printed) if any of
    its entries cannot be parsed.  Returns enum_dict.
    """
    PARSE_ENUM_NAME  = re.compile(r'\s*enum\s+(\w+)\s*{', re.I).match
    PARSE_ENUM_VALUE = re.compile(r'\s*=\s+([0-9]+)\s*(?::\s*(.*))?').match
    tree = etree.parse(os.path.join(html_dir, html_filename))
    for enum in find_enums(tree):
        enum_name = PARSE_ENUM_NAME(collect_text(enum))
        if not enum_name:
            continue
        enum_name = enum_name.group(1)
        if enum_name not in ENUM_MAP:
            continue
        print("Found enum", enum_name)
        entries = []
        for child in enum:
            name = child.text
            # .tail is None when no text follows the child element; treat
            # that as an unparsable entry instead of failing in the regex.
            match = PARSE_ENUM_VALUE(child.tail or '')
            if not match:
                print("Ignoring enum %s (failed to parse field '%s')" % (
                        enum_name, name))
                break
            value, descr = match.groups()
            entries.append((name, int(value), descr))
        else:
            # only store the enum if all of its entries could be parsed
            enum_dict[enum_name] = entries
    return enum_dict
106
# Collect the enum definitions from the documentation pages.
# Not parsed at the moment: 'libxml-xpath.html', 'libxml-xmlschemas.html'.
enum_dict = {}
for _html_page in ('libxml-xmlerror.html', 'libxml-relaxng.html'):
    parse_enums(HTML_DIR, _html_page, enum_dict)
112
# regenerate source files
#   pxi_result: lines of the generated section of the .pxi file
#   pxd_result: lines of the generated section of the .pxd file
pxi_result = []
append_pxi = pxi_result.append
pxd_result = []
append_pxd = pxd_result.append

append_pxd('cdef extern from "libxml/xmlerror.h":')

ctypedef_indent = ' '*4
constant_indent = ctypedef_indent*2

for enum_name in ENUM_ORDER:
    if enum_name not in enum_dict:
        # parse_enums() leaves out enums it could not find or parse; stop
        # with a readable message instead of a bare KeyError.
        sys.exit("Enum %s was not found in the parsed documentation"
                 % enum_name)
    constants = enum_dict[enum_name]
    pxi_name, prefix = ENUM_MAP[enum_name]

    # .pxd: one 'ctypedef enum' declaration per enum
    # .pxi: one multi-line string of NAME=VALUE lines per enum
    append_pxd(ctypedef_indent + 'ctypedef enum %s:' % enum_name)
    append_pxi('cdef object %s = """\\' % pxi_name)

    prefix_len = len(prefix)
    for name, val, descr in constants:
        # only add the description as a comment if it says more than the
        # numeric value itself
        if descr and descr != str(val):
            line = '%-50s = %7d # %s' % (name, val, descr)
        else:
            line = '%-50s = %7d' % (name, val)
        append_pxd(constant_indent + line)

        # strip the common prefix, unless that would leave an empty name
        if name.startswith(prefix) and len(name) > prefix_len:
            name = name[prefix_len:]
        append_pxi('%s=%d' % (name, val))

    append_pxd('')
    append_pxi('"""')
    append_pxi('')
149
# write source files: each target file gets its own list of generated lines
for _target_file, _generated_lines in (
        (BUILD_SOURCE_FILE, pxi_result),
        (BUILD_DEF_FILE, pxd_result)):
    print("Updating file %s" % _target_file)
    regenerate_file(_target_file, _generated_lines)

print("Done")
158