1#!/usr/bin/env python3
2#
3#  This file is a modification of the `mkdoc.py` script in pybind11:
4#      - File: https://github.com/pybind/pybind11/blob/master/tools/mkdoc.py
5#      - License: https://github.com/pybind/pybind11/blob/master/LICENSE
6#
7#  Where it differs:
8#  - process_comment: added substitutions.  cheap hack: turn all rst into markdown ```
9#        - Not necessarily ideal, but at least doesn't break the tables
10#
11#        s = re.sub(r'\\class\s+%s.*' % cpp_group, '', s)   # removes \class Thing file.h nanogui/file.h
12#        s = re.sub(r'\\struct\s+%s.*' % cpp_group, '', s)  # removes \struct Thing file.h nanogui/file.h
13#        s = re.sub(r'\\rst', r'```\n\n', s)
14#        s = re.sub(r'\\endrst', r'```\n\n', s)
15#        s = re.sub(r'.. note::', r'Note:', s)
16#        s = re.sub(r'.. warning::', r'Warning:', s)
17#        s = re.sub(r'.. code-block::\s*\w*', r'', s)
18#
19#  - process_comment: near the end, have to treat .. code-block:: specially
20#
21#        for line in x.splitlines():
22#            if len(line) < 4:
23#                result += line.strip()
24#            else:
25#                # this is a .. code-block:: indentation (three spaces)
26#                if line.startswith('   ') and line[3] != ' ':
27#                    result += line[3:].strip() + '\n'
28#                else:
29#                    result += line.strip() + '\n'
30#
31#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
32#
33#  Extract documentation from C++ header files to use it in Python bindings
34#
35
36import os
37import sys
38import platform
39import re
40import textwrap
41
# The clang Python module imported further down is not taken from the system:
# this script is tailored to the NanoGUI documentation build, where the module
# lives inside the pybind11 submodule of a full recursive clone.
base_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
clang_parent_folder = os.path.join(base_path, "ext/pybind11/tools/")
if os.path.isdir(clang_parent_folder):
    # Give the bundled module precedence over anything else on the path.
    sys.path.insert(0, clang_parent_folder)
else:
    raise RuntimeError(
         "The NanoGUI dependencies repository (pybind11, etc.) appear to be missing!\n"
         "You probably did not clone the project with --recursive. It is possible to recover\n"
         "by calling 'git submodule update --init --recursive'"
    )
54
55# Now we can import clang
56from clang import cindex
57from clang.cindex import CursorKind
58from collections import OrderedDict
59from threading import Thread, Semaphore
60from multiprocessing import cpu_count
61
# Cursor kinds whose children are visited by extract().
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() emits a docstring variable.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]
84
# Maps the symbol of a C++ operator to the word that replaces it when an
# ``operator<symbol>`` name is turned into an identifier by sanitize_name().
CPP_OPERATORS = {
    '<=': 'le',
    '>=': 'ge',
    '==': 'eq',
    '!=': 'ne',
    '[]': 'array',
    '+=': 'iadd',
    '-=': 'isub',
    '*=': 'imul',
    '/=': 'idiv',
    '%=': 'imod',
    '&=': 'iand',
    '|=': 'ior',
    '^=': 'ixor',
    '<<=': 'ilshift',
    '>>=': 'irshift',
    '++': 'inc',
    '--': 'dec',
    '<<': 'lshift',
    '>>': 'rshift',
    '&&': 'land',
    '||': 'lor',
    '!': 'lnot',
    '~': 'bnot',
    '&': 'band',
    '|': 'bor',
    '+': 'add',
    '-': 'sub',
    '*': 'mul',
    '/': 'div',
    '%': 'mod',
    '<': 'lt',
    '>': 'gt',
    '=': 'assign',
    '()': 'call',
}

# Longest symbols first, so that e.g. '<<=' is tried before '<<' and '<'.
# The sort is stable (also with reverse=True), so symbols of equal length
# keep the order in which they are listed above.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(), key=lambda item: len(item[0]), reverse=True))
97
# One job slot per CPU reported by multiprocessing; the semaphore hands out
# those slots.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)

# Base identifier -> number of times sanitize_name() has produced it so far.
registered_names = {}
102
103
def d(s):
    """Return *s* as text.

    ``bytes`` are decoded as UTF-8; a value that is already ``str`` is
    returned unchanged, so the helper works whether the clang bindings hand
    out byte strings or text.
    """
    return s if isinstance(s, str) else s.decode('utf8')
106
107
def sanitize_name(name):
    """Turn a prefixed C++ name into a unique ``__doc_*`` identifier.

    ``type-parameter-0-N`` becomes ``TN``, ``operator<symbol>`` is spelled
    out using CPP_OPERATORS, anything between the first ``<`` and the last
    ``>`` is dropped, and every remaining non-alphanumeric character becomes
    an underscore (runs collapsed, one trailing underscore removed).

    The second and later requests for the same identifier get ``_2``,
    ``_3``, ... appended; the counters live in the module-level
    ``registered_names`` dict.
    """
    ident = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for symbol, word in CPP_OPERATORS.items():
        ident = ident.replace('operator%s' % symbol, 'operator_%s' % word)
    ident = re.sub('<.*>', '', ident)
    ident = ''.join(ch if ch.isalnum() else '_' for ch in ident)
    ident = re.sub('_+', '_', ident)
    ident = re.sub('_$', '', ident)

    # Count this use under the base identifier, before any suffix is added.
    seen = registered_names.get(ident, 0) + 1
    registered_names[ident] = seen
    if seen > 1:
        ident += '_' + str(seen)
    return '__doc_' + ident
122
123
def process_comment(comment):
    r"""Convert a raw C++ documentation comment into docstring text.

    The comment markers (``/* ... */``, ``///`` and a leading ``*``) and the
    common indentation are removed, a subset of Doxygen, reST and HTML markup
    is rewritten into a Markdown-like notation, and prose paragraphs are
    re-wrapped to 70 columns.  Text between ``` fences (produced from
    ``\rst``/``\endrst``, ``\code``/``\endcode`` and ``<pre>``) is emitted
    line by line, stripped of indentation, without re-wrapping.

    :param comment: the raw comment as ``str``; may be empty.
    :return: the processed text without leading blank lines or trailing
             whitespace.
    """
    # Remove C++ comment syntax
    lines = []
    leading_spaces = float('inf')
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:].lstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        # Tested independently of the opener so that a one-line block
        # comment (``/** text */``) loses its terminator as well.
        if s.endswith('*/'):
            s = s[:-2].rstrip('*')
        if s.startswith('*'):
            s = s[1:]
        if len(s) > 0:
            leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
        lines.append(s)

    # Drop the indentation shared by all non-empty lines
    if leading_spaces != float('inf'):
        lines = [line[leading_spaces:] for line in lines]
    s = ''.join(line + '\n' for line in lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\class\s+%s.*' % cpp_group, '', s)   # removes \class Thing file.h nanogui/file.h
    s = re.sub(r'\\struct\s+%s.*' % cpp_group, '', s)  # removes \struct Thing file.h nanogui/file.h
    # cheap hack: turn all rst into markdown ```
    s = re.sub(r'\\rst', r'```\n\n', s)
    s = re.sub(r'\\endrst', r'```\n\n', s)
    # The dots are escaped: unescaped they match any two characters, which
    # would rewrite ordinary prose such as "a note::".
    s = re.sub(r'\.\. note::', r'Note:', s)
    s = re.sub(r'\.\. warning::', r'Warning:', s)
    s = re.sub(r'\.\. code-block::\s*\w*', r'', s)

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    # A paragraph starting with '$' is a section heading for the re-flow
    # step below.
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    section_tags = (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('remarks', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    )
    for in_, out_ in section_tags:
        # \b keeps a short tag from matching the start of a longer one
        # (\throw inside \throws), which would leave a stray letter behind.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    parts = []
    in_code_segment = False
    for x in re.split(r'(```)', s):
        if x == '```':
            parts.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            for line in x.splitlines():
                code = line.strip()
                # Every code line is terminated, including very short ones
                # such as '}'; only empty lines shorter than four characters
                # are dropped.
                if code or len(line) >= 4:
                    parts.append(code + '\n')
        else:
            # Paragraphs are separated by at least one blank line
            for y in re.split(r'(?: *\n *){2,}', x):
                text = re.sub(r'\s+', ' ', y).strip()
                if text.startswith('$'):
                    # Section heading: never indented itself, even directly
                    # after another heading; the paragraph that follows is
                    # indented by four spaces.
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
                    parts.append(wrapper.fill(text)[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    wrapped = wrapper.fill(text)
                    if len(wrapped) > 0:
                        parts.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(parts).rstrip().lstrip('\n')
249
250
def extract(filename, node, prefix, output):
    """Collect docstring declarations for *node* and its descendants.

    Cursors located in a file other than *filename* are ignored.  Kinds in
    RECURSE_LIST have their children visited, using the parent's prefix
    extended by the node's own spelling (the translation unit adds nothing).
    Kinds in PRINT_LIST with a non-empty spelling append one
    ``static const char *__doc_... = R"doc(...)doc";`` line to *output*.

    :param filename: path of the header being processed.
    :param node: the clang cursor to inspect.
    :param prefix: underscore-joined names of the enclosing scopes.
    :param output: list that receives the generated declarations.
    :return: the number of declarations appended for this subtree.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    total = 0
    if node.kind in RECURSE_LIST:
        child_prefix = prefix
        if node.kind != CursorKind.TRANSLATION_UNIT:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            total += extract(filename, child, child_prefix, output)
        # A scope that yielded nothing is not documented itself either.
        if total == 0:
            return 0

    if node.kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment('' if raw is None else d(raw))
        own_prefix = prefix + '_' if prefix else prefix
        if node.spelling:
            name = sanitize_name(own_prefix + d(node.spelling))
            # Multi-line docstrings start on their own line.
            separator = '\n' if '\n' in comment else ' '
            output.append('\nstatic const char *%s =%sR"doc(%s)doc";' %
                          (name, separator, comment))
            total += 1
    return total
278
279
class ExtractionThread(Thread):
    """Thread that parses one header with clang and extracts its docstrings.

    Constructing an instance acquires one slot of the module-level
    ``job_semaphore``; the slot is released when ``run`` finishes parsing
    and extraction, whether or not they raised.
    """

    def __init__(self, filename, parameters, output):
        """Store the job description and acquire a job slot.

        :param filename: header file to parse.
        :param parameters: list of arguments passed to the clang parser.
        :param output: list that extract() appends declarations to.
        """
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        self.output = output
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and append its docstrings to the output."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            translation_unit = cindex.Index(raw_index).parse(
                self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            job_semaphore.release()
297
if __name__ == '__main__':
    # Command line: arguments starting with '-' are passed on to clang, all
    # others are header files to process.  The generated C++ header goes to
    # stdout; progress and usage messages go to stderr.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # The first os.walk entry lists the SDK directories; the default
            # covers a directory that cannot be listed, and an empty list
            # simply means no -isysroot is passed.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                sysroot_dir = os.path.join(sdk_dir, sdk_names[0])
                parameters.append('-isysroot')
                parameters.append(sysroot_dir)

    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if not filenames:
        # stdout carries the generated header, so the usage text must not
        # be written there.
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output = []
    threads = []
    for filename in filenames:
        # Constructing the thread blocks until a job slot is available.
        thr = ExtractionThread(filename, parameters, output)
        thr.start()
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    for thr in threads:
        thr.join()

    # Sorting makes the output independent of thread completion order.
    output.sort()
    for declaration in output:
        print(declaration)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')
369