1#!/usr/bin/env python3
2
3# Copyright 2020 David Robillard <d@drobilla.net>
4#
5# Permission to use, copy, modify, and/or distribute this software for any
6# purpose with or without fee is hereby granted, provided that the above
7# copyright notice and this permission notice appear in all copies.
8#
9# THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17"""
18Write Sphinx markup from Doxygen XML.
19
20Takes a path to a directory of XML generated by Doxygen, and emits a directory
21with a reStructuredText file for every documented symbol.
22"""
23
24import argparse
25import os
26import sys
27import textwrap
28import xml.etree.ElementTree
29
30__author__ = "David Robillard"
31__date__ = "2020-11-18"
32__email__ = "d@drobilla.net"
33__license__ = "ISC"
34__version__ = __date__.replace("-", ".")
35
36
def load_index(index_path):
    """
    Build the skeleton index from Doxygen's index XML.

    :param index_path: Filename or file object passed to the XML parser.

    :returns: A dictionary mapping each refid to a record with at least a
    ``kind`` and a ``name``.  Compound records also carry an ``xml_filename``
    (the refid plus ``.xml``) and an empty ``children`` list; enum member
    records carry an empty ``children`` list as well.
    """

    skipped_kinds = ("dir", "file", "page")
    index = {}

    for compound in xml.etree.ElementTree.parse(index_path).getroot():
        refid = compound.get("refid")
        kind = compound.get("kind")
        name = compound.find("name").text
        if kind in skipped_kinds:
            continue

        # A compound is listed only once in the index
        assert refid not in index
        index[refid] = {
            "kind": kind,
            "name": name,
            "xml_filename": refid + ".xml",
            "children": [],
        }

        # Only members of namespaces get a qualified name
        scope = ("%s::" % name) if kind == "namespace" else ""

        for member in compound.findall("member"):
            member_id = member.get("refid")
            if member_id in index:
                # Only groups may list something that was already indexed
                assert kind == "group"
                continue

            member_kind = member.get("kind")
            member_name = member.find("name").text
            record = {"kind": member_kind, "name": scope + member_name}

            if member_kind == "enum":
                # Enums are not compounds, but their values need a parent
                # to attach to so they are not written as top-level entries
                record["children"] = []
            elif member_kind == "enumvalue":
                # Enum values are stored without the namespace prefix
                record["name"] = member_name

            index[member_id] = record

    return index
93
94
def resolve_index(index, root):
    """
    Extend the index with relationships found in one definition document.

    Fills in the "children" lists (and "parent" where applicable) of the
    records touched by the document, records whether each enum is "strong",
    and drops members of private sections from the index entirely.
    """

    def link(parent_id, child_id):
        """Register child_id under parent_id, setting "parent" if owned."""
        parent_record = index[parent_id]
        child_record = index[child_id]

        if child_record["kind"] == "enumvalue":
            assert parent_record["kind"] == "enum"
            owned = True
        else:
            # Only members of record types point back at their parent
            owned = parent_record["kind"] in ("class", "struct", "union")

        if owned:
            assert child_record.get("parent", parent_id) == parent_id
            child_record["parent"] = parent_id

        siblings = parent_record["children"]
        if child_id not in siblings:
            siblings.append(child_id)

    compound = root.find("compounddef")
    compound_id = compound.get("id")

    if compound.get("kind") == "group":
        # Subgroups first, then classes, in document order
        for tag in ("innergroup", "innerclass"):
            for inner in compound.findall(tag):
                link(compound_id, inner.get("refid"))

    for section in compound.findall("sectiondef"):
        members = section.findall("memberdef")

        if section.get("kind").startswith("private"):
            # Private members are not documented at all
            for member in members:
                index.pop(member.get("id"), None)
            continue

        for member in members:
            member_id = member.get("id")
            link(compound_id, member_id)

            if member.get("kind") != "enum":
                continue

            index[member_id]["strong"] = member.get("strong") == "yes"
            for value in member.findall("enumvalue"):
                link(member_id, value.get("id"))
145
146
def sphinx_role(record, lang):
    """
    Return the Sphinx domain-qualified name used for a record.

    The result has the form "<domain>:<name>", for example "c:function" or
    "cpp:enum-class".  Macros always use the "c" domain regardless of lang.

    :raises RuntimeError: If the record's kind has no known mapping.
    """

    kind = record["kind"]

    if kind == "define":
        return "c:macro"

    if kind in ("class", "function", "namespace", "struct", "union"):
        suffix = kind
    elif kind == "enum":
        suffix = "enum-class" if record["strong"] else "enum"
    elif kind == "typedef":
        suffix = "type"
    elif kind == "enumvalue":
        suffix = "enumerator"
    elif kind == "variable":
        # Variables with a parent are members of a class/struct/union
        suffix = "member" if "parent" in record else "var"
    else:
        raise RuntimeError("No known role for kind '%s'" % kind)

    return lang + ":" + suffix
176
177
def child_identifier(lang, parent_name, child_name):
    """
    Return the qualified identifier of an enum value or struct member.

    The parent and child names are joined with "::" when lang is "cpp", and
    with "." for any other language.
    """

    if lang == "cpp":
        return "%s::%s" % (parent_name, child_name)

    return "%s.%s" % (parent_name, child_name)
188
189
def link_markup(index, lang, refid):
    """
    Return a Sphinx cross-reference for a Doxygen reference.

    :param index: The index of records, keyed by refid.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param refid: The Doxygen ID of the link target, which must be in index.

    :returns: Inline reST markup like ``:c:func:`foo```.

    :raises RuntimeError: If the target's kind can not be linked to.
    """

    record = index[refid]
    kind, name = record["kind"], record["name"]
    role = sphinx_role(record, lang)

    if kind == "enum":
        # Scoped enums are declared with the "enum-class" directive, but the
        # only cross-reference role for any enum is "enum"
        return ":%s:enum:`%s`" % (lang, name)

    if kind in ["class", "struct", "typedef", "union"]:
        return ":%s:`%s`" % (role, name)

    if kind == "function":
        # The directive is "function", but the role is "func"
        return ":%s:func:`%s`" % (lang, name)

    if kind == "enumvalue" or (kind == "variable" and "parent" in record):
        parent_name = index[record["parent"]]["name"]
        return ":%s:`%s`" % (role, child_identifier(lang, parent_name, name))

    if kind == "variable":
        return ":%s:var:`%s`" % (lang, name)

    if kind == "define":
        # Macros live in the C domain even in C++ documentation (there is no
        # "cpp:macro" role), so use the role as given by sphinx_role()
        return ":%s:`%s`" % (role, name)

    raise RuntimeError("Unknown link target kind: %s" % kind)
218
219
def indent(markup, depth):
    """
    Indent markup by a number of reST indentation levels.

    A thin wrapper around textwrap.indent() that takes an integer depth, where
    each level is three spaces.
    """

    prefix = depth * "   "

    return textwrap.indent(markup, prefix)
229
230
def heading(text, level):
    """
    Return a ReST heading at a given level.

    Follows the style in the Python documentation guide, see
    <https://devguide.python.org/documenting/#sections>.

    :param text: The heading text.
    :param level: The heading level, from 1 (outermost) to 6.

    :returns: The heading markup followed by a blank line.  Levels 1 and 2
    have both an overline and an underline, deeper levels only an underline.
    """

    assert 1 <= level <= 6

    # Levels are 1-based, so level 1 uses "#" and level 6 uses '"'
    chars = ("#", "*", "=", "-", "^", '"')
    line = chars[level - 1] * len(text)
    overline = line + "\n" if level < 3 else ""

    return "%s%s\n%s\n\n" % (overline, text, line)
245
246
def dox_to_rst(index, lang, node):
    """
    Convert documentation commands (docCmdGroup) to Sphinx markup.

    This is used to convert the content of descriptions in the documentation.
    It recursively parses all children tags and raises a RuntimeError if any
    unknown tag is encountered.

    :param index: The index of records, used to resolve references.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param node: The XML element to convert.

    :returns: A string of reST markup (never None).
    """

    def field_value(markup):
        """Return a value for a field as a single line or indented block."""
        if "\n" in markup.strip():
            return "\n" + indent(markup, 1)

        return " " + markup.strip()

    if node.tag == "lsquo":
        return "‘"

    if node.tag == "rsquo":
        return "’"

    if node.tag == "computeroutput":
        assert len(node) == 0
        return "``%s``" % node.text

    if node.tag == "itemizedlist":
        markup = ""
        for item in node.findall("listitem"):
            assert len(item) == 1
            markup += "\n- %s" % dox_to_rst(index, lang, item[0])

        return markup

    if node.tag == "para":
        # Interleave the text of the paragraph with its converted children
        markup = node.text if node.text is not None else ""
        for child in node:
            markup += dox_to_rst(index, lang, child)
            markup += child.tail if child.tail is not None else ""

        return markup.strip() + "\n\n"

    if node.tag == "parameterlist":
        markup = ""
        for item in node.findall("parameteritem"):
            name = item.find("parameternamelist/parametername")
            description = item.find("parameterdescription")
            assert len(description) == 1
            markup += "\n\n:param %s:%s" % (
                name.text,
                field_value(dox_to_rst(index, lang, description[0])),
            )

        return markup + "\n"

    if node.tag == "programlisting":
        return "\n.. code-block:: %s\n\n%s" % (
            lang,
            indent(plain_text(node), 1),
        )

    if node.tag == "ref":
        refid = node.get("refid")
        if refid not in index:
            sys.stderr.write("warning: Unresolved link: %s\n" % refid)

            # Fall back to the plain link text.  An element with no text has
            # None here, which would break concatenation in the caller.
            return node.text if node.text is not None else ""

        assert len(node) == 0
        markup = link_markup(index, lang, refid)
        assert len(markup) > 0
        return markup

    if node.tag == "simplesect":
        assert len(node) == 1

        if node.get("kind") == "return":
            return "\n:returns:" + field_value(
                dox_to_rst(index, lang, node[0])
            ) + "\n"

        if node.get("kind") == "see":
            return dox_to_rst(index, lang, node[0])

        raise RuntimeError("Unknown simplesect kind: %s" % node.get("kind"))

    if node.tag == "ulink":
        return "`%s <%s>`_" % (node.text, node.get("url"))

    raise RuntimeError("Unknown documentation command: %s" % node.tag)
335
336
def description_markup(index, lang, node):
    """
    Return the markup for a brief or detailed description.

    :param index: The index of records, used to resolve references.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param node: A ``briefdescription`` or ``detaileddescription`` element.

    :returns: The converted markup of all children with surrounding whitespace
    stripped, which is empty if the description has no content.
    """

    assert node.tag in ("briefdescription", "detaileddescription")
    assert not (node.tag == "briefdescription" and len(node) > 1)

    # Descriptions have no text of their own, only children.  The text is None
    # (rather than whitespace) when the element is completely empty.
    assert not (node.text or "").strip()

    return "".join([dox_to_rst(index, lang, child) for child in node]).strip()
345
346
def set_descriptions(index, lang, definition, record):
    """
    Copy the brief and detailed descriptions of a definition into a record.

    Each description found in the definition is converted to markup and stored
    in the record under the name of its tag.  Missing descriptions leave the
    record untouched.
    """

    tags = ("briefdescription", "detaileddescription")

    for tag, node in ((tag, definition.find(tag)) for tag in tags):
        if node is None:
            continue

        record[tag] = description_markup(index, lang, node)
354
355
def set_template_params(node, record):
    """
    Set a record's template_params from the XML definition.

    Does nothing if the definition has no ``templateparamlist``.  Otherwise,
    stores the parameters as a single comma-separated string.
    """

    param_list = node.find("templateparamlist")
    if param_list is None:
        return

    def param_text(param):
        """Return "type name" for value parameters, or just the type."""
        type_text = plain_text(param.find("type"))
        declname = param.find("declname")
        if declname is None:
            return type_text  # Type parameter

        return "%s %s" % (type_text, plain_text(declname))  # Value parameter

    record["template_params"] = ", ".join(
        [param_text(param) for param in param_list.findall("param")]
    )
374
375
def plain_text(node):
    """
    Return the text content of a node with all markup removed.

    This is for places where Doxygen may include tags like refs, but Sphinx
    needs plain text because it parses declarations itself to generate links.
    An ``sp`` element is rendered as a single space.
    """

    if node.tag == "sp":
        pieces = [" "]
    else:
        pieces = [node.text or ""]

    # Append the text of each child, then whatever follows it in this node
    for child in node:
        pieces.append(plain_text(child))
        pieces.append(child.tail or "")

    return "".join(pieces)
396
397
def local_name(name):
    """Return the part of a name after the last "::", or the whole name."""

    _scope, _separator, local = name.rpartition("::")

    return local
402
403
def read_definition_doc(index, lang, root):
    """
    Read the documentation in a definition document into the index.

    Sets the descriptions and template parameters of the compound and of every
    member in its non-private sections, along with the kind-specific fields
    ("title", "type", "prototype", "definition") needed to write declarations.
    """

    def read_typedef(member, record):
        """Set the "type" and "definition" of a typedef record."""
        alias = local_name(record["name"])
        args = member.find("argsstring").text
        target = plain_text(member.find("type"))

        if args is None:
            # Normal named typedef
            assert target is not None
            record["type"] = target
            if member.find("definition").text.startswith("using"):
                record["definition"] = "%s = %s" % (alias, target)
            else:
                record["definition"] = "%s %s" % (target, alias)
            return

        # Function pointer: the name goes between the "(*" and the ")(...)"
        assert target[-2:] == "(*" and args[0] == ")"
        record["type"] = target + args
        record["definition"] = target + alias + args

    # Describe the compound itself
    compound = root.find("compounddef")
    owner = index[compound.get("id")]
    set_descriptions(index, lang, compound, owner)
    set_template_params(compound, owner)

    title = compound.find("title")
    if title is not None:
        owner["title"] = title.text.strip()

    in_record_type = compound.get("kind") in ("class", "struct", "union")

    # Describe every member of the compound that is not private
    for section in compound.findall("sectiondef"):
        if section.get("kind").startswith("private"):
            continue

        for member in section.findall("memberdef"):
            kind = member.get("kind")
            record = index[member.get("id")]
            set_descriptions(index, lang, member, record)
            set_template_params(member, record)

            if in_record_type:
                assert kind in ("function", "typedef", "variable")
                record["type"] = plain_text(member.find("type"))

            if kind == "enum":
                for value in member.findall("enumvalue"):
                    value_record = index[value.get("id")]
                    set_descriptions(index, lang, value, value_record)

            elif kind == "function":
                return_type = plain_text(member.find("type"))
                record["prototype"] = "%s %s%s" % (
                    return_type,
                    member.find("name").text,
                    member.find("argsstring").text,
                )

            elif kind == "typedef":
                read_typedef(member, record)

            elif kind == "variable":
                record["definition"] = member.find("definition").text
468
469
def declaration_string(record):
    """
    Return the declaration that follows a directive for a record.

    This is in C/C++ syntax, but without keywords like "typedef" and "using",
    for example "struct ThingImpl Thing" or "void run(int value)".  If the
    record has template parameters, the declaration starts with a
    "template <...> " prefix.
    """

    kind = record["kind"]

    if "template_params" in record:
        prefix = "template <%s> " % record["template_params"]
    else:
        prefix = ""

    if kind == "function":
        declaration = record["prototype"]
    elif kind == "typedef":
        declaration = record["definition"]
    elif kind == "variable" and "parent" not in record:
        # Free variables use the definition as given by Doxygen
        declaration = record["definition"]
    elif kind == "variable" or "type" in record:
        # Members, and anything else with a type, are written "type name"
        declaration = "%s %s" % (record["type"], local_name(record["name"]))
    else:
        declaration = local_name(record["name"])

    return prefix + declaration
500
501
def document_markup(index, lang, record):
    """
    Return the complete document that describes some documented entity.

    :param index: The index of records, used to look up children.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param record: The record to document, which must have descriptions.

    :returns: A directive for the record with its descriptions as content,
    followed by a directive for each child (if any).
    """

    kind = record["kind"]
    role = sphinx_role(record, lang)
    name = record["name"]
    markup = ""

    # Set the scope for names that are in a namespace
    if name != local_name(name):
        markup += ".. cpp:namespace:: %s\n\n" % name[0 : name.rindex("::")]

    # Write top-level directive
    markup += ".. %s:: %s\n" % (role, declaration_string(record))

    # Write main description blurb
    markup += "\n" + indent(record["briefdescription"] + "\n", 1)
    if len(record["detaileddescription"]) > 0:
        markup += "\n" + indent(record["detaileddescription"], 1) + "\n"

    assert (
        kind in ["class", "enum", "namespace", "struct", "union"]
        or "children" not in record
    )

    # Sphinx C++ namespaces work by setting a scope, they have no content
    child_indent = 0 if kind == "namespace" else 1

    # Write inline children if applicable
    markup += "\n" if "children" in record else ""
    for child_id in record.get("children", []):
        child_record = index[child_id]
        child_role = sphinx_role(child_record, lang)

        child_header = ".. %s:: %s\n\n" % (
            child_role,
            declaration_string(child_record),
        )

        child_brief = child_record["briefdescription"]
        child_detail = child_record["detaileddescription"]

        markup += "\n"
        markup += indent(child_header, child_indent)
        markup += indent(child_brief, child_indent + 1)

        # Descriptions are stripped, so without a blank line between them the
        # detailed description would continue the last line of the brief one
        if child_brief and child_detail:
            markup += "\n\n"

        markup += indent(child_detail, child_indent + 1)

    return markup
546
547
def symbol_filename(name):
    """Return a symbol name with every "::" replaced by "__" for filenames."""

    return "__".join(name.split("::"))
552
553
def emit_groups(index, lang, output_dir, force):
    """
    Write a description file for every group documented in the index.

    :param index: The fully resolved and described index of records.
    :param lang: The Sphinx language domain, "c" or "cpp".
    :param output_dir: The existing directory to write files to.
    :param force: Overwrite existing files instead of raising an error.

    :raises FileExistsError: If an output file exists and force is false.
    """

    for record in index.values():
        if record["kind"] != "group":
            continue

        filename = os.path.join(output_dir, "%s.rst" % record["name"])
        if not force and os.path.exists(filename):
            raise FileExistsError("File already exists: '%s'" % filename)

        # The markup can contain non-ASCII characters (like typographic
        # quotes), so write UTF-8 regardless of the locale's encoding
        with open(filename, "w", encoding="utf-8") as rst:
            rst.write(heading(record["title"], 1))

            # Get all child groups and symbols, keyed by name.
            # NOTE(review): children that share a name replace each other
            # here, so only the last one with a given name is emitted.
            child_groups = {}
            child_symbols = {}
            for child_id in record["children"]:
                child = index[child_id]
                if child["kind"] == "group":
                    child_groups[child["name"]] = child
                else:
                    child_symbols[child["name"]] = child

            # Emit description (document body)
            if len(record["briefdescription"]) > 0:
                rst.write(record["briefdescription"] + "\n\n")
            if len(record["detaileddescription"]) > 0:
                rst.write(record["detaileddescription"] + "\n\n")

            if len(child_groups) > 0:
                # Emit TOC for child groups
                rst.write(".. toctree::\n\n")
                for group in child_groups.values():
                    rst.write(indent(group["name"], 1) + "\n")

            # Emit symbols in the order they were added to the group
            for symbol in child_symbols.values():
                rst.write("\n")
                rst.write(document_markup(index, lang, symbol))
                rst.write("\n")
596
597
def run(index_xml_path, output_dir, language, force):
    """
    Write a directory of Sphinx files from a Doxygen XML directory.

    :param index_xml_path: Path to the index.xml file written by Doxygen.
    Definition files are loaded from the directory that contains it.
    :param output_dir: Directory to write to, created if necessary.
    :param language: The Sphinx language domain, "c" or "cpp".
    :param force: Overwrite existing output files.
    """

    # Build skeleton index from index.xml
    xml_dir = os.path.dirname(index_xml_path)
    index = load_index(index_xml_path)

    # Load all definition documents
    definition_docs = [
        xml.etree.ElementTree.parse(
            os.path.join(xml_dir, record["xml_filename"])
        )
        for record in index.values()
        if "xml_filename" in record
    ]

    # Do an initial pass of the definition documents to resolve the index
    for doc in definition_docs:
        resolve_index(index, doc)

    # Finally read the documentation from definition documents
    for doc in definition_docs:
        read_definition_doc(index, language, doc)

    # Create output directory if necessary.  An existing directory is fine,
    # but any other failure is reported here rather than silently ignored.
    os.makedirs(output_dir, exist_ok=True)

    # Emit output files
    emit_groups(index, language, output_dir, force)
628
629
if __name__ == "__main__":
    # Command line interface: the parsed option names match the parameters of
    # run(), so the namespace can be passed directly as keyword arguments
    ap = argparse.ArgumentParser(
        usage="%(prog)s [OPTION]... INDEX_XML_PATH OUTPUT_DIR",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    ap.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="overwrite files",
    )

    ap.add_argument(
        "-l",
        "--language",
        default="c",
        choices=["c", "cpp"],
        help="language domain for output",
    )

    # The first argument is the index file itself (not its directory), since
    # it is parsed directly and its directory is used to find other files
    ap.add_argument("index_xml_path", help="path to index.xml from Doxygen")
    ap.add_argument("output_dir", help="output directory")

    run(**vars(ap.parse_args(sys.argv[1:])))
656